# Installation requirements

In [1]:
! pip install pyreadstat -q

# Imports

In [1]:
import os
import warnings

import yaml

from utils import *
warnings.filterwarnings("ignore")

In [5]:
# Load the experiment definitions from the YAML configuration file.
# NOTE(review): FullLoader accepts more than plain YAML tags; if the config only
# contains plain scalars/lists/maps, yaml.safe_load would be the stricter
# choice -- confirm before switching.
with open('experiments_config.yaml') as config_file:
    raw_configs = yaml.load(config_file, Loader=yaml.FullLoader)

# Index the entries under the top-level `experiments` key by their `id` field.
configs = {experiment['id']: experiment for experiment in raw_configs['experiments']}

# Fallback parameters used for every experiment.
default_config = dict(
    n_iterations=30,
    test_size=0.2,
    split_by_env=True,
    model_type='all',
    model_args={},
    mu_limits=(0.1, 0.4),
    controlled_mus=10,
)

In [6]:
# Run every configured experiment and collect its aggregated metrics and figures.
results = {}
for exp_id, overrides in configs.items():
    # Experiment-specific settings take precedence over the defaults.
    current_config = {**default_config, **overrides}

    path_to_data = current_config.get('path')
    data_type = current_config.get('type')
    # Only SPSS (.sav) and CSV inputs are supported; reject anything else up front.
    if data_type not in ('sav', 'csv'):
        raise ValueError(f"Unknown data type: {data_type}")
    read_dataset = pd.read_spss if data_type == 'sav' else pd.read_csv
    df = read_dataset(path_to_data)

    # These keys only describe the experiment/dataset; they are removed so that the
    # remaining entries can be forwarded to `bootstrapping` as keyword arguments.
    for consumed_key in ('id', 'path', 'type'):
        del current_config[consumed_key]

    groups_column = current_config.get('groups_column')
    n_samples, n_columns = df.shape

    print(f"Running experiment: {exp_id}, from {path_to_data}")
    print(f"# samples: {n_samples}, # features: {n_columns}, # environments: {df[groups_column].nunique()}")
    df[groups_column] = df[groups_column].astype('category')
    metrics, train_probs, test_probs = bootstrapping(df, **current_config)

    mean_metrics, std_metrics = compute_mean_std_metrics(metrics)

    # Figures are closed right away so they are not rendered inline; the Figure
    # objects are kept in `results` and styled/saved in a later cell.
    fig1, ax1 = plt.subplots(1, 1, figsize=(10, 7))
    error_plot(metrics, ax1)
    plt.suptitle(f"Dataset {exp_id}")
    plt.close()

    fig2, ax2 = plt.subplots(1, 1, figsize=(10, 7))
    mu_diff_vs_rmse_plot(metrics, train_probs, test_probs, ax2)
    plt.suptitle(f"Dataset {exp_id}")
    plt.close()

    results[exp_id] = dict(
        mean_metrics=mean_metrics,
        std_metrics=std_metrics,
        figures=(fig1, fig2),
        axes=(ax1, ax2),
    )

Running experiment: 0, from ./Datasets/popular2.sav
# samples: 2000, # features: 15, # environments: 100


  0%|          | 0/300 [00:00<?, ?it/s]

100%|██████████| 30/30 [06:52<00:00, 13.75s/it]t]
100%|██████████| 300/300 [06:52<00:00,  1.37s/it]


Running experiment: 1, from ./Datasets/dataset1reshape.csv
# samples: 348, # features: 10, # environments: 6


100%|██████████| 30/30 [01:28<00:00,  2.97s/it]s]
100%|██████████| 300/300 [01:28<00:00,  3.37it/s]


Running experiment: 2, from ./Datasets/dataset2reshape.csv
# samples: 200, # features: 33, # environments: 4


100%|██████████| 30/30 [02:42<00:00,  5.42s/it]s]
100%|██████████| 300/300 [02:42<00:00,  1.84it/s]


Running experiment: 3, from ./Datasets/dataset3reshape.csv
# samples: 360, # features: 12, # environments: 6


100%|██████████| 30/30 [01:47<00:00,  3.58s/it]s]
100%|██████████| 300/300 [01:47<00:00,  2.80it/s]


Running experiment: 4, from ./Datasets/dataset4reshape.csv
# samples: 480, # features: 17, # environments: 6


100%|██████████| 30/30 [03:48<00:00,  7.60s/it]s]
100%|██████████| 300/300 [03:48<00:00,  1.32it/s]


Running experiment: 5, from ./Datasets/dataset5reshape.csv
# samples: 500, # features: 22, # environments: 5


100%|██████████| 30/30 [02:23<00:00,  4.78s/it]s]
100%|██████████| 300/300 [02:23<00:00,  2.09it/s]


Running experiment: 6, from ./Datasets/dataset6reshape.csv
# samples: 480, # features: 62, # environments: 4


100%|██████████| 30/30 [12:15<00:00, 24.53s/it]t]
100%|██████████| 300/300 [12:15<00:00,  2.45s/it]


In [47]:
def _average_rmse_across_experiments(results, metrics_key):
    """Average train/test RMSE per model over all experiments.

    Parameters
    ----------
    results : dict
        Mapping of experiment id to a dict that contains `metrics_key`.
    metrics_key : str
        Which per-experiment entry to aggregate ('mean_metrics' or 'std_metrics').
        That entry maps a model name to a dict with 'train_rmse' and 'test_rmse'.

    Returns
    -------
    defaultdict
        ``{'train_rmse': {model: value}, 'test_rmse': {model: value}}`` where each
        value is the per-experiment number summed and divided by ``len(results)``.
        For 'std_metrics' this is the plain average of the per-experiment values,
        not a pooled standard deviation.
    """
    totals = defaultdict(lambda: defaultdict(float))
    for experiment in results.values():
        for model_name, model_metrics in experiment[metrics_key].items():
            for split in ('train_rmse', 'test_rmse'):
                totals[split][model_name] += model_metrics[split]

    # Every model is divided by the total number of experiments, including
    # experiments in which that model might not appear.
    n_experiments = len(results)
    for per_model in totals.values():
        for model_name in per_model:
            per_model[model_name] /= n_experiments
    return totals


# The figure below is written to paper/figures; create the directory here so this
# cell does not depend on a later cell having been executed first.
os.makedirs('paper/figures', exist_ok=True)

# mean results for all experiments
sum_mean_results = _average_rmse_across_experiments(results, 'mean_metrics')

# std results for all experiments
sum_std_results = _average_rmse_across_experiments(results, 'std_metrics')

# Grouped bar chart: one group per model, one bar per split, with the averaged
# std values drawn as error bars.
fig3, ax3 = plt.subplots(1, 1, figsize=(10, 7))
sum_results_df = pd.DataFrame(sum_mean_results)
sum_results_df.plot(kind='bar', ax=ax3, yerr=pd.DataFrame(sum_std_results), capsize=5)
fig3.suptitle("Mean RMSE for all experiments", fontsize=24)

ax3.legend(fontsize=14)
# Turn the model identifiers into human-readable tick labels.
plt.xticks(rotation=45, ticks=ax3.get_xticks(), labels=[i.replace('_', ' ').capitalize() for i in sum_results_df.index], fontsize=22)
plt.yticks(fontsize=18)
fig3.tight_layout()
fig3.savefig('paper/figures/mean_rmse.pdf')
# Close this specific figure instead of whichever one happens to be current.
plt.close(fig3)


In [46]:
# Apply the final titles and font sizes to every experiment's figures, then write
# them out as PDFs.
figures_dir = 'paper/figures'
if not os.path.exists(figures_dir):
    os.makedirs(figures_dir)

for exp_id, entry in results.items():
    # Output titles and file names are numbered one higher than the experiment id.
    dataset_no = exp_id + 1
    error_fig = entry['figures'][0]
    divergence_fig = entry['figures'][1]

    error_fig.suptitle(f"Dataset {dataset_no} - Error plot", fontsize=24)
    divergence_fig.suptitle(f"Dataset {dataset_no} - RMSE and Distribution Divergence", fontsize=24)

    for position, axis in enumerate(entry['axes']):
        for which in ('x', 'y'):
            axis.tick_params(axis=which, labelsize=16)
        # The first axes holds the error plot; any other one is plotted against the mu difference.
        x_label = "Data Split" if position == 0 else r"$\Delta(\mu)$"
        axis.set_xlabel(x_label, fontsize=22)
        # Re-apply the existing y label only to change its font size.
        axis.set_ylabel(axis.get_ylabel(), fontsize=18)

    error_fig.savefig(f"paper/figures/{dataset_no}_error_plot.pdf")
    divergence_fig.savefig(f"paper/figures/{dataset_no}_mu_diff_vs_rmse_plot.pdf")