In [1]:
import wandb
import pandas as pd

In [2]:

# Weights & Biases location of the experiment sweeps.
entity = "evangeorgerex"
project = "fwal"

# Sweeps whose runs feed the result tables. Each ID must appear only once:
# a repeated ID makes that sweep's runs get fetched and averaged twice.
sweep_ids = [
    "zkvbu18r", # lasso baseline
    "22eckaxx", # rf baseline
    "gty91unf", # rf baseline
    "efjov5qc", # lasso baseline
    # NOTE(review): a second "zkvbu18r" entry labelled "lasso baseline more"
    # used to sit here; it repeated the first entry and was removed. If a
    # different sweep was intended, add its real ID.
    "u0xyvsw9", # xgboost
    "8yghctpp", # supervised cae
    "0lh1b7hs", # SEFS
    "40yayf8d", # MLP
    "ho0m3j5w", # FWAL
    "h245pc2i", # cae
    # "iw8hr7oj", # FWAl hierarchical
    "ryhck7mh", # FWAl hierarchical sigmoid
    "bwhcxdf3", # FWAL hierarchical shared
    "54j0jkpy", # FWAL hierarchical shared sigmoid loss
    # finance:
    "18c8ij60", # xgboost finance
    "xclyvl0c", # supervised cae finance
    "2xn215vh", # SEFS finance
    "4upk4xrk", # RF finance
    "2bqm9fb6", # MLP finance
    "xpxe64ee", # LASSO finance
    "91aly80j", # FWAL finance
    "jqulrkke", # cae finance
]

# These sweeps are all broken because the reported bestmodel scores are wrong:
#     "ycv2gbna", # fwal only reconstruct masked mice protein more patience
#     "9tjs7cai", # fwal as mlp baseline
#     "nmsvf0u4", # fwal new datasets
#     "5kxy8gl8", # fwal as mlp baseline
#     "t1z9hleb", # fwal tti
#     "3ubxo03v", # finish fwal tti
#     "t8i7yh69", # fwal gisette
#     "cwhd81fy", # fwal MNIST (states are failed despite being finished for some reason)
#     "z9b6sox8", # fwal only reconstruct masked
#     "xi4jr64q", # fwals only reconstruct masked GPU
#     "aie2q1ph", # cae
#     "uunpksug", # cae


'\nThese sweeps are all broken because the reported bestmodel scores are wrong:\n    # "ycv2gbna", # fwal only reconstruct masked mice protein more patience\n    # "9tjs7cai", # fwal as mlp baseline\n    # "nmsvf0u4", # fwal new datasets\n    # "5kxy8gl8", # fwal as mlp baseline\n    # "t1z9hleb", # fwal tti \n    # "3ubxo03v", # finish fwal tti\n    # "t8i7yh69", # fwal gisette\n    # "cwhd81fy", # fwal MNIST (states are failed despite being finished for some reason)\n    # "z9b6sox8", # fwal only reconstruct masked\n    # "xi4jr64q", # fwals only reconstruct masked GPU\n    # "aie2q1ph", # cae\n    # "uunpksug", # cae\n'

In [3]:
def _resolve_model_name(config):
    """Map a run's config to the model label used in the result tables."""
    if config.get("as_MLP_baseline", False):
        return "MLP_same_capacity"

    model_name = config.get("model", "Unknown")
    if model_name == "fwal":
        model_name = "F-Act"

    # The hierarchical flags are strictly nested: `share_mask` only matters
    # for hierarchical runs and `sigmoid_loss` only for shared-mask runs.
    if config.get("hierarchical", False):
        model_name = "F-Act hierarchical"
        if config.get("share_mask", False):
            model_name = "F-Act hierarchical shared"
            if config.get("sigmoid_loss", False):
                model_name = "F-Act hierarchical shared L_sig"
    return model_name


def fetch_runs(entity, project, sweep_ids, api=None):
    """Collect one flat record per usable run of the given W&B sweeps.

    Parameters
    ----------
    entity, project : str
        Used to build the sweep path ``"{entity}/{project}/{sweep_id}"``.
    sweep_ids : iterable of str
        Sweep IDs to read. Repeated IDs are visited only once, so a duplicate
        in the list cannot double-count a sweep's runs.
    api : object, optional
        Client exposing ``sweep(path)`` whose result has a ``runs`` iterable.
        Defaults to a fresh ``wandb.Api()``.

    Returns
    -------
    list of dict
        One dict per kept run with the keys ``model``, ``dataset``, ``seed``,
        the three ``bestmodel_{train,valid,test}/F1_weighted`` scores (NaN when
        missing from the run summary) and, for lasso / rf / cae /
        supervised_cae runs, the swept hyperparameters read from the config.
    """
    if api is None:
        api = wandb.Api()

    skipped_datasets = ("poly_binarised_decimalised_mod10_synth", "simple_trig_synth", "Unknown")
    metric_keys = (
        "bestmodel_train/F1_weighted",
        "bestmodel_valid/F1_weighted",
        "bestmodel_test/F1_weighted",
    )
    # Config keys to copy into the record, by resolved model name.
    hyperparameter_keys = {
        "lasso": ("lasso_C", "lasso_l1_ratio"),
        "rf": ("rf_max_depth",),
        "cae": ("CAE_neurons_ratio",),
        "supervised_cae": ("CAE_neurons_ratio",),
    }

    runs_data = []
    # dict.fromkeys drops repeated IDs while keeping first-seen order.
    for sweep_id in dict.fromkeys(sweep_ids):
        sweep_runs = api.sweep(f"{entity}/{project}/{sweep_id}").runs
        # Runs of sweep "cwhd81fy" are accepted whatever their state.
        accept_any_state = sweep_id == "cwhd81fy"

        for run in sweep_runs:
            if run.state != "finished" and not accept_any_state:
                continue

            dataset_name = run.config.get("dataset", "Unknown")
            if dataset_name in skipped_datasets:
                continue

            model_name = _resolve_model_name(run.config)
            run_data = {
                "model": model_name,
                "dataset": dataset_name,
                "seed": run.config.get("seed_model_init", None),
            }
            for key in metric_keys:
                run_data[key] = run.summary.get(key, float('nan'))
            for key in hyperparameter_keys.get(model_name, ()):
                run_data[key] = run.config.get(key)

            runs_data.append(run_data)

    return runs_data

def process_runs_data(runs_data):
    """Keep the best hyperparameters per dataset, then average runs per model.

    For every model that was swept over hyperparameters (lasso, rf, cae,
    supervised_cae) the runs are grouped by dataset and hyperparameter
    combination and the validation F1 is averaged within each group. Only the
    runs of the best-scoring combination of each dataset are kept; runs of all
    other models pass through untouched. The remaining runs are then averaged
    per (model, dataset).

    Parameters
    ----------
    runs_data : list of dict
        Run records with at least ``model``, ``dataset`` and the
        ``bestmodel_*/F1_weighted`` keys, plus hyperparameter keys for the
        swept models. May be empty, and may lack some swept models entirely.

    Returns
    -------
    final_grouped : pandas.DataFrame
        Mean of every numeric column per (model, dataset).
    best_hyperparams : dict of str -> pandas.DataFrame
        For each swept model, the winning hyperparameter combination and its
        mean validation F1 per dataset. The frame is empty when the model has
        no usable runs.
    """
    metric = 'bestmodel_valid/F1_weighted'
    # Hyperparameter columns that define a "combination" for each swept model.
    hyperparam_columns = {
        'lasso': ['lasso_C', 'lasso_l1_ratio'],
        'rf': ['rf_max_depth'],
        'cae': ['CAE_neurons_ratio'],
        'supervised_cae': ['CAE_neurons_ratio'],
    }

    df = pd.DataFrame(runs_data)
    best_hyperparams = {}

    for model, hp_cols in hyperparam_columns.items():
        key_cols = ['dataset'] + hp_cols
        no_result = pd.DataFrame(columns=key_cols + [metric])

        # A model with no runs never contributes its hyperparameter columns,
        # so a missing column means there is nothing to select for it.
        required = ['model', metric] + key_cols
        if df.empty or any(col not in df.columns for col in required):
            best_hyperparams[model] = no_result
            continue

        is_model = df['model'] == model
        scores = df[is_model].groupby(key_cols)[metric].mean().reset_index()
        # Combinations without any validation score cannot be ranked.
        scores = scores.dropna(subset=[metric])
        if scores.empty:
            best_hyperparams[model] = no_result
            continue

        # Best-scoring combination of each dataset.
        best_combinations = scores.loc[scores.groupby('dataset')[metric].idxmax()]
        best_hyperparams[model] = best_combinations

        # Keep every row of other models, and only the winning combinations
        # of this one. Runs of this model on a dataset that has no ranked
        # combination are dropped as well.
        row_keys = df[key_cols].apply(tuple, axis=1)
        best_keys = best_combinations[key_cols].apply(tuple, axis=1)
        df = df[~is_model | row_keys.isin(best_keys)]

    # Average across seeds. numeric_only avoids a TypeError on pandas >= 2
    # when a column is object-typed (e.g. a hyperparameter that is all None).
    group_cols = ['model', 'dataset']
    if all(col in df.columns for col in group_cols):
        final_grouped = df.groupby(group_cols).mean(numeric_only=True).reset_index()
    else:
        final_grouped = pd.DataFrame(columns=group_cols)

    # Printing best hyperparameters
    for model, params_df in best_hyperparams.items():
        print(f"Best hyperparameters for {model}:")
        print(params_df)
        print("\n")

    return final_grouped, best_hyperparams


# Download one record per usable run from the configured sweeps.
runs_data = fetch_runs(entity=entity, project=project, sweep_ids=sweep_ids)
# Pick the best hyperparameters per dataset, then average per (model, dataset).
averaged_data, best_hyperparams = process_runs_data(runs_data=runs_data)

Best hyperparameters for lasso:
                              dataset  lasso_C  lasso_l1_ratio  \
2                              COIL20     10.0            0.50   
21                             Isolet    100.0            0.25   
30                              MNIST     10.0            0.00   
39                               USPS     10.0            0.75   
53                            finance     10.0            0.50   
69                            gisette     10.0            0.75   
81                            madelon     10.0            0.00   
104                      mice_protein    100.0            0.75   
118  poly_binarised_decimalised_synth    100.0            0.50   

     bestmodel_valid/F1_weighted  
2                       0.982551  
21                      0.947942  
30                      0.911984  
39                      0.934424  
53                      0.595040  
69                      0.968095  
81                      0.548075  
104                     0.9

In [4]:
# Give each swept model's best-hyperparameter table its own name for display.
lasso_hyperparams = best_hyperparams['lasso']
rf_hyperparams = best_hyperparams['rf']
cae_hyperparams = best_hyperparams['cae']
supervised_cae_hyperparams = best_hyperparams['supervised_cae']

In [5]:
# Best lasso hyperparameters per dataset, with a dark header row.
(
    lasso_hyperparams.style
    .set_table_styles([{'selector': 'th', 'props': [('background', '#606060'), ('color', 'white')]}])
    .set_caption("Lasso Best Hyperparameters")
)

Unnamed: 0,dataset,lasso_C,lasso_l1_ratio,bestmodel_valid/F1_weighted
2,COIL20,10.0,0.5,0.982551
21,Isolet,100.0,0.25,0.947942
30,MNIST,10.0,0.0,0.911984
39,USPS,10.0,0.75,0.934424
53,finance,10.0,0.5,0.59504
69,gisette,10.0,0.75,0.968095
81,madelon,10.0,0.0,0.548075
104,mice_protein,100.0,0.75,0.948873
118,poly_binarised_decimalised_synth,100.0,0.5,0.502693


In [6]:
# Best random-forest hyperparameters per dataset, with a dark header row.
(
    rf_hyperparams.style
    .set_table_styles([{'selector': 'th', 'props': [('background', '#606060'), ('color', 'white')]}])
    .set_caption("RF Best Hyperparameters")
)

Unnamed: 0,dataset,rf_max_depth,bestmodel_valid/F1_weighted
2,COIL20,7.0,0.970339
5,Isolet,7.0,0.921524
8,MNIST,7.0,0.910721
11,USPS,7.0,0.940427
14,finance,7.0,0.600702
17,gisette,7.0,0.949999
20,madelon,7.0,0.693339
23,mice_protein,7.0,0.95826
26,poly_binarised_decimalised_synth,7.0,0.506308


In [7]:
# Best CAE hyperparameters per dataset, with a dark header row.
(
    cae_hyperparams.style
    .set_table_styles([{'selector': 'th', 'props': [('background', '#606060'), ('color', 'white')]}])
    .set_caption("CAE Best Hyperparameters")
)

Unnamed: 0,dataset,CAE_neurons_ratio,bestmodel_valid/F1_weighted
9,COIL20,1.0,0.983572
17,Isolet,0.8,0.828027
29,USPS,1.0,0.904831
36,finance,0.7,0.611487
49,madelon,1.0,0.727068
55,mice_protein,0.6,0.894743
67,poly_binarised_decimalised_synth,0.8,0.419617


In [8]:
# Best supervised-CAE hyperparameters per dataset. The caption names the
# supervised variant so this table is not mistaken for the plain CAE one.
supervised_cae_hyperparams.style.set_table_styles(
    [{'selector': 'th', 'props': [('background', '#606060'), ('color', 'white')]}]
).set_caption("Supervised CAE Best Hyperparameters")

Unnamed: 0,dataset,CAE_neurons_ratio,bestmodel_valid/F1_weighted
0,COIL20,0.1,0.058931
11,Isolet,0.2,0.03976
20,USPS,0.1,0.21248
35,finance,0.6,0.547139
49,madelon,1.0,0.624305
59,mice_protein,1.0,0.167532
62,poly_binarised_decimalised_synth,0.3,0.368871


In [9]:
# One model-by-dataset table per split (train / valid / test), each holding
# that split's 'bestmodel_<split>/F1_weighted' averages.
train_pivot, valid_pivot, test_pivot = (
    averaged_data.pivot(index='model', columns='dataset', values=f'bestmodel_{split}/F1_weighted')
    for split in ('train', 'valid', 'test')
)

def highlight_max(s):
    '''
    Return one CSS string per element of a Series, marking its maximum.

    Elements equal to the Series maximum get bold green text; all others get
    an empty style. Ties are all marked, and NaN entries are never marked.
    '''
    peak = s.max()
    emphasis = 'color: green; font-weight: bold'
    return [emphasis if at_peak else '' for at_peak in (s == peak)]


# Shared Styler rule: dark grey header cells with white text.
table_styles = [
    dict(selector='th', props=[('background', '#606060'), ('color', 'white')]),
]

In [10]:
# Training scores; the best model of each dataset column is highlighted.
(
    train_pivot.style
    .set_table_styles(table_styles)
    .set_caption("Training F1 Weighted Averages")
    .apply(highlight_max)
)

dataset,COIL20,Isolet,MNIST,USPS,finance,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F-Act,1.0,1.0,,0.997641,,,1.0,0.998956,0.434349
F-Act hierarchical,0.997839,0.999628,,0.997638,0.653828,,0.972438,0.913885,0.043662
F-Act hierarchical shared,1.0,0.998127,,0.977601,0.620791,,0.91942,0.849923,0.164884
F-Act hierarchical shared L_sig,1.0,0.9959,,0.976013,0.644653,,0.939221,0.808824,0.13641
MLP_same_capacity,0.998274,0.998511,,0.982363,,,0.820071,1.0,0.398638
SEFS,0.964073,0.935815,,0.948776,,,0.726851,0.883796,0.328577
cae,1.0,0.962531,,0.930945,0.6566,,0.873945,0.962683,0.445899
lasso,1.0,1.0,0.948401,0.994983,0.670358,1.0,0.80383,0.980407,0.525506
rf,0.986318,0.984742,0.92146,0.957755,0.877761,0.97373,0.99594,1.0,0.742473
supervised_cae,0.054281,0.040158,,0.207287,0.537526,,0.610881,0.191261,0.368529


In [11]:
# Validation scores; the best model of each dataset column is highlighted.
(
    valid_pivot.style
    .set_table_styles(table_styles)
    .set_caption("Valid F1 Weighted Averages")
    .apply(highlight_max)
)

dataset,COIL20,Isolet,MNIST,USPS,finance,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F-Act,0.994076,0.921656,,0.968271,,,0.598737,0.989262,0.421682
F-Act hierarchical,0.991894,0.898365,,0.968652,0.583989,,0.783955,0.874523,0.043705
F-Act hierarchical shared,0.989335,0.878884,,0.943962,0.579279,,0.585823,0.795298,0.163123
F-Act hierarchical shared L_sig,0.991719,0.876227,,0.94946,0.589581,,0.579508,0.75379,0.133794
MLP_same_capacity,0.987963,0.906103,,0.966356,,,0.568512,0.975422,0.383097
SEFS,0.945834,0.81743,,0.940525,,,0.650343,0.858909,0.332206
cae,0.983572,0.828027,,0.904831,0.611487,,0.727068,0.894743,0.419617
lasso,0.982551,0.947942,0.911984,0.934424,0.59504,0.968095,0.548075,0.948873,0.502693
rf,0.970339,0.921524,0.910721,0.940427,0.600702,0.949999,0.693339,0.95826,0.506308
supervised_cae,0.058931,0.03976,,0.21248,0.547139,,0.624305,0.167532,0.368871


In [12]:
# Test scores; the best model of each dataset column is highlighted.
(
    test_pivot.style
    .set_table_styles(table_styles)
    .set_caption("Test F1 Weighted Averages")
    .apply(highlight_max)
)

dataset,COIL20,Isolet,MNIST,USPS,finance,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F-Act,0.991837,0.919414,,0.964203,,,0.578604,0.978521,0.415785
F-Act hierarchical,0.984957,0.87573,,0.963844,0.579871,,0.762789,0.887254,0.043767
F-Act hierarchical shared,0.990768,0.868285,,0.942659,0.5912,,0.590249,0.761938,0.163508
F-Act hierarchical shared L_sig,0.983769,0.867636,,0.945527,0.598617,,0.574465,0.745075,0.137366
MLP_same_capacity,0.98362,0.895551,,0.959278,,,0.565471,0.972048,0.379494
SEFS,0.943247,0.801379,,0.933565,,,0.613505,0.82017,0.330745
cae,0.970427,0.801414,,0.904749,0.592564,,0.701883,0.853472,0.434222
lasso,0.982413,0.945832,0.910755,0.933578,0.597777,0.978571,0.51532,0.941626,0.483242
rf,0.967034,0.900926,0.911655,0.933634,0.619456,0.955949,0.671629,0.975191,0.505766
supervised_cae,0.064637,0.036833,,0.209018,0.544434,,0.618427,0.172394,0.369297
