In [2]:
import wandb
import pandas as pd

In [1]:

# W&B location of the experiments analysed in this notebook.
entity = "evangeorgerex"
project = "fwal"

# Sweeps whose finished runs are aggregated below. Every id must appear only
# once: a repeated id makes the same runs get fetched twice, which silently
# double-weights them in every mean computed downstream.
sweep_ids = [
    # NOTE(review): "zkvbu18r" used to be listed twice (also as "lasso baseline
    # more"). If that second entry was meant to be a different sweep, add the
    # correct id here.
    "zkvbu18r", # lasso baseline (more) TODO filter by best hyperparameters
    "9tjs7cai", # fwal as mlp baseline
    "22eckaxx", # rf baseline
    "nmsvf0u4", # fwal new datasets
    "gty91unf", # rf baseline
    "efjov5qc", # lasso baseline
    "5kxy8gl8", # fwal as mlp baseline
    "t1z9hleb", # fwal tti 
    "3ubxo03v", # finish fwal tti
    "t8i7yh69", # fwal gisette
]

In [3]:
def fetch_runs(
    entity,
    project,
    sweep_ids,
    excluded_datasets=(
        "poly_binarised_decimalised_mod10_synth",
        "simple_trig_synth",
        "Unknown",
    ),
):
    """Collect one record per finished W&B run across the given sweeps.

    Args:
        entity: W&B entity that owns the project.
        project: W&B project name.
        sweep_ids: Iterable of sweep ids. Repeated ids are queried only once
            (first occurrence wins) so that no run is recorded twice.
        excluded_datasets: Dataset names whose runs are skipped. "Unknown" is
            the placeholder used when a run's config has no "dataset" key, so
            keeping it in this collection drops runs without a dataset.

    Returns:
        A list of dicts, one per kept run, with the keys "model", "dataset",
        "seed" and the three "bestmodel_{train,valid,test}/F1_weighted"
        metrics (NaN when absent from the run summary). Records for the
        "lasso" model additionally carry "lasso_C" and "lasso_l1_ratio";
        records for the "rf" model carry "rf_max_depth".
    """
    api = wandb.Api()
    runs_data = []

    # dict.fromkeys drops repeated sweep ids while preserving their order;
    # without it a sweep listed twice would contribute every run twice and
    # skew all downstream averages.
    for sweep_id in dict.fromkeys(sweep_ids):
        sweep_runs = api.sweep(f"{entity}/{project}/{sweep_id}").runs
        for run in sweep_runs:
            # Only runs whose state is "finished" are kept.
            if run.state != "finished":
                continue

            dataset_name = run.config.get("dataset", "Unknown")
            if dataset_name in excluded_datasets:
                continue

            # Runs flagged "as_MLP_baseline" are reported as a separate model,
            # regardless of what their "model" config entry says.
            if run.config.get("as_MLP_baseline", False):
                model_name = "fwal_as_MLP_baseline"
            else:
                model_name = run.config.get("model", "Unknown")

            run_data = {
                "model": model_name,  # config key "model" (or the MLP-baseline override)
                "dataset": dataset_name,  # config key "dataset"
                "seed": run.config.get("seed_model_init", None),
                "bestmodel_train/F1_weighted": run.summary.get("bestmodel_train/F1_weighted", float('nan')),
                "bestmodel_valid/F1_weighted": run.summary.get("bestmodel_valid/F1_weighted", float('nan')),
                "bestmodel_test/F1_weighted": run.summary.get("bestmodel_test/F1_weighted", float('nan')),
            }

            # Model-specific hyperparameters, used later to pick the best
            # setting per dataset.
            if model_name == "lasso":
                run_data["lasso_C"] = run.config.get("lasso_C")
                run_data["lasso_l1_ratio"] = run.config.get("lasso_l1_ratio")
            elif model_name == "rf":
                run_data["rf_max_depth"] = run.config.get("rf_max_depth")

            runs_data.append(run_data)

    return runs_data

def process_runs_data(runs_data):
    """Select the best hyperparameters per dataset and average the metrics.

    For each model that was swept over hyperparameters ("lasso" and "rf"),
    the mean validation F1 is computed for every (dataset, hyperparameter
    combination); the combination with the highest mean is kept per dataset
    and all other runs of that model are discarded. The remaining runs are
    then averaged per (model, dataset).

    Args:
        runs_data: List of per-run dicts with at least the keys "model",
            "dataset" and "bestmodel_valid/F1_weighted", plus the
            hyperparameter keys of the swept models.

    Returns:
        A tuple ``(final_grouped, best_hyperparams)`` where ``final_grouped``
        is a DataFrame with one row per (model, dataset) holding the mean of
        every numeric column, and ``best_hyperparams`` maps each swept model
        name to a DataFrame of its winning combination per dataset (empty if
        the model has no usable runs).

    Side effects:
        Prints the best-hyperparameter table of every swept model.
    """
    valid_metric = 'bestmodel_valid/F1_weighted'
    # Hyperparameter columns that identify one sweep configuration per model.
    hyperparam_columns = {
        'lasso': ['lasso_C', 'lasso_l1_ratio'],
        'rf': ['rf_max_depth'],
    }

    df = pd.DataFrame(runs_data)
    best_hyperparams = {}

    for model, param_cols in hyperparam_columns.items():
        group_cols = ['dataset'] + param_cols
        no_best = pd.DataFrame(columns=group_cols + [valid_metric])

        # If a required column was never recorded there is nothing to rank or
        # filter on for this model; leave the runs untouched instead of
        # failing with a KeyError.
        required = set(group_cols) | {'model', valid_metric}
        if not required.issubset(df.columns):
            best_hyperparams[model] = no_best
            continue

        is_model = df['model'] == model

        # Mean validation score of every hyperparameter combination. Runs with
        # a missing hyperparameter are dropped by groupby; combinations whose
        # mean is NaN are dropped explicitly so idxmax never sees an all-NaN
        # group.
        mean_scores = (
            df[is_model]
            .groupby(group_cols)[valid_metric]
            .mean()
            .reset_index()
            .dropna(subset=[valid_metric])
        )

        if mean_scores.empty:
            best_combinations = no_best
            keep = ~is_model
        else:
            # Identify the best hyperparameters for each dataset.
            best_rows = mean_scores.groupby('dataset')[valid_metric].idxmax()
            best_combinations = mean_scores.loc[best_rows]

            # Semi-join: flag the runs whose (dataset, hyperparameters) match
            # a winning combination. A left merge keeps the row order and row
            # count of `df` because the winning combinations are unique.
            matched = df[group_cols].merge(
                best_combinations[group_cols],
                on=group_cols,
                how='left',
                indicator=True,
            )
            is_best = (matched['_merge'] == 'both').to_numpy()
            keep = ~is_model | is_best

        best_hyperparams[model] = best_combinations
        # Runs of other models always survive; runs of this model survive only
        # with the winning hyperparameters.
        df = df[keep]

    # Group by model and dataset to average across seeds. numeric_only=True
    # keeps this working when a column is non-numeric (e.g. an all-None seed).
    if {'model', 'dataset'}.issubset(df.columns):
        final_grouped = df.groupby(['model', 'dataset']).mean(numeric_only=True).reset_index()
    else:
        final_grouped = pd.DataFrame(columns=['model', 'dataset'])

    # Printing best hyperparameters
    for model, params_df in best_hyperparams.items():
        print(f"Best hyperparameters for {model}:")
        print(params_df)
        print("\n")

    return final_grouped, best_hyperparams


# Download the per-run records for every configured sweep.
runs_data = fetch_runs(entity=entity, project=project, sweep_ids=sweep_ids)
# Keep the best lasso/rf settings per dataset, then average per (model, dataset).
averaged_data, best_hyperparams = process_runs_data(runs_data=runs_data)

Best hyperparameters for lasso:
                             dataset  lasso_C  lasso_l1_ratio  \
2                             COIL20     10.0            0.50   
21                            Isolet    100.0            0.25   
30                             MNIST     10.0            0.00   
39                              USPS     10.0            0.75   
45                           gisette     10.0            0.75   
57                           madelon     10.0            0.00   
80                      mice_protein    100.0            0.75   
94  poly_binarised_decimalised_synth    100.0            0.50   

    bestmodel_valid/F1_weighted  
2                      0.982551  
21                     0.947942  
30                     0.911984  
39                     0.934424  
45                     0.968095  
57                     0.548075  
80                     0.948873  
94                     0.502693  


Best hyperparameters for rf:
                             dataset  rf_max_

In [4]:
# Winning hyperparameter tables per dataset, looked up by model name.
lasso_hyperparams = best_hyperparams['lasso']
rf_hyperparams = best_hyperparams['rf']

In [5]:
# Show the lasso winners with a caption and dark header cells.
lasso_hyperparams.style.set_caption("Lasso Best Hyperparameters").set_table_styles(
    [dict(selector='th', props=[('background', '#606060'), ('color', 'white')])]
)

Unnamed: 0,dataset,lasso_C,lasso_l1_ratio,bestmodel_valid/F1_weighted
2,COIL20,10.0,0.5,0.982551
21,Isolet,100.0,0.25,0.947942
30,MNIST,10.0,0.0,0.911984
39,USPS,10.0,0.75,0.934424
45,gisette,10.0,0.75,0.968095
57,madelon,10.0,0.0,0.548075
80,mice_protein,100.0,0.75,0.948873
94,poly_binarised_decimalised_synth,100.0,0.5,0.502693


In [6]:
# Show the random-forest winners with a caption and dark header cells.
rf_hyperparams.style.set_caption("RF Best Hyperparameters").set_table_styles(
    [dict(selector='th', props=[('background', '#606060'), ('color', 'white')])]
)

Unnamed: 0,dataset,rf_max_depth,bestmodel_valid/F1_weighted
2,COIL20,7.0,0.970339
5,Isolet,7.0,0.921524
8,MNIST,7.0,0.910721
11,USPS,7.0,0.940427
14,gisette,7.0,0.949999
17,madelon,7.0,0.693339
20,mice_protein,7.0,0.95826
23,poly_binarised_decimalised_synth,7.0,0.506308


In [7]:
# Reshape the averaged metrics into model x dataset tables, one table per
# split (train, validation, test), in that order.
train_pivot, valid_pivot, test_pivot = (
    averaged_data.pivot(index='model', columns='dataset', values=metric_column)
    for metric_column in (
        'bestmodel_train/F1_weighted',
        'bestmodel_valid/F1_weighted',
        'bestmodel_test/F1_weighted',
    )
)


In [8]:
# Show the training-split table with a caption and dark header cells.
train_pivot.style.set_caption("Training F1 Weighted Averages").set_table_styles(
    [dict(selector='th', props=[('background', '#606060'), ('color', 'white')])]
)

dataset,COIL20,Isolet,MNIST,USPS,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
fwal,1.0,0.996278,,0.98579,,0.99197,0.913021,0.331054
fwal_as_MLP_baseline,0.973832,0.856391,0.979862,0.986622,0.997233,0.571231,0.982732,0.404778
lasso,1.0,1.0,0.948401,0.994983,1.0,0.80383,0.980407,0.525506
rf,0.986318,0.984742,0.92146,0.957755,0.97373,0.99594,1.0,0.742473


In [9]:
# Show the validation-split table with a caption and dark header cells.
valid_pivot.style.set_caption("Valid F1 Weighted Averages").set_table_styles(
    [dict(selector='th', props=[('background', '#606060'), ('color', 'white')])]
)

dataset,COIL20,Isolet,MNIST,USPS,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
fwal,0.997788,0.964075,,0.979418,,0.908867,0.899228,0.331132
fwal_as_MLP_baseline,0.968474,0.818954,0.975369,0.980403,0.990478,0.541173,0.962261,0.402096
lasso,0.982551,0.947942,0.911984,0.934424,0.968095,0.548075,0.948873,0.502693
rf,0.970339,0.921524,0.910721,0.940427,0.949999,0.693339,0.95826,0.506308


In [10]:
# Show the test-split table with a caption and dark header cells.
test_pivot.style.set_caption("Test F1 Weighted Averages").set_table_styles(
    [dict(selector='th', props=[('background', '#606060'), ('color', 'white')])]
)

dataset,COIL20,Isolet,MNIST,USPS,gisette,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
fwal,0.994792,0.942302,,0.974443,,0.858918,0.886771,0.330347
fwal_as_MLP_baseline,0.962932,0.794518,0.971815,0.976441,0.98748,0.520425,0.950662,0.399815
lasso,0.982413,0.945832,0.910755,0.933578,0.978571,0.51532,0.941626,0.483242
rf,0.967034,0.900926,0.911655,0.933634,0.955949,0.671629,0.975191,0.505766
