In [1]:

import wandb
import pandas as pd
import os
import numpy as np

In [2]:
# Shared W&B API client; the per-run collection cell below uses it via api.run(...).
api = wandb.Api()

# W&B entity and project that every run id and sweep id in this notebook is resolved against.
entity = "evangeorgerex"
project = "fwal"

#### Utilities

In [3]:
# Directory holding the cached per-model result CSVs (fact.csv, lasso.csv, ...).
# Defaults to the original checkout location; set FWAL_RESULTS_DIR to run the
# notebook on another machine without editing this cell.
results_dir = os.environ.get(
    'FWAL_RESULTS_DIR',
    r'/home/er647/projects/feature-wise-active-learning/results',
)

In [46]:
def get_tables(averaged_data):
    # Pivoting for 'bestmodel_train/F1_weighted'
    train_pivot = averaged_data.pivot(index='model', columns='dataset', values='bestmodel_train/F1_weighted')
    # print("Training F1 Weighted Averages:")
    # print(train_pivot)
    # print("\n")  # Add some space between tables

    # Pivoting for 'bestmodel_valid/F1_weighted'
    valid_pivot = averaged_data.pivot(index='model', columns='dataset', values='bestmodel_valid/F1_weighted')
    # print("Validation F1 Weighted Averages:")
    # print(valid_pivot)
    # print("\n")  # Add some space between tables

    # Pivoting for 'bestmodel_test/F1_weighted'
    test_pivot = averaged_data.pivot(index='model', columns='dataset', values='bestmodel_test/F1_weighted')
    # print("Test F1 Weighted Averages:")
    # print(test_pivot)
    for df in [train_pivot, valid_pivot, test_pivot]:
        # df.drop('MNIST', axis=1, inplace=True)
        # df.drop('gisette', axis=1, inplace=True) 
        df.drop('poly_binarised_decimalised_synth', axis=1, inplace=True)

    def highlight_max(s):
        '''
        Highlight the maximum in a Series by changing font color to red and making it bold.
        '''
        is_max = s == s.max()
        return ['color: green; font-weight: bold' if v else '' for v in is_max]


    table_styles = [
        {'selector': 'th',
        'props': [('background', '#606060'), ('color', 'white')]
        }
    ]
    
    train_table = train_pivot.style.set_table_styles(table_styles).set_caption("Training F1 Weighted Averages").apply(highlight_max)
    valid_table = valid_pivot.style.set_table_styles(table_styles).set_caption("Valid F1 Weighted Averages").apply(highlight_max)
    test_table = test_pivot.style.set_table_styles(table_styles).set_caption("Test F1 Weighted Averages").apply(highlight_max)
    
    return train_table, valid_table, test_table
import pandas as pd

import pandas as pd

def get_tables(averaged_data):
    # Function to format mean and standard deviation into a single string
    def format_mean_std(row, metric):
        mean = row[(metric, 'mean')]
        std = row[(metric, 'std')]
        return f"{mean:.3f} ± {std:.3f}"

    # Create formatted columns for each metric
    metrics = ['bestmodel_train/F1_weighted', 'bestmodel_valid/F1_weighted', 'bestmodel_test/F1_weighted']
    for metric in metrics:
        averaged_data[metric] = averaged_data.apply(format_mean_std, axis=1, metric=metric)

    # Pivot tables for each metric
    def pivot_data(averaged_data, value_col):
        return averaged_data.pivot(index='model', columns='dataset', values=value_col)

    # Create pivot tables
    train_pivot = pivot_data(averaged_data, 'bestmodel_train/F1_weighted')
    valid_pivot = pivot_data(averaged_data, 'bestmodel_valid/F1_weighted')
    test_pivot = pivot_data(averaged_data, 'bestmodel_test/F1_weighted')

    # Function to highlight the maximum in a Series
    def highlight_max(s):
        ''' Highlight the maximum in a Series yellow. '''
        is_max = s == s.max()
        return ['background-color: yellow' if v else '' for v in is_max]

    # Styling for the tables
    table_styles = [{'selector': 'th', 'props': [('background-color', '#606060'), ('color', 'white')]}]
    
    # Apply styles and highlight to the pivot tables
    train_table = train_pivot.style.set_table_styles(table_styles).apply(highlight_max)
    valid_table = valid_pivot.style.set_table_styles(table_styles).apply(highlight_max)
    test_table = test_pivot.style.set_table_styles(table_styles).apply(highlight_max)

    return train_table, valid_table, test_table

In [47]:
def summarize_performance(df):
    """
    Summarise per-run F1 scores as "mean ± std" tables, one per data split.

    Only the three metric columns are aggregated, so other columns in ``df``
    (seed, hyperparameters, free-text columns) neither cost time nor break the
    mean/std aggregation when they are non-numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per run with the columns 'model', 'dataset',
        'bestmodel_train/F1_weighted', 'bestmodel_valid/F1_weighted' and
        'bestmodel_test/F1_weighted'.

    Returns
    -------
    tuple
        ``(train_table, valid_table, test_table)``: DataFrames indexed by model
        with one column per dataset. Each cell is the string
        ``"<mean> ± <std>"`` with both numbers rounded to 4 decimals; the std
        reads ``nan`` for groups that contain a single run.

    Raises
    ------
    KeyError
        If a key column or one of the metric columns is missing.
    """
    metrics = ['bestmodel_train/F1_weighted', 'bestmodel_valid/F1_weighted', 'bestmodel_test/F1_weighted']

    # Grouping by model and dataset, then calculating mean and std deviation
    grouped = df.groupby(['model', 'dataset'])[metrics].agg(['mean', 'std'])

    def format_table(metric):
        stats = grouped[metric]
        # Vectorised "mean ± std"; unstacking level 1 (dataset) turns the
        # datasets into columns.
        formatted = stats['mean'].round(4).astype(str) + " ± " + stats['std'].round(4).astype(str)
        return formatted.unstack(level=1)

    train_table, valid_table, test_table = [format_table(metric) for metric in metrics]
    return train_table, valid_table, test_table


In [45]:
def fetch_runs(entity, project, sweep_ids):
    """
    Collect one record per usable run from the given W&B sweeps.

    A run is used when its state is "finished" (runs of sweep "cwhd81fy" are
    used whatever their state) and its dataset is not one of the skipped ones.
    Every record holds the model name, dataset, seed, the three bestmodel
    weighted-F1 summary values and the hyperparameters recorded for that model.
    Each "F-Act" run additionally yields two derived records,
    'F-Act (optimal TTI)' and 'F-Act (full TTI)', whose valid/test scores are
    taken from get_tti_results(run).

    Args:
        entity: W&B entity name.
        project: W&B project name.
        sweep_ids: iterable of sweep ids inside that project.

    Returns:
        list of dict records, in sweep order and then run order.
    """
    skipped_datasets = ["poly_binarised_decimalised_mod10_synth", "simple_trig_synth", "Unknown"]
    metric_names = (
        "bestmodel_train/F1_weighted",
        "bestmodel_valid/F1_weighted",
        "bestmodel_test/F1_weighted",
    )
    # Config keys copied verbatim into the record, per model name.
    # "F-Act" is handled separately because some of its values are fixed.
    copied_config_keys = {
        "lasso": ("lasso_C", "lasso_l1_ratio"),
        "rf": ("rf_max_depth",),
        "cae": ("CAE_neurons_ratio",),
        "supervised_cae": ("CAE_neurons_ratio",),
        "F-Act hierarchical TTI": ("sparsity_regularizer_hyperparam",),
        "F-Act hierarchical TTI (old)": ("sparsity_regularizer_hyperparam",),
        "xgboost": ("xgb_eta", "xgb_max_depth"),
        "MLP_same_capacity": ("lr", "num_hidden"),
        "SEFS": ("lr", "num_hidden"),
    }

    client = wandb.Api()
    records = []

    for sweep_id in sweep_ids:
        for run in client.sweep(f"{entity}/{project}/{sweep_id}").runs:
            # Unfinished runs are only accepted from sweep "cwhd81fy".
            if run.state != "finished" and sweep_id != "cwhd81fy":
                continue

            dataset_name = run.config.get("dataset", "Unknown")
            if dataset_name in skipped_datasets:
                continue

            if run.config.get("as_MLP_baseline", False):
                model_name = "MLP_same_capacity"
            else:
                model_name = run.config.get("model", "Unknown")
                if model_name == "fwal":
                    model_name = "F-Act"

            record = {
                "model": model_name,
                "dataset": dataset_name,
                "seed": run.config.get("seed_model_init", None),
            }
            for metric_name in metric_names:
                record[metric_name] = run.summary.get(metric_name, float('nan'))

            # Model-specific hyperparameters
            if model_name == "F-Act":
                # Sweep "bga4xu49" takes num_hidden from the config with
                # dropout recorded as 0.2; every other sweep is recorded with
                # dropout 0.0 and num_hidden 4.
                is_sweep_bga4xu49 = sweep_id in ["bga4xu49"]
                record['dropout'] = 0.2 if is_sweep_bga4xu49 else 0.0
                record["lr"] = run.config.get("lr")
                record["num_hidden"] = run.config.get("num_hidden") if is_sweep_bga4xu49 else 4
                record["sparsity_regularizer_hyperparam"] = run.config.get("sparsity_regularizer_hyperparam")
            else:
                for config_key in copied_config_keys.get(model_name, ()):
                    record[config_key] = run.config.get(config_key)

            records.append(record)
            if record['model'] != 'F-Act':
                continue

            # Derived test-time-intervention records share everything with
            # the base record except the model label and the valid/test scores.
            opt_valid, opt_test, full_valid, full_test = get_tti_results(run)
            tti_variants = (
                ('F-Act (optimal TTI)', opt_valid, opt_test),
                ('F-Act (full TTI)', full_valid, full_test),
            )
            for variant_label, variant_valid, variant_test in tti_variants:
                variant = record.copy()
                variant['bestmodel_valid/F1_weighted'] = variant_valid
                variant['bestmodel_test/F1_weighted'] = variant_test
                variant['model'] = variant_label
                records.append(variant)

    return records

def process_runs_data(runs_data):
    """
    Keep, for every tuned model, only the runs that use its best hyperparameters.

    For each model that has a hyperparameter sweep, runs are grouped by dataset
    and hyperparameter combination, the validation weighted-F1 is averaged
    within each group, and the combination with the highest average is selected
    per dataset. Runs of that model with any other combination are dropped.
    Runs of models without an entry in the table below are returned unchanged.

    Parameters
    ----------
    runs_data : list of dict
        Records as produced by fetch_runs.

    Returns
    -------
    tuple
        ``(df, best_hyperparams)``. ``df`` is the filtered per-run DataFrame
        (original row labels kept). ``best_hyperparams`` maps a model name to a
        DataFrame with one row per dataset holding the selected hyperparameters
        and their mean validation score. Empty input gives an empty frame and
        an empty dict.
    """
    df = pd.DataFrame(runs_data)
    best_hyperparams = {}

    # Nothing fetched: there is no 'model' column to inspect.
    if df.empty:
        return df, best_hyperparams

    valid_metric = 'bestmodel_valid/F1_weighted'

    # Models with hyperparameter sweeps and the columns identifying one
    # hyperparameter combination for each of them.
    group_columns = {
        'lasso': ['dataset', 'lasso_C', 'lasso_l1_ratio'],
        'rf': ['dataset', 'rf_max_depth'],
        'cae': ['dataset', 'CAE_neurons_ratio'],
        'supervised_cae': ['dataset', 'CAE_neurons_ratio'],
        'F-Act hierarchical TTI': ['dataset', 'sparsity_regularizer_hyperparam'],
        'F-Act hierarchical TTI (old)': ['dataset', 'sparsity_regularizer_hyperparam'],
        'F-Act': ['dataset', 'sparsity_regularizer_hyperparam', 'lr', 'num_hidden', 'dropout'],
        'MLP_same_capacity': ['dataset', 'lr', 'num_hidden'],
        'SEFS': ['dataset', 'lr', 'num_hidden'],
        'xgboost': ['dataset', 'xgb_eta', 'xgb_max_depth'],
    }

    models_stored = df['model'].unique()
    for model, columns in group_columns.items():
        if model not in models_stored:
            continue
        model_df = df[df['model'] == model]

        # For 'F-Act', first keep a single run per (hyperparameters, seed):
        # the one with the highest validation score.
        # NOTE(review): the original comment spoke of the "best seed" per
        # combination, but 'seed' is part of the grouping key - confirm intent.
        if model == 'F-Act':
            best_run_labels = model_df.groupby(columns + ['seed'])[valid_metric].idxmax()
            model_df = model_df.loc[best_run_labels]

        # Mean validation score per hyperparameter combination, then the best
        # combination for each dataset.
        best_combinations = model_df.groupby(columns)[valid_metric].mean().reset_index()
        best_combinations = best_combinations.loc[best_combinations.groupby('dataset')[valid_metric].idxmax()]
        best_hyperparams[model] = best_combinations

        # Tuple representation for easy comparison
        best_combinations_tuples = best_combinations[columns].apply(tuple, axis=1)
        matches_best = df[columns].apply(tuple, axis=1).isin(best_combinations_tuples)

        # Keep every row of other models plus this model's best-combination rows.
        df = df[(df['model'] != model) | matches_best]

    return df, best_hyperparams

# F-Act

In [15]:
def get_tti_results(run):
    '''
    Extract test-time-intervention (TTI) F1 scores from a run's logged history.

    Only history rows in which both the validation and the test TTI score are
    present are used, so the two score sequences stay aligned step by step.
    "Optimal" is the step with the highest validation score (first one on
    ties); "full" is the last such step.

    args:
        run: object exposing scan_history(keys=...) that yields dict-like rows
             (a W&B run).

    returns:
        float: optimal test-time intervention validation f1
        float: optimal test-time intervention test f1
        float: full test-time intervention validation f1
        float: full test-time intervention test f1

    raises:
        ValueError: if no history row holds both TTI scores.
    '''
    valid_key = "tti_valid_metrics.F1_weighted"
    test_key = "tti_test_metrics.F1_weighted"
    # "num_additional_features" is not read below but stays in the request so
    # that the same history rows are asked for as before.
    keys = [valid_key, test_key, "num_additional_features"]

    valid_scores = []
    test_scores = []
    for row in run.scan_history(keys=keys):
        valid_score = row[valid_key] if valid_key in row else None
        test_score = row[test_key] if test_key in row else None
        # Skip incomplete rows as a whole; recording only one of the two values
        # would shift one list against the other.
        if valid_score is None or test_score is None:
            continue
        valid_scores.append(valid_score)
        test_scores.append(test_score)

    if not valid_scores:
        raise ValueError(
            f"no history rows with both '{valid_key}' and '{test_key}' were logged for this run"
        )

    max_index = np.argmax(valid_scores)
    opt_tti_valid = valid_scores[max_index]
    opt_tti_test = test_scores[max_index]

    full_tti_valid = valid_scores[-1]
    full_tti_test = test_scores[-1]

    return opt_tti_valid, opt_tti_test, full_tti_valid, full_tti_test
    

In [88]:
# W&B run ids of the F-Act runs to collect; the trailing comment on each entry
# names the dataset of that run.
# NOTE: the following cell assigns run_ids, hyperparams, metrics and model_name
# again, so whichever of the two cells ran last decides what gets collected.
run_ids = [
    "bfi5l6qu" , # COIL20
    "ihpfigr4" , # COIL20
    "rhrqy9bb" , # COIL20
    "sj64pl05" , # Isolet
    "dtv0wjox" , # Isolet
    "gp378vp4" , # Isolet
    "vqcdnueq" , # PBMC
    # '3e7c2ala' , # PBMC
    # '9qx2b2na' , # PBMC
    "b8nrupvc" , # USPS
    "rcacu3yt" , # USPS
    "swbk9dsz" , # USPS
    "sawnisk5" , # finance
    "dqq6q5ej" , # finance
    "33iwvq59" , # finance
    "ko5c4p3v" , # madelon
    "h4m5vvcm" , # madelon
    "zyuohsyk" , # madelon
    "5le875vr" , # mice_protein
    "6utw0s44" , # mice_protein
    "r46u12zd" , # mice_protein
    "n0mcba2f" , # poly_binarised_decimalised_synth
    "94j8ch8b" , # poly_binarised_decimalised_synth
    "qskac7qn" , # poly_binarised_decimalised_synth
]
# Column names for the F-Act hyperparameters and for the three F1 metrics.
hyperparams = ['dataset', 'sparsity_regularizer_hyperparam', 'lr', 'num_hidden', 'dropout']
metrics = ['bestmodel_train/F1_weighted', 'bestmodel_valid/F1_weighted', 'bestmodel_test/F1_weighted']
model_name = 'F-Act'

In [89]:
# PBMC-only selection of F-Act run ids. Running this cell replaces the longer
# run_ids list (and the identical hyperparams/metrics/model_name) of the previous cell.
run_ids = [
    "vqcdnueq" , # PBMC
    '3e7c2ala' , # PBMC
    '9qx2b2na' , # PBMC
]
# Column names for the F-Act hyperparameters and for the three F1 metrics.
hyperparams = ['dataset', 'sparsity_regularizer_hyperparam', 'lr', 'num_hidden', 'dropout']
metrics = ['bestmodel_train/F1_weighted', 'bestmodel_valid/F1_weighted', 'bestmodel_test/F1_weighted']
model_name = 'F-Act'

In [None]:
# Deliberate stop: this assertion always fails, so "Run All" halts here instead
# of executing the cells below, which query the W&B API.
assert False
# Please don't auto run the cells below.

In [90]:
# Build three records per selected F-Act run: the plain run plus its
# "optimal TTI" and "full TTI" variants, which differ from it only in the
# model label and the valid/test scores returned by get_tti_results.
runs_data = []
for run_id in run_ids:
    run = api.run(f"{entity}/{project}/{run_id}")

    run_data = {
        "model": 'F-Act',
        "dataset": run.config.get("dataset", None),
        "seed": run.config.get("seed_model_init", None),
    }
    run_data.update({
        metric_key: run.summary.get(metric_key, float('nan'))
        for metric_key in (
            "bestmodel_train/F1_weighted",
            "bestmodel_valid/F1_weighted",
            "bestmodel_test/F1_weighted",
        )
    })

    opt_tti_valid, opt_tti_test, full_tti_valid, full_tti_test = get_tti_results(run)

    # Overriding existing keys while unpacking keeps the column order of run_data.
    opt_tti_run_data = {
        **run_data,
        'bestmodel_valid/F1_weighted': opt_tti_valid,
        'bestmodel_test/F1_weighted': opt_tti_test,
        'model': 'F-Act (optimal TTI)',
    }
    full_tti_run_data = {
        **run_data,
        'bestmodel_valid/F1_weighted': full_tti_valid,
        'bestmodel_test/F1_weighted': full_tti_test,
        'model': 'F-Act (full TTI)',
    }

    runs_data.extend([run_data, opt_tti_run_data, full_tti_run_data])
    

In [26]:
pd.DataFrame(runs_data)

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted
0,F-Act,COIL20,2,1.000000,0.985809,0.982589
1,F-Act (optimal TTI),COIL20,2,1.000000,0.992923,0.989579
2,F-Act (full TTI),COIL20,2,1.000000,0.989451,0.989548
3,F-Act,COIL20,1,1.000000,0.992920,0.982490
4,F-Act (optimal TTI),COIL20,1,1.000000,0.992923,0.986231
...,...,...,...,...,...,...
61,F-Act (optimal TTI),poly_binarised_decimalised_synth,1,0.287016,0.445471,0.457345
62,F-Act (full TTI),poly_binarised_decimalised_synth,1,0.287016,0.414036,0.407769
63,F-Act,poly_binarised_decimalised_synth,0,0.314253,0.280263,0.281314
64,F-Act (optimal TTI),poly_binarised_decimalised_synth,0,0.314253,0.435732,0.414680


In [91]:
pd.DataFrame(runs_data).groupby(['dataset', 'model']).agg(['mean', 'std']).reset_index()

Unnamed: 0_level_0,dataset,model,seed,seed,bestmodel_train/F1_weighted,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,bestmodel_test/F1_weighted
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
0,PBMC,F-Act,1.666667,0.57735,0.99671,0.004353,0.860394,0.014468,0.842445,0.024193
1,PBMC,F-Act (full TTI),1.666667,0.57735,0.99671,0.004353,0.842534,0.014366,0.870076,0.037357
2,PBMC,F-Act (optimal TTI),1.666667,0.57735,0.99671,0.004353,0.881119,0.002763,0.868122,0.034493


In [48]:
fact_df =pd.DataFrame(runs_data)

In [49]:
# Apply the function to the existing DataFrame
train_table, valid_table, test_table = summarize_performance(fact_df)

test_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein,poly_binarised_decimalised_synth
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
F-Act,0.986 ± 0.0061,0.9197 ± 0.0056,0.8317 ± nan,0.9683 ± 0.0011,0.6029 ± 0.0162,0.7225 ± 0.0214,0.9877 ± 0.0026,0.2683 ± 0.0231
F-Act (full TTI),0.9826 ± 0.006,0.9286 ± 0.0118,0.9131 ± nan,0.9603 ± 0.002,0.5995 ± 0.0134,0.7022 ± 0.0594,0.9846 ± 0.0116,0.401 ± 0.018
F-Act (optimal TTI),0.9884 ± 0.0019,0.9286 ± 0.0118,0.8987 ± nan,0.9595 ± 0.0031,0.5981 ± 0.019,0.729 ± 0.0233,0.983 ± 0.0107,0.4176 ± 0.0384


In [17]:
# fact_df.to_csv(os.path.join(results_dir, 'fact.csv'))

In [9]:
# load fact_df from csv:
fact_df = pd.read_csv(os.path.join(results_dir, 'fact.csv'), index_col=0)

# Lasso

In [43]:
lasso_sweep_ids = [
    'rdvu3fg8', # everything except finance
    'xpxe64ee', # finance
]

In [44]:
# Fetch and process the data
lasso_runs_data = fetch_runs(entity, project, lasso_sweep_ids)

In [50]:
lasso_df, lasso_best_hyperparams = process_runs_data(lasso_runs_data)

In [29]:
lasso_averaged_data, lasso_best_hyperparams = process_runs_data(lasso_runs_data)


Best hyperparameters for lasso:
                              dataset  lasso_C  lasso_l1_ratio  \
2                              COIL20       10            0.50   
21                             Isolet      100            0.25   
30                               PBMC       10            0.00   
48                               USPS       10            0.75   
62                            finance       10            0.50   
75                            madelon       10            0.00   
95                       mice_protein      100            0.00   
112  poly_binarised_decimalised_synth      100            0.50   

     bestmodel_valid/F1_weighted  
2                       0.982551  
21                      0.947942  
30                      0.873614  
48                      0.934424  
62                      0.595040  
75                      0.548075  
95                      0.964513  
112                     0.502693  




In [57]:
lasso_averaged_data.to_csv(os.path.join(results_dir, 'lasso.csv'))


In [81]:
lasso_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,lasso_C,lasso_l1_ratio
0,lasso,COIL20,1.0,1.0,0.982551,0.982413,10.0,0.5
1,lasso,Isolet,1.0,1.0,0.947942,0.945832,100.0,0.25
2,lasso,PBMC,1.0,1.0,0.873614,0.892459,10.0,0.0
3,lasso,USPS,1.0,0.994983,0.934424,0.933578,10.0,0.75
4,lasso,finance,1.0,0.670358,0.59504,0.597777,10.0,0.5
5,lasso,madelon,1.0,0.80383,0.548075,0.51532,10.0,0.0
6,lasso,mice_protein,1.0,0.988156,0.964513,0.952401,100.0,0.0
7,lasso,poly_binarised_decimalised_synth,1.0,0.525506,0.502693,0.483242,100.0,0.5


In [11]:
lasso_df = pd.read_csv(os.path.join(results_dir, 'lasso.csv'), index_col=0)

# XGBoost

In [52]:
xgb_sweep_ids = [
    'yr11wqmg', # everything
]


In [53]:
# Fetch and process the data
xgb_runs_data = fetch_runs(entity, project, xgb_sweep_ids)

In [54]:
xgb_df, xgb_best_hyperparams = process_runs_data(xgb_runs_data)


In [37]:
xgb_averaged_data, xgb_best_hyperparams = process_runs_data(xgb_runs_data)


In [58]:
xgb_averaged_data.to_csv(os.path.join(results_dir, 'xgb.csv'))


In [80]:
xgb_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,xgb_eta,xgb_max_depth
0,xgboost,COIL20,1.0,1.0,0.98539,0.986075,0.1,6.0
1,xgboost,Isolet,1.0,1.0,0.902887,0.887528,0.1,9.0
2,xgboost,PBMC,1.0,0.995177,0.898703,0.894172,0.3,3.0
3,xgboost,USPS,1.0,1.0,0.968251,0.973689,0.3,3.0
4,xgboost,finance,1.0,0.861745,0.634798,0.588311,0.5,6.0
5,xgboost,madelon,1.0,1.0,0.80759,0.809609,0.1,6.0
6,xgboost,mice_protein,1.0,1.0,0.978342,0.98149,0.3,3.0
7,xgboost,poly_binarised_decimalised_synth,1.0,0.97231,0.504693,0.501841,0.1,9.0


# RF

In [55]:
rf_sweep_ids = [
    "22eckaxx", # rf baseline
    "gty91unf", # rf baseline
    "4upk4xrk", # RF finance
    'l102r3et', # PBMC   
]

In [56]:
# Fetch and process the data
rf_runs_data = fetch_runs(entity, project, rf_sweep_ids)

In [57]:
rf_df, rf_best_hyperparams = process_runs_data(rf_runs_data)


In [138]:
# Store the RF selection under its own name; it used to be assigned to
# xgb_best_hyperparams, silently replacing the XGBoost result.
rf_averaged_data, rf_best_hyperparams = process_runs_data(rf_runs_data)
# drop the "gisette" and "MNIST" datasets
rf_averaged_data = rf_averaged_data[(rf_averaged_data['dataset'] !='gisette' )&( rf_averaged_data['dataset'] !='MNIST') ]


In [18]:
rf_df = rf_df[(rf_df['dataset'] !='gisette' )&( rf_df['dataset'] !='MNIST') ]

In [19]:
rf_df.to_csv(os.path.join(results_dir, 'rf.csv'))


In [131]:
rf_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,rf_max_depth
0,rf,COIL20,1.0,0.986327,0.970085,0.967561,7.0
1,rf,Isolet,1.0,0.984742,0.921524,0.900926,7.0
2,rf,MNIST,1.0,0.92146,0.910721,0.911655,7.0
3,rf,PBMC,1.0,0.937396,0.869854,0.886592,7.0
4,rf,USPS,1.0,0.957755,0.940427,0.933634,7.0
5,rf,finance,1.0,0.877761,0.600702,0.619456,7.0
6,rf,gisette,1.0,0.97373,0.949999,0.955949,7.0
7,rf,madelon,1.0,0.995769,0.693069,0.671901,7.0
8,rf,mice_protein,1.0,0.999807,0.962906,0.969803,7.0
9,rf,poly_binarised_decimalised_synth,1.0,0.742473,0.506308,0.505766,7.0


# MLP

In [58]:
mlp_sweep_ids = [
    's5m2sqsv', # everything
]


In [59]:
# Fetch and process the data
mlp_runs_data = fetch_runs(entity, project, mlp_sweep_ids)

In [60]:
mlp_df, mlp_best_hyperparams = process_runs_data(mlp_runs_data)


In [40]:
mlp_averaged_data, mlp_best_hyperparams = process_runs_data(mlp_runs_data)


In [59]:
mlp_averaged_data.to_csv(os.path.join(results_dir, 'mlp.csv'))


In [79]:
mlp_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,lr,num_hidden
0,MLP_same_capacity,COIL20,1.0,1.0,0.992998,0.988311,0.001,1.0
1,MLP_same_capacity,Isolet,1.0,0.999255,0.944965,0.934809,0.001,2.0
2,MLP_same_capacity,PBMC,1.0,0.999349,0.898915,0.894191,0.001,4.0
3,MLP_same_capacity,USPS,1.0,0.990137,0.969688,0.967768,0.001,1.0
4,MLP_same_capacity,finance,1.0,0.61096,0.570507,0.570214,0.001,2.0
5,MLP_same_capacity,madelon,1.0,0.819199,0.593281,0.571757,0.001,1.0
6,MLP_same_capacity,mice_protein,1.0,0.999479,0.992254,0.983014,0.0001,1.0
7,MLP_same_capacity,poly_binarised_decimalised_synth,1.0,0.527624,0.50368,0.472102,0.0001,2.0


# SEFS

In [61]:
sefs_sweep_ids = [
    'n6zb5lc9', # everything
    '7rkxj7g4', # PBMC
]

In [62]:
# Fetch and process the data
sefs_runs_data = fetch_runs(entity, project, sefs_sweep_ids)
for record in sefs_runs_data:
    if record.get('num_hidden') is None:  # Check if 'num_hidden' is None
        record['num_hidden'] = 4  # Set 'num_hidden' to 4

In [63]:
sefs_df, sefs_best_hyperparams = process_runs_data(sefs_runs_data)


In [33]:
sefs_averaged_data, sefs_best_hyperparams = process_runs_data(sefs_runs_data)


In [34]:
sefs_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,lr,num_hidden
0,SEFS,COIL20,1.0,0.979644,0.963678,0.949701,0.003,1.0
1,SEFS,Isolet,1.0,0.976174,0.882264,0.886111,0.003,1.0
2,SEFS,PBMC,1.0,0.963813,0.838093,0.831686,0.003,4.0
3,SEFS,USPS,1.0,0.938292,0.935731,0.92544,0.003,1.0
4,SEFS,finance,1.0,0.615141,0.580251,0.599271,0.003,2.0
5,SEFS,madelon,1.0,0.761723,0.642004,0.652284,0.003,2.0
6,SEFS,mice_protein,1.0,0.904336,0.860212,0.850778,0.003,1.0
7,SEFS,poly_binarised_decimalised_synth,1.0,0.374214,0.362826,0.366728,0.003,1.0


In [35]:
sefs_averaged_data.to_csv(os.path.join(results_dir, 'sefs.csv'))


# CAE

In [64]:
cae_sweep_ids = [
    'h245pc2i', 
    'jqulrkke', # finance
    'xh18ha0v', # PBMC
]

In [65]:
# Fetch and process the data
cae_runs_data = fetch_runs(entity, project, cae_sweep_ids)

In [66]:
cae_df, cae_best_hyperparams = process_runs_data(cae_runs_data)


In [49]:
# Store the CAE selection under its own name; it used to be assigned to
# sefs_best_hyperparams, silently replacing the SEFS result.
cae_averaged_data, cae_best_hyperparams = process_runs_data(cae_runs_data)


In [61]:
cae_averaged_data.to_csv(os.path.join(results_dir, 'cae.csv'))


In [77]:
cae_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,CAE_neurons_ratio
0,cae,COIL20,1.0,1.0,0.983572,0.970427,1.0
1,cae,Isolet,1.0,0.962531,0.828027,0.801414,0.8
2,cae,PBMC,1.0,0.882927,0.667353,0.680679,0.1
3,cae,USPS,1.0,0.930945,0.904831,0.904749,1.0
4,cae,finance,1.0,0.6566,0.611487,0.592564,0.7
5,cae,madelon,1.0,0.873945,0.727068,0.701883,1.0
6,cae,mice_protein,1.0,0.962683,0.894743,0.853472,0.6
7,cae,poly_binarised_decimalised_synth,1.0,0.445899,0.419617,0.434222,0.8


# Supervised CAE

In [67]:
scae_sweep_ids = [
    '8yghctpp', 
    'xclyvl0c', # finance
    'vle39arl', # PBMC
]

In [68]:
# Fetch and process the data
scae_runs_data = fetch_runs(entity, project, scae_sweep_ids)

In [69]:
scae_df, scae_best_hyperparams = process_runs_data(scae_runs_data)


In [86]:
# Store the supervised-CAE selection under its own name; it used to be assigned
# to sefs_best_hyperparams, silently replacing the SEFS result.
scae_averaged_data, scae_best_hyperparams = process_runs_data(scae_runs_data)


In [87]:
scae_averaged_data.to_csv(os.path.join(results_dir, 'scae.csv'))


In [88]:
scae_averaged_data

Unnamed: 0,model,dataset,seed,bestmodel_train/F1_weighted,bestmodel_valid/F1_weighted,bestmodel_test/F1_weighted,CAE_neurons_ratio
0,supervised_cae,COIL20,1.0,0.054281,0.058931,0.064637,0.1
1,supervised_cae,Isolet,1.0,0.040158,0.03976,0.036833,0.2
2,supervised_cae,PBMC,0.0,0.830378,0.832341,0.853663,0.1
3,supervised_cae,USPS,1.0,0.207287,0.21248,0.209018,0.1
4,supervised_cae,finance,1.0,0.537526,0.547139,0.544434,0.6
5,supervised_cae,madelon,1.0,0.610881,0.624305,0.618427,1.0
6,supervised_cae,mice_protein,1.0,0.191261,0.167532,0.172394,1.0
7,supervised_cae,poly_binarised_decimalised_synth,1.0,0.368529,0.368871,0.369297,0.3


# Full table

In [5]:
fact_df = pd.read_csv(os.path.join(results_dir, 'fact.csv'), index_col=0)
lasso_df = pd.read_csv(os.path.join(results_dir, 'lasso.csv'), index_col=0)
xgb_df = pd.read_csv(os.path.join(results_dir, 'xgb.csv'), index_col=0)
rf_df = pd.read_csv(os.path.join(results_dir, 'rf.csv'), index_col=0)
mlp_df = pd.read_csv(os.path.join(results_dir, 'mlp.csv'), index_col=0)
sefs_df = pd.read_csv(os.path.join(results_dir, 'sefs.csv'), index_col=0)
cae_df = pd.read_csv(os.path.join(results_dir, 'cae.csv'), index_col=0)
scae_df = pd.read_csv(os.path.join(results_dir, 'scae.csv'), index_col=0)

In [70]:
averaged_dfs = [
    fact_df,
    lasso_df,
    xgb_df,
    rf_df,
    mlp_df,
    sefs_df,
    cae_df,
    scae_df
]

results_df = pd.concat(averaged_dfs)

In [80]:
# Rows of these three datasets are removed before the summary tables are built.
df = results_df[~results_df['dataset'].isin(['gisette', 'MNIST', 'poly_binarised_decimalised_synth'])]



In [83]:
train_table, valid_table, test_table = summarize_performance(df)


In [84]:
train_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,1.0 ± 0.0,0.9996 ± 0.0006,1.0 ± nan,0.9903 ± 0.0021,0.6586 ± 0.039,0.8693 ± 0.0176,0.9995 ± 0.0009
F-Act (full TTI),1.0 ± 0.0,0.9996 ± 0.0006,1.0 ± nan,0.9903 ± 0.0021,0.6586 ± 0.039,0.8693 ± 0.0176,0.9995 ± 0.0009
F-Act (optimal TTI),1.0 ± 0.0,0.9996 ± 0.0006,1.0 ± nan,0.9903 ± 0.0021,0.6586 ± 0.039,0.8693 ± 0.0176,0.9995 ± 0.0009
MLP_same_capacity,1.0 ± 0.0,0.9993 ± 0.0006,0.9993 ± 0.0011,0.9901 ± 0.0042,0.611 ± 0.0452,0.8192 ± 0.0063,0.9995 ± 0.0009
SEFS,0.9796 ± 0.0052,0.9762 ± 0.0034,0.9638 ± 0.0146,0.9383 ± 0.0044,0.6151 ± 0.0165,0.7617 ± 0.0209,0.9043 ± 0.0166
cae,1.0 ± 0.0,0.9625 ± 0.019,0.8829 ± 0.1135,0.9309 ± 0.0136,0.6566 ± 0.0109,0.8739 ± 0.0347,0.9627 ± 0.0321
lasso,1.0 ± 0.0,1.0 ± 0.0,1.0 ± 0.0,0.995 ± 0.0,0.6704 ± 0.0004,0.8038 ± 0.0,0.9882 ± 0.011
rf,0.9863 ± 0.0011,0.9847 ± 0.0015,0.9374 ± 0.0025,0.9578 ± 0.0007,0.8778 ± 0.0033,0.9958 ± 0.0009,0.9998 ± 0.0005
supervised_cae,0.0543 ± 0.0369,0.0402 ± 0.007,0.8304 ± nan,0.2073 ± 0.023,0.5375 ± 0.0451,0.6109 ± 0.0053,0.1913 ± 0.0393
xgboost,1.0 ± 0.0,1.0 ± 0.0,0.9952 ± 0.0,1.0 ± 0.0,0.8617 ± 0.0,1.0 ± 0.0,1.0 ± 0.0


In [85]:
valid_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,0.9929 ± 0.0071,0.9264 ± 0.0053,0.8461 ± nan,0.9681 ± 0.0032,0.5992 ± 0.0054,0.7287 ± 0.0253,0.9815 ± 0.0047
F-Act (full TTI),0.9918 ± 0.0041,0.943 ± 0.0158,0.8548 ± nan,0.9625 ± 0.0008,0.5754 ± 0.0042,0.7223 ± 0.0577,0.9861 ± 0.0092
F-Act (optimal TTI),0.9953 ± 0.0041,0.943 ± 0.0158,0.8793 ± nan,0.9666 ± 0.0043,0.6027 ± 0.0181,0.7415 ± 0.0375,0.9908 ± 0.0122
MLP_same_capacity,0.993 ± 0.0001,0.945 ± 0.0087,0.8989 ± 0.0048,0.9697 ± 0.004,0.5705 ± 0.0326,0.5933 ± 0.0159,0.9923 ± 0.0054
SEFS,0.9637 ± 0.0071,0.8823 ± 0.0168,0.8381 ± 0.0111,0.9357 ± 0.0025,0.5803 ± 0.0132,0.642 ± 0.0109,0.8602 ± 0.0135
cae,0.9836 ± 0.0101,0.828 ± 0.0176,0.6674 ± 0.0574,0.9048 ± 0.0085,0.6115 ± 0.0067,0.7271 ± 0.0126,0.8947 ± 0.0606
lasso,0.9826 ± 0.0,0.9479 ± 0.0,0.8736 ± 0.0,0.9344 ± 0.0003,0.595 ± 0.0,0.5481 ± 0.0,0.9645 ± 0.0163
rf,0.9701 ± 0.0021,0.9215 ± 0.0026,0.8699 ± 0.0014,0.9404 ± 0.0005,0.6007 ± 0.0061,0.6931 ± 0.0104,0.9629 ± 0.0172
supervised_cae,0.0589 ± 0.0354,0.0398 ± 0.0139,0.8323 ± nan,0.2125 ± 0.0242,0.5471 ± 0.0282,0.6243 ± 0.0089,0.1675 ± 0.0578
xgboost,0.9854 ± 0.0,0.9029 ± 0.0,0.8987 ± 0.0,0.9683 ± 0.0,0.6348 ± 0.0,0.8076 ± 0.0,0.9783 ± 0.0134


In [86]:
# Bare expression: renders the test-split table as this cell's output.
test_table 

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,0.986 ± 0.0061,0.9197 ± 0.0056,0.8317 ± nan,0.9683 ± 0.0011,0.6029 ± 0.0162,0.7225 ± 0.0214,0.9877 ± 0.0026
F-Act (full TTI),0.9826 ± 0.006,0.9286 ± 0.0118,0.9131 ± nan,0.9603 ± 0.002,0.5995 ± 0.0134,0.7022 ± 0.0594,0.9846 ± 0.0116
F-Act (optimal TTI),0.9884 ± 0.0019,0.9286 ± 0.0118,0.8987 ± nan,0.9595 ± 0.0031,0.5981 ± 0.019,0.729 ± 0.0233,0.983 ± 0.0107
MLP_same_capacity,0.9883 ± 0.0053,0.9348 ± 0.0129,0.8942 ± 0.0048,0.9678 ± 0.0015,0.5702 ± 0.0396,0.5718 ± 0.0089,0.983 ± 0.0027
SEFS,0.9497 ± 0.014,0.8861 ± 0.0208,0.8317 ± 0.0127,0.9254 ± 0.0075,0.5993 ± 0.0046,0.6523 ± 0.0169,0.8508 ± 0.0459
cae,0.9704 ± 0.0087,0.8014 ± 0.0128,0.6807 ± 0.0534,0.9047 ± 0.0083,0.5926 ± 0.012,0.7019 ± 0.0183,0.8535 ± 0.0537
lasso,0.9824 ± 0.0,0.9458 ± 0.0001,0.8925 ± 0.0014,0.9336 ± 0.0003,0.5978 ± 0.0,0.5153 ± 0.0,0.9524 ± 0.0229
rf,0.9676 ± 0.0018,0.9009 ± 0.0045,0.8866 ± 0.0037,0.9336 ± 0.0015,0.6195 ± 0.0047,0.6719 ± 0.0026,0.9698 ± 0.0182
supervised_cae,0.0646 ± 0.0445,0.0368 ± 0.0079,0.8537 ± nan,0.209 ± 0.0256,0.5444 ± 0.018,0.6184 ± 0.003,0.1724 ± 0.0678
xgboost,0.9861 ± 0.0,0.8875 ± 0.0,0.8942 ± 0.0,0.9737 ± 0.0,0.5883 ± 0.0,0.8096 ± 0.0,0.9815 ± 0.0081


In [7]:
# Rebind the train / valid / test tables to whatever get_tables returns for results_df.
# NOTE(review): this cell's execution count (7) is lower than that of the cells
# above it (85, 86), so the notebook was run out of order -- confirm it still
# reproduces under Restart Kernel -> Run All.
train_table, valid_table, test_table = get_tables(results_df)


In [8]:
# Bare expression: renders the training-split table produced by get_tables.
train_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,1.0,0.999628,1.0,0.990255,0.658565,0.869327,0.999479
F-Act (full TTI),1.0,0.999628,1.0,0.990255,0.658565,0.869327,0.999479
F-Act (optimal TTI),1.0,0.999628,1.0,0.990255,0.658565,0.869327,0.999479
MLP_same_capacity,1.0,0.999255,0.999349,0.990137,0.61096,0.819199,0.999479
SEFS,0.979644,0.976174,0.963813,0.938292,0.615141,0.761723,0.904336
cae,1.0,0.962531,0.882927,0.930945,0.6566,0.873945,0.962683
lasso,1.0,1.0,1.0,0.994983,0.670358,0.80383,0.988156
rf,0.986327,0.984742,0.937396,0.957755,0.877761,0.995769,0.999807
supervised_cae,0.054281,0.040158,0.830378,0.207287,0.537526,0.610881,0.191261
xgboost,1.0,1.0,0.995177,1.0,0.861745,1.0,1.0


In [9]:
# Bare expression: renders the validation-split table produced by get_tables.
valid_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,0.99291,0.926447,0.846068,0.968084,0.599168,0.728721,0.981472
F-Act (full TTI),0.99181,0.94299,0.854802,0.962459,0.57543,0.722341,0.986119
F-Act (optimal TTI),0.995282,0.94299,0.879263,0.966608,0.602687,0.741475,0.990787
MLP_same_capacity,0.992998,0.944965,0.898915,0.969688,0.570507,0.593281,0.992254
SEFS,0.963678,0.882264,0.838093,0.935731,0.580251,0.642004,0.860212
cae,0.983572,0.828027,0.667353,0.904831,0.611487,0.727068,0.894743
lasso,0.982551,0.947942,0.873614,0.934424,0.59504,0.548075,0.964513
rf,0.970085,0.921524,0.869854,0.940427,0.600702,0.693069,0.962906
supervised_cae,0.058931,0.03976,0.832341,0.21248,0.547139,0.624305,0.167532
xgboost,0.98539,0.902887,0.898703,0.968251,0.634798,0.80759,0.978342


In [10]:
# Bare expression: renders the test-split table produced by get_tables.
test_table

dataset,COIL20,Isolet,PBMC,USPS,finance,madelon,mice_protein
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
F-Act,0.986039,0.919675,0.831719,0.968292,0.602923,0.72248,0.987732
F-Act (full TTI),0.982608,0.928581,0.913124,0.960261,0.599476,0.702223,0.984608
F-Act (optimal TTI),0.988419,0.928581,0.898703,0.959526,0.598131,0.729048,0.982995
MLP_same_capacity,0.988311,0.934809,0.894191,0.967768,0.570214,0.571757,0.983014
SEFS,0.949701,0.886111,0.831686,0.92544,0.599271,0.652284,0.850778
cae,0.970427,0.801414,0.680679,0.904749,0.592564,0.701883,0.853472
lasso,0.982413,0.945832,0.892459,0.933578,0.597777,0.51532,0.952401
rf,0.967561,0.900926,0.886592,0.933634,0.619456,0.671901,0.969803
supervised_cae,0.064637,0.036833,0.853663,0.209018,0.544434,0.618427,0.172394
xgboost,0.986075,0.887528,0.894172,0.973689,0.588311,0.809609,0.98149


In [87]:
def to_latex(df):
    r"""Print the LaTeX source of a results table after some clean-up.

    Parameters
    ----------
    df
        Any object with a ``to_latex()`` method that returns a string
        (the notebook calls this with ``test_table``).

    Returns
    -------
    None
        The cleaned LaTeX is written to stdout with ``print``.

    Clean-up applied to the text returned by ``df.to_latex()``:

    * ``\th#606060`` is replaced by ``\centering`` (presumably the residue of
      the ``th`` header style set in ``get_tables`` -- TODO confirm).
    * ``MLP_same_capacity`` is shortened to ``MLP``; the escaped spelling
      ``MLP\_same\_capacity`` is handled as well.
    * Unless the table already carries booktabs rules (``\toprule``), an
      ``\hline`` is added after the ``\begin{tabular}{...}`` line, after the
      empty ``model & & ... \\`` index-name row, and before
      ``\end{tabular}``. Any column count and any first data row are accepted.
    * ``\colorgreen \font-weightbold <number>`` is rewritten as
      ``\textcolor{green}{\textbf{<number>}}``.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    latex = df.to_latex()
    latex = latex.replace(r"\th#606060", r"\centering")

    # The model name shows up as `MLP\_same\_capacity` when underscores are
    # escaped in the LaTeX output, so both spellings have to be renamed.
    for long_name in (r"MLP\_same\_capacity", "MLP_same_capacity"):
        latex = latex.replace(long_name, "MLP")

    # A booktabs table already has \toprule / \midrule / \bottomrule; adding
    # \hline next to those would draw doubled rules, so skip it in that case.
    if r"\toprule" not in latex:
        # Top rule: right after the \begin{tabular}{<any column spec>} line.
        latex = re.sub(
            r"(?m)^(\\begin\{tabular\}.*)$",
            lambda m: m.group(1) + "\n\\hline",
            latex,
        )
        # Header rule: after the index-name row, whose other cells are empty.
        latex = re.sub(
            r"(\\\\\nmodel(?:[ \t]*&)+[ \t]*\\\\\n)",
            lambda m: m.group(1) + "\\hline\n",
            latex,
        )
        # Bottom rule: just before the tabular environment closes.
        latex = latex.replace("\n\\end{tabular}", "\n\\hline\n\\end{tabular}")

    # Turn the highlight markers in front of a decimal number into real
    # LaTeX colour + bold commands.
    latex = re.sub(
        r"\\colorgreen \\font-weightbold (\d+\.\d+)",
        lambda m: r"\textcolor{green}{\textbf{" + m.group(1) + "}}",
        latex,
    )

    print(latex)

# Print the LaTeX source for the test-split table (to_latex prints; it returns nothing).
to_latex(test_table)

\begin{tabular}{llllllll}
\toprule
dataset &           COIL20 &           Isolet &             PBMC &             USPS &          finance &          madelon &     mice\_protein \\
model               &                  &                  &                  &                  &                  &                  &                  \\
\midrule
F-Act               &   0.986 ± 0.0061 &  0.9197 ± 0.0056 &     0.8317 ± nan &  0.9683 ± 0.0011 &  0.6029 ± 0.0162 &  0.7225 ± 0.0214 &  0.9877 ± 0.0026 \\
F-Act (full TTI)    &   0.9826 ± 0.006 &  0.9286 ± 0.0118 &     0.9131 ± nan &   0.9603 ± 0.002 &  0.5995 ± 0.0134 &  0.7022 ± 0.0594 &  0.9846 ± 0.0116 \\
F-Act (optimal TTI) &  0.9884 ± 0.0019 &  0.9286 ± 0.0118 &     0.8987 ± nan &  0.9595 ± 0.0031 &   0.5981 ± 0.019 &   0.729 ± 0.0233 &   0.983 ± 0.0107 \\
MLP\_same\_capacity   &  0.9883 ± 0.0053 &  0.9348 ± 0.0129 &  0.8942 ± 0.0048 &  0.9678 ± 0.0015 &  0.5702 ± 0.0396 &  0.5718 ± 0.0089 &   0.983 ± 0.0027 \\
SEFS                &   0.949

  latex_table = df.to_latex()
