In [23]:
import numpy as np
import json
import glob
import pandas as pd
import matplotlib as plt

In [62]:
def get_results(model_name, context=""):
    """Average the per-context AUC of a model over all of its saved iterations.

    Result files are looked up relative to the current working directory:

    * with a context:    results/<model_name>/<context>/results_checkpoint_<model_name>_<context>_iter_*.json
      (individual models)
    * without a context: results/<model_name>/results_checkpoint_<model_name>__iter_*.json
      (combined models)

    Each file must hold a JSON object mapping a context key to an object
    that contains an "AUC" entry.

    Parameters
    ----------
    model_name : str
        Model name; used in the file pattern and as the output column name.
    context : str or int, optional
        Context identifier of an individual model. The default "" (or None)
        selects the combined-model file pattern.

    Returns
    -------
    pandas.DataFrame
        One column named ``model_name``, indexed by context key, holding the
        mean AUC across every matching iteration file.

    Raises
    ------
    ValueError
        If no result file matches the pattern.
    KeyError
        If an entry in a result file has no "AUC" field.

    Notes
    -----
    Prints the number of matched files followed by the model name.
    """
    # Compare against the "no context" sentinels explicitly: an integer
    # context of 0 is falsy but is still a valid context identifier.
    if context is not None and context != "":
        # Results from individual models.
        pattern = "results/{}/{}/results_checkpoint_{}_{}_iter_*.json".format(
            model_name, context, model_name, context
        )
    else:
        # Results from combined models.
        pattern = "results/{}/results_checkpoint_{}__iter_*.json".format(
            model_name, model_name
        )
    # glob returns matches in arbitrary order; sort for a deterministic run.
    iter_files = sorted(glob.glob(pattern))
    print(len(iter_files), model_name)
    if not iter_files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(
            "No result files found for model {!r} matching {!r}".format(model_name, pattern)
        )

    # One single-column frame per iteration file (one file per training run).
    iters_results_df = []
    for iter_file in iter_files:
        with open(iter_file) as iter_f:
            iter_file_json = json.load(iter_f)
        # Collect the AUC of every context, in sorted key order.
        results_dict = {key: iter_file_json[key]["AUC"] for key in sorted(iter_file_json)}
        iters_results_df.append(
            pd.DataFrame.from_dict(results_dict, orient='index', columns=[model_name])
        )
    # Stack all iterations and average the rows that share a context key.
    return pd.concat(iters_results_df).groupby(level=0).mean()


In [61]:
# Names of the combined models whose results are compared.
model_names = [
    'AE_sound_all',
    'AE_sound_FiLM_one_hot',
    'AE_sound_FiLM_embed_32',
    'AE_sound_FiLM_embed_64',
    'AE_sound_FiLM_embed_128',
    'AE_sound_FiLM_embed_256',
]
# Combined models first: one single-column frame of mean AUCs per model.
results_list = [get_results(combined_name) for combined_name in model_names]
# Then the individual "AE_sound" models, one per context id 0..15.
single_model_list = [
    get_results("AE_sound", context=str(context_id)) for context_id in range(16)
]
# Stack the individual-model frames row-wise into a single "AE_sound" column ...
single_model_results = pd.concat(single_model_list)
# ... and put the combined-model columns beside it, aligned on the index.
results_df = pd.concat([single_model_results, *results_list], axis=1)
results_df


3 AE_sound_all
3 AE_sound_FiLM_one_hot
3 AE_sound_FiLM_embed_32
3 AE_sound_FiLM_embed_64
3 AE_sound_FiLM_embed_128
3 AE_sound_FiLM_embed_256
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound
3 AE_sound


Unnamed: 0,AE_sound,AE_sound_all,AE_sound_FiLM_one_hot,AE_sound_FiLM_embed_32,AE_sound_FiLM_embed_64,AE_sound_FiLM_embed_128,AE_sound_FiLM_embed_256
fan_0_0dB,0.616174,0.589729,0.615468,0.622427,0.614699,0.633047,0.607145
fan_0_6dB,0.816429,0.700737,0.773916,0.786953,0.786967,0.779111,0.720252
fan_0_min6dB,0.556089,0.551353,0.551081,0.549368,0.543949,0.564215,0.546257
fan_1_0dB,0.880383,0.741764,0.867069,0.867133,0.872565,0.891525,0.811607
fan_1_6dB,0.976694,0.939875,0.957281,0.946734,0.944587,0.947753,0.85991
fan_1_min6dB,0.693995,0.558367,0.710676,0.727273,0.733219,0.722452,0.679482
fan_2_0dB,0.759413,0.711512,0.783415,0.803293,0.806635,0.785515,0.719748
fan_2_6dB,0.896689,0.895426,0.939317,0.934222,0.931857,0.916788,0.744148
fan_2_min6dB,0.589083,0.536454,0.567836,0.583922,0.588472,0.570989,0.559525
fan_3_0dB,0.974591,0.796405,0.957776,0.989597,0.984858,0.989933,0.943949


In [46]:
# Load the machine-id lookup and invert it so it can be indexed by its values.
with open("machine_ids_map", "r") as f:
    machine_id_map = json.load(f)
inv_machine_id_map = {mapped: original for original, mapped in machine_id_map.items()}

# Index labels are expected to look like <machine>_<id>_<SNR>; split each one
# into its three fields and store them as separate columns.
label_fields = [label.split('_') for label in results_df.index.tolist()]
results_df[["Machine Name", "id", "SNR"]] = pd.DataFrame(label_fields, index=results_df.index)

# Convert the id field to a number, look it up in the inverted map, and keep
# only the text after the last underscore of the mapped name.
mapped_names = pd.to_numeric(results_df["id"]).map(inv_machine_id_map)
results_df["ID"] = pd.DataFrame(
    [name.split('_')[-1] for name in mapped_names.tolist()],
    index=results_df.index,
)
results_df


Unnamed: 0,AE_sound,AE_sound_all,AE_sound_FiLM_one_hot,AE_sound_FiLM_embed_32,AE_sound_FiLM_embed_64,AE_sound_FiLM_embed_128,AE_sound_FiLM_embed_256,Machine Name,id,SNR,ID
fan_0_0dB,0.616174,0.589729,0.615468,0.622427,0.614699,0.633047,0.607145,fan,0,0dB,0
fan_0_6dB,0.816429,0.700737,0.773916,0.786953,0.786967,0.779111,0.720252,fan,0,6dB,0
fan_0_min6dB,0.556089,0.551353,0.551081,0.549368,0.543949,0.564215,0.546257,fan,0,min6dB,0
fan_1_0dB,0.880383,0.741764,0.867069,0.867133,0.872565,0.891525,0.811607,fan,1,0dB,2
fan_1_6dB,0.976694,0.939875,0.957281,0.946734,0.944587,0.947753,0.85991,fan,1,6dB,2
fan_1_min6dB,0.693995,0.558367,0.710676,0.727273,0.733219,0.722452,0.679482,fan,1,min6dB,2
fan_2_0dB,0.759413,0.711512,0.783415,0.803293,0.806635,0.785515,0.719748,fan,2,0dB,4
fan_2_6dB,0.896689,0.895426,0.939317,0.934222,0.931857,0.916788,0.744148,fan,2,6dB,4
fan_2_min6dB,0.589083,0.536454,0.567836,0.583922,0.588472,0.570989,0.559525,fan,2,min6dB,4
fan_3_0dB,0.974591,0.796405,0.957776,0.989597,0.984858,0.989933,0.943949,fan,3,0dB,6


In [47]:
# Rank the combined models against each other within every row: the largest
# value gets rank 1, and ties are broken by column order (method='first').
combined_cols = [
    "AE_sound_all",
    "AE_sound_FiLM_one_hot",
    "AE_sound_FiLM_embed_32",
    "AE_sound_FiLM_embed_64",
    "AE_sound_FiLM_embed_128",
    "AE_sound_FiLM_embed_256",
]
rank_results_df = results_df[combined_cols].rank(axis=1, ascending=False, method='first')
# Average every model's rank over all rows and label that single row.
mean_rank_row = (
    rank_results_df.mean()
    .to_frame()
    .T
    .rename(index={0: 'average rank'})
)
# Append the average-rank row underneath the per-context results.
results_and_ranks_df = pd.concat([results_df, mean_rank_row])
results_and_ranks_df

Unnamed: 0,AE_sound,AE_sound_all,AE_sound_FiLM_one_hot,AE_sound_FiLM_embed_32,AE_sound_FiLM_embed_64,AE_sound_FiLM_embed_128,AE_sound_FiLM_embed_256,Machine Name,id,SNR,ID
fan_0_0dB,0.616174,0.589729,0.615468,0.622427,0.614699,0.633047,0.607145,fan,0.0,0dB,0.0
fan_0_6dB,0.816429,0.700737,0.773916,0.786953,0.786967,0.779111,0.720252,fan,0.0,6dB,0.0
fan_0_min6dB,0.556089,0.551353,0.551081,0.549368,0.543949,0.564215,0.546257,fan,0.0,min6dB,0.0
fan_1_0dB,0.880383,0.741764,0.867069,0.867133,0.872565,0.891525,0.811607,fan,1.0,0dB,2.0
fan_1_6dB,0.976694,0.939875,0.957281,0.946734,0.944587,0.947753,0.85991,fan,1.0,6dB,2.0
fan_1_min6dB,0.693995,0.558367,0.710676,0.727273,0.733219,0.722452,0.679482,fan,1.0,min6dB,2.0
fan_2_0dB,0.759413,0.711512,0.783415,0.803293,0.806635,0.785515,0.719748,fan,2.0,0dB,4.0
fan_2_6dB,0.896689,0.895426,0.939317,0.934222,0.931857,0.916788,0.744148,fan,2.0,6dB,4.0
fan_2_min6dB,0.589083,0.536454,0.567836,0.583922,0.588472,0.570989,0.559525,fan,2.0,min6dB,4.0
fan_3_0dB,0.974591,0.796405,0.957776,0.989597,0.984858,0.989933,0.943949,fan,3.0,0dB,6.0


In [50]:
# Build the final table. The mapping below fixes both the column order
# (its key order) and the readable header used for each column.
formatted_col_map = {
    "Machine Name": "Machine Name",
    "ID": "Machine ID",
    "SNR": "SNR",
    "AE_sound": "AE separate (reference)",
    "AE_sound_all": "AE no cond.",
    "AE_sound_FiLM_one_hot": "AE FiLM one hot",
    "AE_sound_FiLM_embed_32": "AE FiLM 32 embed",
    "AE_sound_FiLM_embed_64": "AE FiLM 64 embed",
    "AE_sound_FiLM_embed_128": "AE FiLM 128 embed",
    "AE_sound_FiLM_embed_256": "AE FiLM 256 embed",
}
# Columns to keep, in display order (dicts preserve insertion order).
table_cols = list(formatted_col_map)

results_table = results_and_ranks_df[table_cols]
# errors="raise" makes a missing source column fail loudly instead of being skipped.
formatted_results_table = results_table.rename(columns=formatted_col_map, errors="raise")
formatted_results_table

Unnamed: 0,Machine Name,Machine ID,SNR,AE separate (reference),AE no cond.,AE FiLM one hot,AE FiLM 32 embed,AE FiLM 64 embed,AE FiLM 128 embed,AE FiLM 256 embed
fan_0_0dB,fan,0.0,0dB,0.616174,0.589729,0.615468,0.622427,0.614699,0.633047,0.607145
fan_0_6dB,fan,0.0,6dB,0.816429,0.700737,0.773916,0.786953,0.786967,0.779111,0.720252
fan_0_min6dB,fan,0.0,min6dB,0.556089,0.551353,0.551081,0.549368,0.543949,0.564215,0.546257
fan_1_0dB,fan,2.0,0dB,0.880383,0.741764,0.867069,0.867133,0.872565,0.891525,0.811607
fan_1_6dB,fan,2.0,6dB,0.976694,0.939875,0.957281,0.946734,0.944587,0.947753,0.85991
fan_1_min6dB,fan,2.0,min6dB,0.693995,0.558367,0.710676,0.727273,0.733219,0.722452,0.679482
fan_2_0dB,fan,4.0,0dB,0.759413,0.711512,0.783415,0.803293,0.806635,0.785515,0.719748
fan_2_6dB,fan,4.0,6dB,0.896689,0.895426,0.939317,0.934222,0.931857,0.916788,0.744148
fan_2_min6dB,fan,4.0,min6dB,0.589083,0.536454,0.567836,0.583922,0.588472,0.570989,0.559525
fan_3_0dB,fan,6.0,0dB,0.974591,0.796405,0.957776,0.989597,0.984858,0.989933,0.943949


In [51]:
# Emit the formatted table as LaTeX source: index column omitted, floats
# written with three decimal places.
print(
    formatted_results_table.to_latex(
        index=False,
        columns=formatted_results_table.columns,
        float_format='%.3f',
    )
)

\begin{tabular}{lllrrrrrrr}
\toprule
Machine Name & Machine ID &     SNR &  AE separate (reference) &  AE no cond. &  AE FiLM one hot &  AE FiLM 32 embed &  AE FiLM 64 embed &  AE FiLM 128 embed &  AE FiLM 256 embed \\
\midrule
         fan &         00 &     0dB &                    0.616 &        0.590 &            0.615 &             0.622 &             0.615 &              0.633 &              0.607 \\
         fan &         00 &     6dB &                    0.816 &        0.701 &            0.774 &             0.787 &             0.787 &              0.779 &              0.720 \\
         fan &         00 &  min6dB &                    0.556 &        0.551 &            0.551 &             0.549 &             0.544 &              0.564 &              0.546 \\
         fan &         02 &     0dB &                    0.880 &        0.742 &            0.867 &             0.867 &             0.873 &              0.892 &              0.812 \\
         fan &         02 &     6dB &       

In [54]:
# Collect, for each discriminative MLP, its highest validation accuracy (in percent).
layer_sizes = ["32", "64", "128", "256"]
best_val_accs = []
for mlp_i in layer_sizes:
    log_path = "logs/mlp_sound_{}/MLP_sound_{}.json".format(mlp_i, mlp_i)
    with open(log_path, "r") as mlp_json:
        mlp_i_log = json.load(mlp_json)
    val_accs = mlp_i_log["val_accs"]
    best_val_epoch = mlp_i_log["best_val_epoch"]
    best_val_accs.append(np.max(val_accs) * 100)
    # Sanity check: the logged best epoch, minus one, must be the position of
    # the highest validation accuracy in the list.
    assert np.argmax(val_accs) == best_val_epoch - 1

# One row per layer size.
supervised_results = pd.DataFrame(
    {
        'Layer size': layer_sizes,
        '% Validation Accuracy': best_val_accs,
    }
)
supervised_results
        

Unnamed: 0,Layer size,% Validation Accuracy
0,32,82.892031
1,64,83.054175
2,128,83.018848
3,256,82.450019


In [55]:
# Emit the accuracy table as LaTeX source: index column omitted, floats
# written with two decimal places.
print(
    supervised_results.to_latex(
        index=False,
        float_format='%.2f',
    )
)

\begin{tabular}{lr}
\toprule
Layer size &  \% Validation Accuracy \\
\midrule
        32 &                  82.89 \\
        64 &                  83.05 \\
       128 &                  83.02 \\
       256 &                  82.45 \\
\bottomrule
\end{tabular}

