In [20]:
# Import required libraries
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings

warnings.filterwarnings('ignore')
# Set display options for pandas
pd.set_option('display.max_columns', None)

def results_to_dataframe(results,model):
    rows = []
    for result in results:
        # Convert params tuple to a dictionary if needed
        #if isinstance(result["params"], tuple):
        #    param_keys = ["param" + str(i) for i in range(len(result["params"]))]
        #    param_dict = dict(zip(param_keys, result["params"]))
        #else:
        #    param_dict = result["params"]
        # Flatten the results
        row = {
            "model": model,
          
            "custom": result["custom"],
            "dunn": result['dunn'],
            "fold": result["fold_idx"]
            #"model": result["model"],
            #"fusion": result["fusion"],
            #"clf": result["clf"],
            #**param_dict,  # Add classifier hyperparameters
            #**result["results"],  # Add result metrics
        }
        rows.append(row)
    return pd.DataFrame(rows)

# Function to load all .pkl files from a directory
def load_results(directory):
    model_names = ["cnn_vae", "lstm_vae", "rnn_vae", "transformer_vae","cnn", "lstm", "rnn", "transformer"] 
    results_df = pd.DataFrame()
    for root, _, files in os.walk(directory):  # Traverse through folders and files
        for filename in files:
            if filename.endswith(".pkl"):
                model = next((name for name in model_names if name in filename.lower()), "unknown") 
                file_path = os.path.join(root, filename)
                with open(file_path, "rb") as f:
                    data = pickle.load(f)
                    df =  results_to_dataframe(data,model)                        
                    results_df = pd.concat((results_df,df))
    return results_df


# Plot results (e.g., accuracy by model and fusion type)
def plot_results(results_df, metric, title):
    plt.figure(figsize=(10, 6))
    sns.barplot(data=results_df, x="model", y=metric, hue="fusion")
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("Model")
    plt.legend(title="Fusion Type")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_binary_results(results_df, metric, title):
    plt.figure(figsize=(10, 6))
    sns.barplot(data=results_df, x="bdi_task", y=metric, hue="model")
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("BDI Task")
    plt.legend(title="Model")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Analyze best hyperparameters
def analyze_hyperparameters(results_df, metric):
    best_results = results_df.loc[results_df.groupby(["model", "fusion", "clf"])[metric].idxmax()]
    print(f"Best results for {metric}:")
    display(best_results)

In [43]:
# Define paths to results directories
results_clf_dir = "/Users/crisgallego/Desktop/SMART_deepRLearning/results_all_experiments/clustering/multimodal/embeddings_32"

# Load classifier results
clf_results_df = load_results(results_clf_dir)

display(clf_results_df)

# Visualize metrics
#plot_results(clf_results_df, metric="precision_bdi", title="Accuracy by Model and Fusion Type")
# Analyze the best hyperparameters based on accuracy
#analyze_hyperparameters(clf_results_df, metric="accuracy_bdi")


Unnamed: 0,model,custom,dunn,fold
0,lstm_vae,0.000000,0.255868,0
1,lstm_vae,0.000000,0.301163,1
2,lstm_vae,0.000000,0.279761,2
3,lstm_vae,0.000000,0.252443,3
4,lstm_vae,0.000000,0.240583,4
...,...,...,...,...
5,cnn,0.123404,0.165113,5
6,cnn,0.000000,0.062722,6
7,cnn,0.000000,0.122239,7
8,cnn,0.155603,0.105960,8


In [44]:
summary_df = clf_results_df.groupby('model').agg(
    custom_mean=('custom', 'mean'),
    custom_std=('custom', 'std'),
    dunn_mean=('dunn', 'mean'),
    dunn_std=('dunn', 'std')
).reset_index()

In [45]:
summary_df

Unnamed: 0,model,custom_mean,custom_std,dunn_mean,dunn_std
0,cnn,0.101254,0.092667,0.132826,0.034878
1,cnn_vae,0.049717,0.081572,0.372235,0.094425
2,lstm,0.103438,0.058869,0.147526,0.063248
3,lstm_vae,0.048712,0.15404,0.220448,0.057539
4,rnn_vae,0.008445,0.026705,0.260568,0.096927
5,transformer,0.03943,0.083496,0.171519,0.061431
6,transformer_vae,0.047006,0.112227,0.333105,0.04734


## embeddings 64: cnn_vae

In [48]:
file_path = results_clf_dir + '/clusters_lstm_early.pkl'
with open(file_path, "rb") as f:
    data = pickle.load(f)

In [49]:
data

[{'fold_idx': 0,
  'silhouette': np.float64(0.09367101863214732),
  'dunn': np.float64(0.15293969429149226),
  'custom': np.float64(0.10541711310319699),
  'algorithm': 'GaussianMixture',
  'params': {'covariance_type': 'full',
   'init_params': 'kmeans',
   'max_iter': 100,
   'n_components': 5,
   'random_state': 42,
   'tol': 0.001}},
 {'fold_idx': 1,
  'silhouette': np.float64(0.09687153194172832),
  'dunn': np.float64(0.15565179373883234),
  'custom': np.float64(0.21628246804031356),
  'algorithm': 'GaussianMixture',
  'params': {'covariance_type': 'full',
   'init_params': 'kmeans',
   'max_iter': 100,
   'n_components': 5,
   'random_state': 42,
   'tol': 0.001}},
 {'fold_idx': 2,
  'silhouette': np.float64(0.0693401964484843),
  'dunn': np.float64(0.14649864186237624),
  'custom': np.float64(0.11509310984996082),
  'algorithm': 'GaussianMixture',
  'params': {'covariance_type': 'full',
   'init_params': 'kmeans',
   'max_iter': 100,
   'n_components': 5,
   'random_state': 42,
