In [29]:
# Import required libraries
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    balanced_accuracy_score,
    multilabel_confusion_matrix,
    precision_score,
    recall_score,
    f1_score
)
warnings.filterwarnings('ignore')
# Set display options for pandas
pd.set_option('display.max_columns', None)

# Function to load all .pkl files from a directory
def load_results(directory):
    model_names = ["cnn_vae", "lstm_vae", "rnn_vae", "transformer_vae","cnn", "lstm", "rnn", "transformer"] 
    results_df = pd.DataFrame()
    for root, _, files in os.walk(directory):  # Traverse through folders and files
        for filename in files:
            if filename.endswith(".pkl"):
                model = next((name for name in model_names if name in filename.lower()), "unknown") 
                file_path = os.path.join(root, filename)
                with open(file_path, "rb") as f:
                    data = pickle.load(f)
                    try:
                        df =  results_to_dataframe(data,model)
                    except:
                        df = foldresult_to_dataframe(data,model)
                        
                    results_df = pd.concat((results_df,df))
    return results_df

# Convert results to DataFrame for easier analysis
def results_to_dataframe(results,model):
    rows = []
    for result in results:
        # Convert params tuple to a dictionary if needed
        #if isinstance(result["params"], tuple):
        #    param_keys = ["param" + str(i) for i in range(len(result["params"]))]
        #    param_dict = dict(zip(param_keys, result["params"]))
        #else:
        #    param_dict = result["params"]
        # Flatten the results
        row = {
            "model": model,
            "fold": result["fold"],
            "y_test": result["y_test"],
            "y_pred": result["y_pred"]
            #"model": result["model"],
            #"fusion": result["fusion"],
            #"clf": result["clf"],
            #**param_dict,  # Add classifier hyperparameters
            #**result["results"],  # Add result metrics
        }
        rows.append(row)
    return pd.DataFrame(rows)

def foldresult_to_dataframe(result,model):
    rows = []
    row = {
        "model": model,
        "fold": result["fold"],
        "y_test": result["results"]["y_test"],
        "y_pred": result["results"]["y_pred"]
        #"model": result["model"],
        #"fusion": result["fusion"],
        #"clf": result["clf"],
        #**param_dict,  # Add classifier hyperparameters
        #**result["results"],  # Add result metrics
    }
    rows.append(row)
    return pd.DataFrame(rows)


def binary_results_to_dataframe(results):
    rows = []
    for result in results:
        for bdi_key, metrics in result["results"].items():
            row = {
                "fold": result["fold"],
                "model": result["model"],
                "fusion": result["fusion"],
                "clf": result["clf"],
                "bdi_task": bdi_key,  # Add the BDI task (e.g., "bdi_1", "bdi_2")
                **metrics,  # Add metrics for this task (e.g., accuracy, precision, recall)
            }
            rows.append(row)
    return pd.DataFrame(rows)

# Plot results (e.g., accuracy by model and fusion type)
def plot_results(results_df, metric, title):
    plt.figure(figsize=(10, 6))
    sns.barplot(data=results_df, x="model", y=metric, hue="fusion")
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("Model")
    plt.legend(title="Fusion Type")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_binary_results(results_df, metric, title):
    plt.figure(figsize=(10, 6))
    sns.barplot(data=results_df, x="bdi_task", y=metric, hue="model")
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("BDI Task")
    plt.legend(title="Model")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Analyze best hyperparameters
def analyze_hyperparameters(results_df, metric):
    best_results = results_df.loc[results_df.groupby(["model", "fusion", "clf"])[metric].idxmax()]
    print(f"Best results for {metric}:")
    display(best_results)

In [33]:
# Define paths to results directories
results_clf_dir = "/Users/crisgallego/Desktop/SMART_deepRLearning/results_all_experiments/classification/multimodal_emb/embeddings_64_GroupKFold"

# Load classifier results
clf_results_df = load_results(results_clf_dir)

# Display a summary of the results
print("Classifier Results:")
display(clf_results_df)

# Visualize metrics
#plot_results(clf_results_df, metric="precision_bdi", title="Accuracy by Model and Fusion Type")
# Analyze the best hyperparameters based on accuracy
#analyze_hyperparameters(clf_results_df, metric="accuracy_bdi")


Classifier Results:


Unnamed: 0,model,fold,y_test,y_pred
0,lstm,6,"[[0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,...","[[0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,..."
0,lstm,7,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,..."
0,lstm,5,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,..."
0,lstm,4,"[[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,..."
0,lstm,0,"[[1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,...","[[1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0,..."
0,lstm,1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,..."
0,lstm,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,..."
0,lstm,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
0,lstm,9,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1,..."
0,lstm,8,"[[1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,...","[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,..."


In [41]:
# Define a function to compute metrics for each row
def calculate_metrics(row):
    y_test = row['y_test']  # Ground truth
    y_pred = row['y_pred']  # Predictions

    # Exclude symptom 9 (index 8) from both y_test and y_pred
    y_test = np.delete(y_test, 8, axis=1)  # Remove symptom 9 from ground truth
    y_pred = np.delete(y_pred, 8, axis=1)  # Remove symptom 9 from predictions
    

    # Calculate metrics
    balanced_acc_per_label = [
        balanced_accuracy_score(y_test[:, i], y_pred[:, i]) for i in range(y_test.shape[1])
    ]
    balanced_acc = np.mean(balanced_acc_per_label)
    f1 = f1_score(y_test, y_pred, average='macro')
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')

    # Return as a dictionary
    return {
        'balanced_accuracy': balanced_acc,
        'f1_macro': f1,
        'precision_macro': precision,
        'recall_macro': recall,
        'acc_per_label': balanced_acc_per_label
    }

def calculate_metrics_label(row):
    y_test = np.array(row['y_test'])  # Convert to NumPy array if needed
    y_pred = np.array(row['y_pred'])  # Convert to NumPy array if needed

    # Ensure valid shape before deleting symptom 9 (index 8)
    if y_test.shape[1] > 8 and y_pred.shape[1] > 8:
        y_test = np.delete(y_test, 8, axis=1)
        y_pred = np.delete(y_pred, 8, axis=1)

    num_labels = y_test.shape[1]  # Should be 20 after removing symptom 9

    # Compute per-label metrics and store them in a list
    for i in range(num_labels):
        metrics_list.append({
            'model': row['model'],
            'fold': row['fold'],
            'label': i + 1,  # Label index (1-based)
            'balanced_accuracy': balanced_accuracy_score(y_test[:, i], y_pred[:, i]),
            'f1': f1_score(y_test[:, i], y_pred[:, i], zero_division=0),
            'precision': precision_score(y_test[:, i], y_pred[:, i], zero_division=0),
            'recall': recall_score(y_test[:, i], y_pred[:, i], zero_division=0)
        })

    return metrics_list

# Compute per-label metrics for each row in the DataFrame
metrics_list = []
for _, row in clf_results_df.iterrows():
    metrics_list.extend(calculate_metrics_label(row))  # Flatten the nested lists

# Convert to DataFrame
metrics_df = pd.DataFrame(metrics_list)

# Aggregate results per label and model
summary_df = metrics_df.groupby(['model', 'label']).agg(
    balanced_accuracy_mean=('balanced_accuracy', 'mean'),
    balanced_accuracy_std=('balanced_accuracy', 'std'),
    f1_mean=('f1', 'mean'),
    f1_std=('f1', 'std'),
    precision_mean=('precision', 'mean'),
    precision_std=('precision', 'std'),
    recall_mean=('recall', 'mean'),
    recall_std=('recall', 'std')
).reset_index()

In [37]:
# Assuming clf_results_df is the DataFrame
metrics_list = []
for _, row in clf_results_df.iterrows():
    metrics = calculate_metrics(row)
    metrics['model'] = row['model']
    metrics['fold'] = row['fold']
    metrics_list.append(metrics)

# Convert to DataFrame
metrics_df = pd.DataFrame(metrics_list)

# Group by 'model' and calculate the mean and standard deviation for each metric
summary_df = metrics_df.groupby('model').agg(
    balanced_accuracy_mean=('balanced_accuracy', 'mean'),
    balanced_accuracy_std=('balanced_accuracy', 'std'),
    f1_macro_mean=('f1_macro', 'mean'),
    f1_macro_std=('f1_macro', 'std'),
    precision_macro_mean=('precision_macro', 'mean'),
    precision_macro_std=('precision_macro', 'std'),
    recall_macro_mean=('recall_macro', 'mean'),
    recall_macro_std=('recall_macro', 'std')
).reset_index()

[{'balanced_accuracy': np.float64(0.6179050095248682),
  'f1_macro': np.float64(0.2975388766142437),
  'precision_macro': np.float64(0.24988011819973774),
  'recall_macro': np.float64(0.47089112903225805),
  'acc_per_label': [np.float64(0.719047619047619),
   np.float64(0.7482319660537482),
   np.float64(0.4857142857142857),
   np.float64(0.4673252279635258),
   np.float64(0.47916666666666663),
   np.float64(0.7142857142857143),
   np.float64(0.6044985941893158),
   np.float64(0.4958106409719313),
   np.float64(0.5548387096774194),
   np.float64(0.9333333333333333),
   np.float64(0.4901960784313726),
   np.float64(0.6279761904761905),
   np.float64(0.6537349397590362),
   np.float64(0.4411764705882353),
   np.float64(0.4520408163265306),
   np.float64(0.761480787253983),
   np.float64(0.7065217391304348),
   np.float64(0.6597222222222222),
   np.float64(0.6182065217391304),
   np.float64(0.7447916666666666)],
  'model': 'lstm',
  'fold': 6},
 {'balanced_accuracy': np.float64(0.47974300

In [42]:
summary_df

Unnamed: 0,model,label,balanced_accuracy_mean,balanced_accuracy_std,f1_mean,f1_std,precision_mean,precision_std,recall_mean,recall_std
0,lstm,1,0.608164,0.113182,0.118637,0.060388,0.074918,0.050286,0.443616,0.260017
1,lstm,2,0.629797,0.138723,0.268179,0.131092,0.241114,0.197375,0.464144,0.270901
2,lstm,3,0.510214,0.033641,0.176908,0.048307,0.33133,0.329869,0.278997,0.127552
3,lstm,4,0.505433,0.045417,0.229493,0.147067,0.19766,0.143956,0.37474,0.227437
4,lstm,5,0.533329,0.061351,0.276174,0.115853,0.332522,0.127114,0.247747,0.122248
5,lstm,6,0.674011,0.122519,0.330793,0.225846,0.315907,0.24136,0.390008,0.183991
6,lstm,7,0.602532,0.064986,0.327845,0.090825,0.290458,0.197077,0.474652,0.169681
7,lstm,8,0.51002,0.029344,0.266561,0.057331,0.300664,0.133542,0.267773,0.077082
8,lstm,9,0.534639,0.074262,0.226315,0.105169,0.210502,0.188757,0.404667,0.087912
9,lstm,10,0.695567,0.248928,0.180724,0.149453,0.1493,0.182991,0.521607,0.483134


In [2]:
# Function to remap labels
def remap_labels(labels):
    # Convert nested lists (if present) to numpy arrays for easier manipulation
    labels = np.array(labels)
    labels = np.where(labels <= 1, 0, 1)  # Map 0 or 1 to 0, and 2 or 3 to 1
    return labels.tolist()

# Apply remap_labels to y_test and y_pred columns
clf_results_df["y_test"] = clf_results_df["y_test"].apply(lambda x: [remap_labels(row) for row in x])
clf_results_df["y_pred"] = clf_results_df["y_pred"].apply(lambda x: [remap_labels(row) for row in x])

NameError: name 'clf_results_df' is not defined

In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    balanced_accuracy_score,
    multilabel_confusion_matrix,
    precision_score,
    recall_score,
    f1_score
)
import seaborn as sns
import matplotlib.pyplot as plt

def calculate_metrics(y_test, y_pred):
    """
    Calculates and prints classification metrics and confusion matrices for multi-label data.

    Args:
        y_test (np.ndarray): Ground truth labels, shape (n_samples, n_labels)
        y_pred (np.ndarray): Predicted labels, shape (n_samples, n_labels)
    """
    # Classification report for per-label precision, recall, and F1-score
    print("Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))

    # Accuracy score (averaged across all labels)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall Accuracy: {accuracy:.4f}")

    # Macro average and balanced accuracy
    balanced_acc_per_label = [
        balanced_accuracy_score(y_test[:, i], y_pred[:, i]) for i in range(y_test.shape[1])
    ]
    macro_balanced_acc = np.mean(balanced_acc_per_label)
    print(f"Balanced Accuracy (Macro Average): {macro_balanced_acc:.4f}\n")

    # F1, Precision, and Recall (Macro Average across labels)
    precision = precision_score(y_test, y_pred, average="macro", zero_division=0)
    recall = recall_score(y_test, y_pred, average="macro", zero_division=0)
    f1 = f1_score(y_test, y_pred, average="macro", zero_division=0)
    print(f"Macro Precision: {precision:.4f}")
    print(f"Macro Recall: {recall:.4f}")
    print(f"Macro F1-Score: {f1:.4f}")

    # Confusion matrix (one for each label)
    mcm = multilabel_confusion_matrix(y_test, y_pred)

    # Create a 21x21 aggregated confusion matrix
    aggregated_cm = np.zeros((21, 21), dtype=int)
    for i in range(len(mcm)):
        aggregated_cm[i, i] = mcm[i][1, 1]  # True Positives
        aggregated_cm[i, :] += mcm[i][1, :]  # Add other predictions for label i

    # Normalize the confusion matrix
    normalized_cm = aggregated_cm / aggregated_cm.sum(axis=1, keepdims=True)
    normalized_cm = np.nan_to_num(normalized_cm)  # Handle division by zero

    # Visualize the normalized confusion matrix
    plt.figure(figsize=(12, 10))
    sns.heatmap(normalized_cm, annot=True, fmt='.2f', cmap='Blues', cbar=True, xticklabels=range(1, 22), yticklabels=range(1, 22))
    plt.title("Normalized Aggregated 21x21 Confusion Matrix")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.show()

def average_metrics_by_folds(df):
    """
    Computes average metrics across folds for each combination of model, fusion, clf, param0, and param1.

    Args:
        df (pd.DataFrame): DataFrame containing the columns: fold, model, fusion, clf, param0, param1, y_test, y_pred.

    Returns:
        pd.DataFrame: Aggregated metrics.
    """
    results = []

    # Group by unique combinations of model, fusion, clf, param0, and param1
    group_columns = ['model', 'fusion', 'clf', 'param0', 'param1']
    grouped = df.groupby(group_columns)

    for group_name, group_df in grouped:
        # Initialize accumulators for metrics
        total_accuracy = 0
        total_macro_balanced_acc = 0
        total_precision = 0
        total_recall = 0
        total_f1 = 0
        n_folds = len(group_df)

        for _, row in group_df.iterrows():
            y_test = np.array(row['y_test'])
            y_pred = np.array(row['y_pred'])
            
            print(y_test)
            print(y_pred)

            # Compute metrics for the current fold
            total_accuracy += accuracy_score(y_test, y_pred)

            # Compute balanced accuracy per label and average across labels
            balanced_acc_per_label = [
                balanced_accuracy_score(y_test[:, i], y_pred[:, i]) for i in range(y_test.shape[1])
            ]
            total_macro_balanced_acc += np.mean(balanced_acc_per_label)

            # Precision, Recall, F1
            total_precision += precision_score(y_test, y_pred, average="macro", zero_division=0)
            total_recall += recall_score(y_test, y_pred, average="macro", zero_division=0)
            total_f1 += f1_score(y_test, y_pred, average="macro", zero_division=0)

        # Average metrics over all folds
        avg_accuracy = total_accuracy / n_folds
        avg_macro_balanced_acc = total_macro_balanced_acc / n_folds
        avg_precision = total_precision / n_folds
        avg_recall = total_recall / n_folds
        avg_f1 = total_f1 / n_folds

        # Append the results
        results.append({
            'model': group_name[0],
            'fusion': group_name[1],
            'clf': group_name[2],
            'param0': group_name[3],
            'param1': group_name[4],
            'avg_accuracy': avg_accuracy,
            'avg_macro_balanced_acc': avg_macro_balanced_acc,
            'avg_precision': avg_precision,
            'avg_recall': avg_recall,
            'avg_f1': avg_f1
        })

    return pd.DataFrame(results)

# Example usage:
if __name__ == "__main__":
    # Compute average metrics by folds
    aggregated_metrics = average_metrics_by_folds(clf_results_df)


NameError: name 'clf_results_df' is not defined

In [4]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)
import seaborn as sns
import matplotlib.pyplot as plt

def calculate_metrics_per_symptom(y_test, y_pred):
    """
    Calculates metrics and confusion matrices for each symptom.

    Args:
        y_test (np.ndarray): Ground truth labels, shape (n_samples, n_labels)
        y_pred (np.ndarray): Predicted labels, shape (n_samples, n_labels)

    Returns:
        pd.DataFrame: DataFrame with metrics for each symptom.
    """
    n_labels = y_test.shape[1]
    metrics = []

    for i in range(n_labels):
        # Extract data for the current symptom
        y_test_symptom = y_test[:, i]
        y_pred_symptom = y_pred[:, i]

        # Calculate confusion matrix
        cm = confusion_matrix(y_test_symptom, y_pred_symptom)
        #tn, fp, fn, tp = cm.ravel()

        # Calculate metrics
        accuracy = balanced_accuracy_score(y_test_symptom, y_pred_symptom)
        precision = precision_score(y_test_symptom, y_pred_symptom, zero_division=0)
        recall = recall_score(y_test_symptom, y_pred_symptom, zero_division=0)
        f1 = f1_score(y_test_symptom, y_pred_symptom, zero_division=0)

        # Store metrics
        metrics.append({
            'Symptom': f'Symptom {i+1}',
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1,
            'cm':cm
            #'TP': tp,
            #'FP': fp,
            #'FN': fn,
            #'TN': tn
        })

    return pd.DataFrame(metrics)

def calculate_metrics_by_combination(df):
    """
    Calculates average metrics across folds for each combination of model, fusion, clf, param0, and param1.

    Args:
        df (pd.DataFrame): DataFrame containing the columns: fold, model, fusion, clf, param0, param1, y_test, y_pred.

    Returns:
        pd.DataFrame: Aggregated metrics per combination.
    """
    results = []
    group_columns = ['model', 'fusion', 'clf', 'param0', 'param1']
    grouped = df.groupby(group_columns)

    for group_name, group_df in grouped:
        n_folds = len(group_df)
        symptom_metrics = []
        
        for _, row in group_df.iterrows():
            y_test = np.array(row['y_test'])
            y_pred = np.array(row['y_pred'])

            # Calculate metrics for this fold
            fold_metrics = calculate_metrics_per_symptom(y_test, y_pred)
            symptom_metrics.append(fold_metrics)

        # Average metrics across folds for each symptom
        aggregated_metrics = (
            pd.concat(symptom_metrics)
            .groupby('Symptom')
            .mean()
            .reset_index()
        )
        aggregated_metrics['Combination'] = [group_name] * len(aggregated_metrics)
        results.append(aggregated_metrics)

    return pd.concat(results, ignore_index=True)

In [5]:
with open('/Users/crisgallego/Desktop/results_output/results_autoencoder_fusion_clf/pkl/clf_ema_noDemo_groupKFold.pkl', "rb") as f:
    data = pickle.load(f)

In [6]:
results = data['results']

In [7]:
all_folds_metrics = []
for fold in results:
    y_test = fold['y_test']
    y_pred = fold['y_pred']
    df_fold = calculate_metrics_per_symptom(y_test,y_pred)
    all_folds_metrics.append(df_fold)    

In [8]:
# Initialize a dictionary to store lists for each symptom's metrics and confusion matrices
symptom_metrics = {
    'Symptom': [],
    'Accuracy': [],
    'Precision': [],
    'Recall': [],
    'F1-Score': []
}

# Loop through each DataFrame and extract the metrics for each symptom
for df in all_folds_metrics:
    for idx, row in df.iterrows():
        symptom = row['Symptom']
        accuracy = row['Accuracy']
        precision = row['Precision']
        recall = row['Recall']
        f1_score = row['F1-Score']

        # Append the values to the corresponding lists
        symptom_metrics['Symptom'].append(symptom)
        symptom_metrics['Accuracy'].append(accuracy)
        symptom_metrics['Precision'].append(precision)
        symptom_metrics['Recall'].append(recall)
        symptom_metrics['F1-Score'].append(f1_score)

# Convert the dictionary to a DataFrame
metrics_df = pd.DataFrame(symptom_metrics)

# Now, calculate the averages for each symptom across all folds
average_results = []
for symptom in metrics_df['Symptom'].unique():
    symptom_df = metrics_df[metrics_df['Symptom'] == symptom]
    
    # Compute the mean of each column for this symptom
    avg_accuracy = symptom_df['Accuracy'].mean()
    avg_precision = symptom_df['Precision'].mean()
    avg_recall = symptom_df['Recall'].mean()
    avg_f1_score = symptom_df['F1-Score'].mean()
    
    # Append the averaged values for this symptom to the results
    average_results.append({
        'Symptom': symptom,
        'Accuracy': avg_accuracy,
        'Precision': avg_precision,
        'Recall': avg_recall,
        'F1-Score': avg_f1_score
    })

# Create a new DataFrame with the averaged results
average_df = pd.DataFrame(average_results)

In [9]:
average_df[average_df['F1-Score']>=0.5] 

# 1: Sadness
# 2: Pessimism
# 7: self-dislike
# 8: self-criticism
# 10: crying  
# 14: worthlesness
# 19: focus
# 20: tiredness

Unnamed: 0,Symptom,Accuracy,Precision,Recall,F1-Score
0,Symptom 1,0.859957,0.637175,0.763517,0.658583
1,Symptom 2,0.762482,0.538217,0.6595,0.52373
6,Symptom 7,0.811525,0.644131,0.671407,0.64444
7,Symptom 8,0.811898,0.595992,0.653788,0.576282
8,Symptom 9,0.789337,0.556653,0.608389,0.542406
12,Symptom 13,0.81627,0.620544,0.665187,0.615789
13,Symptom 14,0.845954,0.707639,0.81134,0.695806
17,Symptom 18,0.768982,0.63265,0.696285,0.631247
18,Symptom 19,0.849603,0.705969,0.817714,0.72462


In [2]:
average_df

NameError: name 'average_df' is not defined

In [3]:
balanced_acc_per_label = [
    balanced_accuracy_score(y_test[:, i], y_pred[:, i]) for i in range(y_test.shape[1])
]
macro_balanced_acc = np.mean(balanced_acc_per_label)

NameError: name 'y_test' is not defined

In [4]:
macro_balanced_acc

NameError: name 'macro_balanced_acc' is not defined

In [5]:
calculate_metrics_per_symptom(y_test,y_pred)

NameError: name 'calculate_metrics_per_symptom' is not defined

In [7]:
import shap

# Aggregate SHAP values for each symptom
#shap_values_per_symptom = {symptom: [] for symptom in y_test.columns}

for result in results:
    shap_values_fold = result['shap_values']
    
    # Aggregate SHAP values per symptom across folds
    for symptom, shap_values in shap_values_fold.items():
        shap_values_per_symptom[symptom].append(shap_values.values)

# Example: Plot SHAP summary for each symptom across folds
for symptom, shap_values_list in shap_values_per_symptom.items():
    shap_values_combined = shap_values_list[0]  # Start with the first fold's SHAP values
    for shap_values in shap_values_list[1:]:
        shap_values_combined = shap_values_combined.append(shap_values, ignore_index=True)

    # Generate a summary plot for the symptom's SHAP values across folds
    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values_combined, X_test, show=False)
    plt.title(f'SHAP Summary for {symptom}')
    plt.show()


NameError: name 'results' is not defined

In [None]:
result

{'fold': 0,
 'best_clf': 'XGBoost',
 'best_params': {'learning_rate': 0.1,
  'max_depth': 5,
  'n_estimators': 200,
  'subsample': 0.8},
 'y_test': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [1, 1, 0, ..., 1, 1, 0],
        [1, 1, 0, ..., 1, 1, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 'y_pred': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 1, 0, ..., 1, 1, 0],
        [0, 1, 0, ..., 1, 1, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 'y_proba': [array([[9.95897055e-01, 4.10297373e-03],
         [9.92674649e-01, 7.32534425e-03],
         [4.61813629e-01, 5.38186371e-01],
         [9.58029389e-01, 4.19706181e-02],
         [9.87446308e-03, 9.90125537e-01],
         [1.20863676e-01, 8.79136324e-01],
         [3.33672762e-03, 9.96663272e-01],
         [1.16708219e-01, 8.83291781e-01],
         [3.32597852e-01, 6.67402148e-01],
         [7.5831222

In [24]:
with open('/Users/crisgallego/Desktop/SMART_deepRLearning/results_all_experiments/clustering/multimodal/embeddings_32/clusters_transformer_early_9.pkl', "rb") as f:
    data = pickle.load(f)


In [25]:
data

[{'fold_idx': 0,
  'silhouette': np.float64(0.132198171632812),
  'dunn': np.float64(0.2252969197648058),
  'custom': 0,
  'algorithm': 'AgglomerativeClustering',
  'params': {'linkage': 'average',
   'metric': 'euclidean',
   'n_clusters': np.int64(12)}},
 {'fold_idx': 1,
  'silhouette': np.float64(-0.03671804293767248),
  'dunn': np.float64(0.2808412160626807),
  'custom': 0,
  'algorithm': 'AgglomerativeClustering',
  'params': {'linkage': 'average',
   'metric': 'euclidean',
   'n_clusters': np.int64(12)}},
 {'fold_idx': 2,
  'silhouette': np.float64(0.25668028617941013),
  'dunn': np.float64(0.27119760304590257),
  'custom': 0,
  'algorithm': 'AgglomerativeClustering',
  'params': {'linkage': 'average',
   'metric': 'euclidean',
   'n_clusters': np.int64(12)}},
 {'fold_idx': 3,
  'silhouette': np.float64(0.12914492591048637),
  'dunn': np.float64(0.09396056569207975),
  'custom': np.float64(0.30544297228245343),
  'algorithm': 'AgglomerativeClustering',
  'params': {'linkage': 'av