This notebook contains the detailed clustering extraction and analysis.

- **Author**: Benkirane Ismail
- **Email**: [ibenkirane@mgb.org](mailto:ibenkirane@mgb.org)
- **Version**: 1.0.0
- **Date**: 2023-10-19

## Imports

In [None]:
import sys
import pandas as pd

sys.path.append('../')

from utils import UTILITIES, CORRELATION, FEATURES

## Get Metadata

In [None]:
# Subject IDs grouped under the three coding labels used in this notebook.
subject_coding = {
    'Low': [
        1003, 1005, 1008, 1011, 1013, 1025,
        1032, 1033, 1034, 1037, 1042,
    ],
    'Medium': [
        1002, 1007, 1009, 1015, 1020, 1023,
        1024, 1026, 1031, 1040, 1041,
    ],
    'High': [
        1001, 1017, 1021, 1022, 1029, 1039,
    ],
}

# Subject IDs flagged as removed. Not referenced by any other visible cell;
# presumably the exclusion is applied upstream -- TODO confirm.
subject_removed = [
    1002, 1005, 1009, 1011, 1016,
    1021, 1023, 1029, 1034, 1036,
]

In [None]:
# Measurements handed to CORRELATION / FEATURES and to the feature-name lookup.
# A plain string here (instead of a list) switches the data-loading cell to
# that measurement's own sub-directory.
desired_measurement = [
    'Empatica',
    'Transcript',
    'Audio',
    'FaceReader',
    'GoPro',
    'SRE',
]

# When True, the "Express Clustering Analysis" cell runs the bulk export.
save = True

In [None]:
# Instantiate the project helpers imported from `utils`.
# CORRELATION and FEATURES both receive the selected measurement(s);
# UTILITIES takes no arguments.
utilities = UTILITIES()
correlation = CORRELATION(desired_measurement)  # not used by any later visible cell
features = FEATURES(desired_measurement)

# Data Loading

In [None]:
# Pick the directory holding the pre-computed feature tables: a single
# measurement name (str) has its own sub-directory, anything else (e.g. a
# list of measurements) uses the combined files at the top level.
features_dir = '../computed_features'
if isinstance(desired_measurement, str):
    features_dir = f'{features_dir}/{desired_measurement}'

# Two tables: all features, and the "stand" features
# (presumably standardized -- TODO confirm).
all_features = pd.read_csv(f'{features_dir}/all_features.csv')
stand_features = pd.read_csv(f'{features_dir}/stand_features.csv')

# Derived structures used by the clustering and plotting cells below.
features_names = utilities.get_feature_names(all_features, desired_measurement)
features_grouping = utilities.group_features_by_label(stand_features, affect_subject=True)

# Express Clustering Analysis

In [None]:
# Run the bulk clustering export only when the notebook-level `save` flag is
# set; where the results are written is decided inside FEATURES (not visible
# here).
if save:
    features.save_all_clustering_results(stand_features)

## Get Clustering

In [None]:
# Cluster the grouped features; the call returns the cluster assignment and
# the feature importances, both consumed by the "Results" cells below.
clusters_dict, feature_importance = features.get_subjects_clusters(
    features_grouping,
    analysis='single',
    method='Silhouette',
    nb_clusters=2,
    projections='PCA',
    plot=False,
)

In [None]:
# Relate the clustering output to the Low/Medium/High subject coding.
# NOTE(review): `coding_dict` is not read by any later visible cell.
coding_dict = features.map_subjects_to_code(clusters_dict, subject_coding)

In [None]:
# Consistency check across emotions; `count` is inspected in the next cell.
clusters, count = features.get_subject_consistency(
    clusters_dict,
    verbose=True,
    min_nb_emotions=4,
)

In [None]:
# Show only the entries of `count` whose value is exactly 4 (the same number
# passed as `min_nb_emotions` in the previous cell).
for pair in count:
    if count[pair] != 4:
        continue
    print(pair)

In [None]:
# Plot cluster intersections across emotions.
features.plot_intersections(clusters_dict, nb_emotions=3)  # only if 2 clusters

## Results

### Clusters

In [None]:
# For every emotion, print each cluster's members and tag each member with
# its coding label. Labels are tried in this order and the first match wins;
# a subject found under no label gets no tag line.
coding_levels = ('Low', 'Medium', 'High')

for emotion in clusters_dict:
    print(f'Emotion : {emotion}')
    emotion_clusters = clusters_dict[emotion]
    for cluster in emotion_clusters:
        members = emotion_clusters[cluster]
        print(f'Cluster {cluster} : {members}')
        for subject in members:
            for level in coding_levels:
                if subject in subject_coding[level]:
                    print(f'{subject} : {level}')
                    break

### Feature Importance

In [None]:
# Print the feature importances nested by emotion and cluster, leaving out
# features whose importance equals 0.
for emotion, per_cluster in feature_importance.items():
    print(f'Emotion : {emotion}')
    for cluster, importances in per_cluster.items():
        print("   ", cluster)
        for feature, importance in importances.items():
            if importance != 0:
                print(f'        {feature} : {importance}')

### Visualize Subject Projections

In [None]:
# Plot the subjects' PCA projections from the grouped features.
features.plot_subjects_pca_projections(features_grouping)

In [None]:
# Same projection plot, with subjects grouped by the Low/Medium/High coding.
features.plot_subject_coding_projections(features_grouping, subject_coding)

In [None]:
# Hand-written three-way grouping of subject IDs, passed to the projection
# plot in place of the Low/Medium/High coding. It contains exactly the coded
# subjects that are not listed in `subject_removed`.
d = {
    'Cluster 1': [1003, 1007, 1013, 1015, 1020, 1024, 1026],
    'Cluster 2': [1001, 1031, 1032, 1037, 1039],
    'Cluster 3': [1008, 1017, 1022, 1025, 1033, 1040, 1041, 1042],
}

In [None]:
# Reuse the coding-projection plot with the manual grouping `d` as the
# subject-to-group mapping.
features.plot_subject_coding_projections(features_grouping, d)