This notebook contains the detailed correlation extraction and analysis.

- **Author**: Benkirane Ismail
- **Email**: [ibenkirane@mgb.org](mailto:ibenkirane@mgb.org)
- **Version**: 1.0.0
- **Date**: 2023-10-19

## Imports

In [None]:
import sys
import pandas as pd

sys.path.append('../')

from utils import UTILITIES, CORRELATION

## Get Metadata

In [None]:
# Measurement source(s) to analyse. A list selects the combined feature files
# at the root of the computed-features directory; a single string such as
# 'Empatica' selects that measurement's own sub-directory instead.
desired_measurement = [
    'Empatica',
    'Audio',
    'FaceReader',
    'GoPro',
]

# Gates the express extraction cell and is forwarded as `save=` to the
# correlation helpers further down.
save = True

# Subject IDs per analysis group.
# NOTE(review): 1042 is listed in 'Cluster 3' but not in 'All Subjects' --
# confirm whether that exclusion is intentional.
groups = {
    'Cluster 1': [
        1003, 1007, 1013, 1015, 1020, 1024, 1026,
    ],
    'Cluster 2': [
        1001, 1031, 1032, 1037, 1039,
    ],
    'Cluster 3': [
        1008, 1017, 1022, 1025, 1033, 1040, 1041, 1042,
    ],
    'All Subjects': [
        1001, 1003, 1007, 1008, 1013, 1015, 1017, 1020, 1022, 1024,
        1025, 1026, 1031, 1032, 1033, 1037, 1039, 1040, 1041,
    ],
}

In [None]:
# Helper objects shared by the cells below.
utilities = UTILITIES()
# The correlation helper is constructed with the measurement selection chosen
# in the metadata cell (a list of sources or a single source name).
correlation = CORRELATION(desired_measurement)

# Express Correlation extraction

In [None]:
# One-shot export of all correlation results for the configured subject
# groups; skipped entirely when saving is disabled.
if save:
    correlation.save_all_correlation_results(
        desired_measurement=desired_measurement,
        subject_groups=groups,
        include_pairs=True,
        select_features=False,
    )

# Data Loading

In [None]:
# A single measurement (string) has its own sub-directory of feature files;
# any other selection reads the files at the root of the features directory.
features_dir = '../computed_features'
if isinstance(desired_measurement, str):
    features_dir = f'{features_dir}/{desired_measurement}'

all_features = pd.read_csv(f'{features_dir}/all_features_windows.csv')
stand_features = pd.read_csv(f'{features_dir}/stand_features_windows.csv')

features_names = utilities.get_feature_names(all_features, desired_measurement)

# Correlation Extraction

In [None]:
# Group the `stand_features` table by label. The result is indexed by emotion
# in the cells below, and each entry supports `.iloc` row access.
features_grouping = utilities.group_features_by_label(stand_features, windows=True)

In [None]:
# Sanity check: within every row, each non-label entry must have the same
# length as the entry in the row's second column. Mismatches are printed as
# (emotion, row position, feature, reference length, actual length).
for emotion in features_grouping.keys():
    emotion_frame = features_grouping[emotion]
    for row_idx in range(len(emotion_frame)):
        record = emotion_frame.iloc[row_idx]
        columns = record.index
        for feature in columns:
            if feature == 'label':
                continue
            reference_len = len(record[columns[1]])
            observed_len = len(record[feature])
            if reference_len != observed_len:
                print(emotion, row_idx, feature, reference_len, observed_len)

## Extract the correlation matrices

In [None]:
# Extraction pipeline. Each step feeds the next:
#   grouped features -> correlation matrices -> grouped correlated features
#   -> pair counts (across subjects / across emotions) and consistency.
# `save` is forwarded to every step that accepts it.
corr_matrices = correlation.get_discrete_windows_correlation_matrices(
    features_grouping,
    features_names,
    include_pairs=False,
)

features_correlation = correlation.group_correlated_features(
    corr_matrices,
    desired_measurement,
    time_windows=True,
    save=save,
)

pair_count_across_subjects = correlation.get_pair_count_across_subjects(
    features_correlation,
    desired_measurement,
    ['Good correlation', 'Strong correlation'],
    group_cat=True,
    save=save,
)

pair_count_across_emotions = correlation.get_pair_count_across_emotions(
    features_correlation,
    desired_measurement,
    ['Good correlation', 'Strong correlation'],
    group_cat=True,
    save=save,
)

correlation_consistency = correlation.get_correlation_consistency(
    features_correlation,
    desired_measurement=desired_measurement,
    save=save,
)

## Results

In [None]:
import json

# Reload the consistency results stored for the Empatica measurement.
# The path uses forward slashes: the previous backslash-separated literal
# relied on invalid escape sequences (`\C`, `\E`, `\c`) and only resolved on
# Windows. Forward slashes are accepted on Windows as well.
# NOTE: this rebinds `correlation_consistency`, replacing any value computed
# by the extraction cell earlier in the notebook.
with open('Analysis/Correlation/Empatica/correlation_consistency.json', 'r') as f:
    correlation_consistency = json.load(f)

### Consistent correlated features

In [None]:
# Report every (pair, emotion valence) combination that lists at least 10
# subjects, together with those subjects.
for pair, per_valence in correlation_consistency.items():
    for emotion_valence, subjects in per_valence.items():
        if len(subjects) < 10:
            continue
        print(f'The pair {pair} is consistent across {len(subjects)} subjects for {emotion_valence} emotions: ', subjects)

In [None]:
# Report the (pair, emotion valence) combinations whose subjects ALL belong to
# a single cluster defined in `groups`.
#
# Fixes over the previous version:
#   * it looked up 'Group 1' / 'Group 2', which are not keys of `groups`
#     (the keys are 'Cluster N'), so the cell raised KeyError;
#   * the membership flags were reset for every subject, so only the last
#     subject decided the outcome;
#   * the flags were undefined when a subject list was empty.
#
# IDs are compared as strings so that IDs read back from JSON match the
# integer IDs in `groups` -- TODO confirm the stored ID type.
cluster_members = {
    group_name: {str(member) for member in members}
    for group_name, members in groups.items()
    if group_name != 'All Subjects'  # catch-all group, not a cluster
}

for pair, per_valence in correlation_consistency.items():
    for emotion_valence, subjects in per_valence.items():
        if not subjects:
            # Nothing to attribute to a cluster.
            continue
        subject_ids = {str(subject) for subject in subjects}
        for group_name, members in cluster_members.items():
            # Subset test: every listed subject is a member of this cluster.
            if subject_ids <= members:
                print(f'{pair} is consistent across {len(subjects)} subjects for {emotion_valence} emotions in {group_name}')

### Number of correlated features per subject and per emotion

In [None]:
# Print, for each emotion and subject, how many entries each correlation
# category holds.
for emotion, per_subject in features_correlation.items():
    print(emotion)
    for subject_id, per_category in per_subject.items():
        print("     Subject: ", subject_id)
        for correlation_type, correlated in per_category.items():
            print("          ", correlation_type, len(correlated))

### Number of correlated features per emotion and per correlation threshold across subjects

In [None]:
# Print the across-subjects pair counts, nested as emotion -> correlation
# category -> stored value.
for emotion, per_category in pair_count_across_subjects.items():
    print(emotion)
    for correlation_type, pair_counts in per_category.items():
        print("     ", correlation_type)
        print("         ", pair_counts)

### Number of correlated features per subject and per correlation threshold across emotions

In [None]:
# Print the across-emotions pair counts: first-level key, then each
# second-level key with its stored value.
for threshold, per_category in pair_count_across_emotions.items():
    print(threshold)
    for correlation_type, pair_counts in per_category.items():
        print("     ", correlation_type)
        print("         ", pair_counts)

## Plotting Area

In [None]:
import matplotlib.pyplot as plt
import json
import os

# The plotting cells read saved results for a single measurement.
# NOTE: this rebinds `desired_measurement` from the list set in the metadata
# cell to a single string, which affects any earlier cell that is re-run.
desired_measurement = 'Empatica'

### Number of Good and Strong Correlations per Subject and per Emotion

In [None]:
# Load the saved per-subject / per-emotion correlation results and hand them
# to the plotting helper.
# The loaded data gets its own name: binding it to `correlation` (as before)
# replaced the CORRELATION helper with a plain dict, so the method call below
# failed with AttributeError.
with open(f'Correlation/{desired_measurement}/features_correlation.json', 'r') as file:
    saved_features_correlation = json.load(file)
correlation.save_correlation_plots_per_subect_per_emotion(saved_features_correlation, desired_measurement, save=save)

### Number of Good Correlations per subject across emotions

In [None]:
# Bar charts (2x2 grid): for each subject, the number of stored entries whose
# value equals exactly 1, 2, 3 or 4 emotions.
# The loaded JSON is kept under its own name so the `correlation` helper
# object is not overwritten, and the builtin `id` is no longer shadowed.
with open(f'Correlation/{desired_measurement}/PairCount/AcrossEmotions/Good_correlation.json', 'r') as file:
    pair_counts_by_subject = json.load(file)

fig, axs = plt.subplots(2, 2, figsize=(20, 12))
axs = axs.flatten()

for idx, nb_emotions in enumerate([1, 2, 3, 4]):
    # JSON layout used here: subject -> pair -> key -> count.
    pairs_per_subject = {
        subject_id: sum(
            1
            for per_key in subject_pairs.values()
            for count in per_key.values()
            if count == nb_emotions
        )
        for subject_id, subject_pairs in pair_counts_by_subject.items()
    }

    ax = axs[idx]
    bars = ax.bar(list(pairs_per_subject.keys()), list(pairs_per_subject.values()), color='#DC143C')
    ax.set_xlabel('Subject ID', fontsize=12)
    ax.set_ylabel('Number of correlated pairs', fontsize=12)
    ax.set_title(f'Number of correlated pairs present in {nb_emotions} emotions', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=10)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with its count.
    for bar in bars:
        yval = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, yval, int(yval), ha='center', va='bottom', fontsize=10)

# The layout rect leaves room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.suptitle(f'{desired_measurement} - Distribution of Correlation Pair Counts Across Emotions', fontsize=16, fontweight='bold')

plt.show()

### Number of Good Correlations per emotion across subjects

In [None]:
# Bar charts (2x2 grid): for each emotion, the number of stored entries whose
# value is at least 5, 10, 15 or 20 subjects.
nb_subjects_list = [5, 10, 15, 20]
emotions = ['Frustration', 'Pride', 'Joy', 'Shame']

# Read each emotion's file once instead of once per threshold. The data is
# kept under its own name so the `correlation` helper object is not
# overwritten.
pair_counts_by_emotion = {}
for emotion in emotions:
    with open(f'Correlation/{desired_measurement}/PairCount/AcrossSubjects/{emotion}/Good_correlation.json', 'r') as file:
        pair_counts_by_emotion[emotion] = json.load(file)

fig, axs = plt.subplots(2, 2, figsize=(20, 12))
axs = axs.flatten()

for idx, nb_subjects in enumerate(nb_subjects_list):
    # JSON layout used here: pair -> key -> count.
    pairs_per_emotion = {
        emotion: sum(
            1
            for per_key in pair_counts.values()
            for count in per_key.values()
            if count >= nb_subjects
        )
        for emotion, pair_counts in pair_counts_by_emotion.items()
    }

    ax = axs[idx]
    bars = ax.bar(list(pairs_per_emotion.keys()), list(pairs_per_emotion.values()), color='#DC143C')
    ax.set_xlabel('Emotion', fontsize=12)
    ax.set_ylabel('Number of correlated pairs', fontsize=12)
    ax.set_title(f'Number of correlated pairs present in at least {nb_subjects} subjects', fontsize=14)
    ax.tick_params(axis='x', labelrotation=45, labelsize=10)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Annotate each bar with its count.
    for bar in bars:
        yval = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, yval, int(yval), ha='center', va='bottom', fontsize=10)

# The layout rect leaves room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.suptitle(f'{desired_measurement} - Distribution of Correlation Pair Counts Across Subjects', fontsize=16, fontweight='bold')
plt.show()