### An exploration of clustering methods to derive insights from a heart rate dataset

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
import seaborn as sns

from sklearn.impute import KNNImputer
from matplotlib.cm import get_cmap
from matplotlib.colors import Normalize
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.spatial.distance import cdist

##### Data loading

In [2]:
# Read the heart-rate feature table from disk. The bare expression on the
# last line makes the notebook render a preview of the first five rows.
df = pd.read_csv(filepath_or_buffer="data/HR_data.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,HR_TD_Mean,HR_TD_Median,HR_TD_std,HR_TD_Min,HR_TD_Max,HR_TD_AUC,HR_TD_Kurtosis,HR_TD_Skew,HR_TD_Slope_min,...,upset,hostile,alert,ashamed,inspired,nervous,attentive,afraid,active,determined
0,0,78.663878,76.7,7.480043,67.25,92.48,23048.45,-1.091448,0.369955,-0.73,...,1.0,1.0,2.0,1.0,2.0,2.0,3.0,1.0,2.0,2.0
1,1,76.540732,76.61,2.584756,69.82,82.33,23959.92,-0.245338,0.338732,-0.36,...,2.0,1.0,3.0,2.0,2.0,2.0,3.0,1.0,3.0,3.0
2,2,78.173563,77.92,2.681255,72.22,82.8,20324.605,-0.615922,-0.233047,-0.63,...,1.0,1.0,2.0,1.0,3.0,2.0,3.0,2.0,3.0,3.0
3,3,83.073688,83.88,7.363598,69.42,96.12,24924.3,-0.86661,-0.046021,-0.465,...,1.0,1.0,2.0,1.0,3.0,2.0,3.0,2.0,3.0,3.0
4,4,72.28125,72.91,3.193762,64.95,79.98,23052.1,0.200401,-0.560948,-0.3725,...,3.0,1.0,3.0,2.0,3.0,3.0,4.0,2.0,4.0,4.0


##### Data preprocessing

In [3]:
# Define metadata and emotion columns.
# Both groups are excluded from the feature matrix `X` built further down.
meta_cols = ['Unnamed: 0', 'Round', 'Phase', 'Individual', 'Puzzler', 'Cohort']
emotion_cols = ['Frustrated', 'upset', 'hostile', 'alert', 'ashamed',
                'inspired', 'nervous', 'attentive', 'afraid', 'active', 'determined']


# Handle missing values
# KNNImputer selects neighbours by (NaN-aware) Euclidean distance, so the
# features must be on a comparable scale first; otherwise large-magnitude
# columns (e.g. HR_TD_AUC, ~2e4) decide the neighbours on their own and
# small-magnitude ones (skew, slope, ...) are effectively ignored.
# StandardScaler skips NaNs when fitting and leaves them in place when
# transforming, so it can be applied before the imputation.
signal_raw = df.loc[:, 'HR_TD_Mean':'EDA_TD_P_ReT'].copy()
impute_scaler = StandardScaler()
imputer = KNNImputer(n_neighbors=5)
signal_imputed_scaled = imputer.fit_transform(impute_scaler.fit_transform(signal_raw))

# Map the imputed values back to the original units and write them only into
# the cells that were missing, so observed measurements stay exactly as loaded.
signal_imputed = pd.DataFrame(
    impute_scaler.inverse_transform(signal_imputed_scaled),
    index=signal_raw.index,
    columns=signal_raw.columns,
)
df.loc[:, 'HR_TD_Mean':'EDA_TD_P_ReT'] = signal_raw.fillna(signal_imputed)

# Standardize
# NOTE(review): only the 'HR_TD_Mean':'EDA_TD_P_ReT' slice is imputed above;
# if X contains feature columns outside that slice they may still hold NaNs
# -- confirm against the full column list.
X = df.drop(columns=meta_cols + emotion_cols)
scaler = StandardScaler()
# Keep the feature names and the row index so that PCA loadings and cluster
# assignments can be traced back to named features and to the rows of `df`.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)


##### Data exploration 

##### K-means clustering

##### Agglomerative clustering

##### Results