# KNeighbors

In [148]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import accuracy_score, balanced_accuracy_score,  f1_score, precision_score, recall_score, confusion_matrix

In [149]:
df = pd.read_csv("../../data/combined_subjects.csv")

In [150]:
df.head()

Unnamed: 0.1,Unnamed: 0,net_acc_mean,net_acc_std,net_acc_min,net_acc_max,EDA_phasic_mean,EDA_phasic_std,EDA_phasic_min,EDA_phasic_max,EDA_smna_mean,...,ACC_z_min,ACC_z_max,0_mean,0_std,0_min,0_max,BVP_peak_freq,TEMP_slope,subject,label
0,0,1.331891,0.153556,1.014138,1.678399,2.247876,1.112076,0.367977,4.459367,1.592308,...,-2.6e-05,6e-05,0.027558,0.013523,0.0,0.087383,0.080556,-0.000102,2,1
1,1,1.218994,0.090108,1.014138,1.4858,1.781323,1.203991,0.232625,4.459367,1.34775,...,-2.6e-05,6e-05,0.02342,0.01531,0.0,0.087383,0.144444,-0.000424,2,1
2,2,1.143312,0.110987,0.948835,1.4858,1.173169,1.285422,0.00695,4.459367,0.752335,...,-1.5e-05,4.9e-05,0.018759,0.012604,0.0,0.071558,0.102778,-0.000814,2,1
3,3,1.020669,0.135308,0.81109,1.239944,0.311656,0.27865,0.00695,1.303071,0.198576,...,-5e-06,3.7e-05,0.022888,0.01218,0.000688,0.054356,0.108333,-0.000524,2,1
4,4,0.887458,0.116048,0.727406,1.125306,0.163826,0.110277,0.00695,0.369298,0.11808,...,2e-06,3.7e-05,0.028105,0.010415,0.002752,0.054356,0.147222,-0.000165,2,1


In [151]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2091 entries, 0 to 2090
Data columns (total 45 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       2091 non-null   int64  
 1   net_acc_mean     2091 non-null   float64
 2   net_acc_std      2091 non-null   float64
 3   net_acc_min      2091 non-null   float64
 4   net_acc_max      2091 non-null   float64
 5   EDA_phasic_mean  2091 non-null   float64
 6   EDA_phasic_std   2091 non-null   float64
 7   EDA_phasic_min   2091 non-null   float64
 8   EDA_phasic_max   2091 non-null   float64
 9   EDA_smna_mean    2091 non-null   float64
 10  EDA_smna_std     2091 non-null   float64
 11  EDA_smna_min     2091 non-null   float64
 12  EDA_smna_max     2091 non-null   float64
 13  EDA_tonic_mean   2091 non-null   float64
 14  EDA_tonic_std    2091 non-null   float64
 15  EDA_tonic_min    2091 non-null   float64
 16  EDA_tonic_max    2091 non-null   float64
 17  BVP_mean      

## Data Preparation

In [152]:
features = ["EDA_tonic_max", "label"]

In [153]:
df_feat = df[features]
df_feat.head()

Unnamed: 0,EDA_tonic_max,label
0,2.55475,1
1,2.477276,1
2,2.037179,1
3,2.037179,1
4,2.037179,1


In [154]:
df_feat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2091 entries, 0 to 2090
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   EDA_tonic_max  2091 non-null   float64
 1   label          2091 non-null   int64  
dtypes: float64(1), int64(1)
memory usage: 32.8 KB


### Merged Amusement

In [155]:
df_feat_merged_amusement = df_feat.copy()
df_feat_merged_amusement["label"] = df_feat_merged_amusement["label"].replace([0], 1)

In [156]:
df_feat_merged_amusement["label"].unique()

array([1, 2])

In [157]:
y_merged_amusement = np.array(df_feat_merged_amusement.pop('label'))
X_merged_amusement = np.array(df_feat_merged_amusement)

In [158]:
X_train_merged_amusement, X_test_merged_amusement, y_train_merged_amusement, y_test_merged_amusement = train_test_split(X_merged_amusement, y_merged_amusement, test_size=0.25, random_state=42)

### Dropped amusement

In [159]:
df_feat_no_amusement = df_feat[df_feat["label"] != 0]

In [160]:
df_feat_no_amusement["label"].unique()

array([1, 2])

In [161]:
y_no_amusement = np.array(df_feat_no_amusement.pop('label'))
X_no_amusement = np.array(df_feat_no_amusement)

In [162]:
X_train_no_amusement, X_test_no_amusement, y_train_no_amusement, y_test_no_amusement = train_test_split(X_no_amusement, y_no_amusement, test_size=0.25, random_state=42)

## Training

In [163]:
parameters = dict(
    n_neighbors=(2,),
    weights=("uniform", "distance"),
    algorithm=("ball_tree", "kd_tree", "brute")
)

In [164]:
neighbor = KNeighborsClassifier()

### Merged Amusement

In [165]:
clf_neighbor_merged_amusement = GridSearchCV(estimator=neighbor, param_grid=parameters)

In [166]:
clf_neighbor_merged_amusement.fit(X_train_merged_amusement, y_train_merged_amusement)

In [167]:
clf_neighbor_merged_amusement.best_estimator_

### Dropped Amusement

In [168]:
clf_neighbor_no_amusement = GridSearchCV(estimator=neighbor, param_grid=parameters)

In [169]:
clf_neighbor_no_amusement.fit(X_train_no_amusement, y_train_no_amusement)

In [170]:
clf_neighbor_no_amusement.best_estimator_

### Evaluation

### Merged Amusement

In [171]:
y_pred_merged_amusement = clf_neighbor_merged_amusement.predict(X_test_merged_amusement)

In [172]:
accuracy_score(y_test_merged_amusement, y_pred_merged_amusement)

0.892925430210325

In [173]:
balanced_accuracy_score(y_test_merged_amusement, y_pred_merged_amusement)

0.8606791177646161

In [174]:
f1_score(y_test_merged_amusement, y_pred_merged_amusement)

0.9263157894736842

In [175]:
precision_score(y_test_merged_amusement, y_pred_merged_amusement)

0.9190600522193212

In [176]:
recall_score(y_test_merged_amusement, y_pred_merged_amusement)

0.9336870026525199

In [177]:
confusion_matrix(y_test_merged_amusement, y_pred_merged_amusement, labels=[1, 2])

array([[352,  25],
       [ 31, 115]])

### Dropped Amusement

In [178]:
y_pred_no_amusement = clf_neighbor_no_amusement.predict(X_test_no_amusement)

In [179]:
accuracy_score(y_test_no_amusement, y_pred_no_amusement)

0.8681818181818182

In [180]:
balanced_accuracy_score(y_test_no_amusement, y_pred_no_amusement)

0.8624708624708625

In [181]:
f1_score(y_test_no_amusement, y_pred_no_amusement)

0.8999999999999999

In [182]:
precision_score(y_test_no_amusement, y_pred_no_amusement)

0.9222614840989399

In [183]:
recall_score(y_test_no_amusement, y_pred_no_amusement)

0.8787878787878788

In [184]:
confusion_matrix(y_test_no_amusement, y_pred_no_amusement, labels=[1, 2])

array([[261,  36],
       [ 22, 121]])

## XAI

In [185]:
X_merged_amusement.shape

(2091, 1)

In [186]:
DecisionBoundaryDisplay.from_estimator(
    clf_neighbor_merged_amusement, 
    X_merged_amusement, 
    alpha=.4,
    response_method="predict"
)
plt.show()

IndexError: index 1 is out of bounds for axis 1 with size 1