In [59]:
import pandas as pd

In [60]:
# Read the raw dataset from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='emo.csv')
# Preview the first five rows as a quick sanity check of columns and values.
data.head(n=5)

Unnamed: 0,HeartRate,SkinConductance,EEG,Temperature,PupilDiameter,SmileIntensity,FrownIntensity,CortisolLevel,ActivityLevel,AmbientNoiseLevel,LightingLevel,EmotionalState,CognitiveState,EngagementLevel
0,61,8.937204,11.794946,36.501723,3.330181,0.689238,0.189024,0.603035,136,59,394,engaged,distracted,3
1,60,12.635397,19.151412,36.61891,3.428995,0.561056,0.091367,0.566671,155,39,479,engaged,distracted,1
2,81,3.660028,6.226098,36.176898,2.819286,0.417951,0.227355,1.422475,55,30,832,partially engaged,focused,3
3,119,0.56307,4.542968,37.205293,2.192961,0.140186,0.502965,1.669045,39,40,602,disengaged,focused,3
4,118,0.477378,0.996209,37.248118,2.450139,0.064471,0.695604,1.854076,10,42,908,disengaged,focused,3


In [61]:
# Count the missing entries in each column and print the per-column totals
missing_per_column = data.isna().sum()
print(missing_per_column)

HeartRate            0
SkinConductance      0
EEG                  0
Temperature          0
PupilDiameter        0
SmileIntensity       0
FrownIntensity       0
CortisolLevel        0
ActivityLevel        0
AmbientNoiseLevel    0
LightingLevel        0
EmotionalState       0
CognitiveState       0
EngagementLevel      0
dtype: int64


In [62]:
# Inspect each column's dtype; columns reported as `object` are the
# categorical ones that get one-hot encoded further down.
data.dtypes

HeartRate              int64
SkinConductance      float64
EEG                  float64
Temperature          float64
PupilDiameter        float64
SmileIntensity       float64
FrownIntensity       float64
CortisolLevel        float64
ActivityLevel          int64
AmbientNoiseLevel      int64
LightingLevel          int64
EmotionalState        object
CognitiveState        object
EngagementLevel        int64
dtype: object

In [63]:
# List the distinct values that appear in the CognitiveState column
pd.unique(data['CognitiveState'])

array(['distracted', 'focused'], dtype=object)

In [64]:
# List the distinct values that appear in the EmotionalState column
pd.unique(data['EmotionalState'])

array(['engaged', 'partially engaged', 'disengaged'], dtype=object)

In [65]:
# Do one-hot encoding on the categorical columns which are "EmotionalState" and "CognitiveState"
from sklearn.preprocessing import OneHotEncoder

# Every object-dtype column is treated as categorical.
categorical_columns = data.select_dtypes(include=['object']).columns

# Skip the encoding when nothing is left to encode (for example when this cell
# is run again after `data` has already been replaced by its encoded version).
if len(categorical_columns) > 0:
    encoder = OneHotEncoder(sparse_output=False)
    one_hot_encoded = encoder.fit_transform(data[categorical_columns])
    # Reuse data's own index so the column-wise concat below lines rows up
    # one-to-one even if `data` does not carry the default 0..n-1 index.
    data_encoded = pd.DataFrame(
        one_hot_encoded,
        columns=encoder.get_feature_names_out(categorical_columns),
        index=data.index,
    )
    # Swap the original string columns for their one-hot indicator columns.
    data = pd.concat([data.drop(categorical_columns, axis=1), data_encoded], axis=1)
data

Unnamed: 0,HeartRate,SkinConductance,EEG,Temperature,PupilDiameter,SmileIntensity,FrownIntensity,CortisolLevel,ActivityLevel,AmbientNoiseLevel,LightingLevel,EngagementLevel,EmotionalState_disengaged,EmotionalState_engaged,EmotionalState_partially engaged,CognitiveState_distracted,CognitiveState_focused
0,61,8.937204,11.794946,36.501723,3.330181,0.689238,0.189024,0.603035,136,59,394,3,0.0,1.0,0.0,1.0,0.0
1,60,12.635397,19.151412,36.618910,3.428995,0.561056,0.091367,0.566671,155,39,479,1,0.0,1.0,0.0,1.0,0.0
2,81,3.660028,6.226098,36.176898,2.819286,0.417951,0.227355,1.422475,55,30,832,3,0.0,0.0,1.0,0.0,1.0
3,119,0.563070,4.542968,37.205293,2.192961,0.140186,0.502965,1.669045,39,40,602,3,1.0,0.0,0.0,0.0,1.0
4,118,0.477378,0.996209,37.248118,2.450139,0.064471,0.695604,1.854076,10,42,908,3,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,98,3.897648,7.681519,36.274526,2.624275,0.404309,0.204719,1.215872,65,50,913,2,0.0,0.0,1.0,1.0,0.0
996,109,0.439062,0.352790,37.173929,2.489483,0.070776,0.638161,1.826544,23,43,642,2,1.0,0.0,0.0,1.0,0.0
997,108,1.077287,1.836462,37.073454,2.370298,0.011001,0.595518,1.781096,8,43,620,2,1.0,0.0,0.0,1.0,0.0
998,76,14.260010,19.309704,36.708047,3.393744,0.653693,0.171151,0.783958,110,38,779,1,0.0,1.0,0.0,1.0,0.0


In [66]:
# Double-check that every column is now numeric after the one-hot encoding
data.dtypes

HeartRate                             int64
SkinConductance                     float64
EEG                                 float64
Temperature                         float64
PupilDiameter                       float64
SmileIntensity                      float64
FrownIntensity                      float64
CortisolLevel                       float64
ActivityLevel                         int64
AmbientNoiseLevel                     int64
LightingLevel                         int64
EngagementLevel                       int64
EmotionalState_disengaged           float64
EmotionalState_engaged              float64
EmotionalState_partially engaged    float64
CognitiveState_distracted           float64
CognitiveState_focused              float64
dtype: object

In [67]:
import pandas as pd
import plotly.express as px

# Pairwise correlation between all columns of `data`.
correlation_matrix = data.corr()

# Colour-bar appearance, kept apart from the figure sizing for readability.
colorbar_options = dict(title="Correlation", thickness=15, len=0.7, x=1.05, y=0.5)

# Render the matrix as a heatmap with each value written inside its cell.
fig = px.imshow(
    correlation_matrix,
    text_auto=True,
    color_continuous_scale='Viridis',
    title="Correlation Matrix Heatmap",
)
fig.update_layout(
    width=800,
    height=800,
    margin=dict(l=20, r=20, t=50, b=20),
    coloraxis_colorbar=colorbar_options,
)

fig.show()


In [68]:
# Split the data with Engagement Level as the target variable
from sklearn.model_selection import train_test_split

# Separate the target column from the feature columns.
target_column = 'EngagementLevel'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)


In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [70]:
# Random Forest model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train on the training split, then predict the held-out test split.
rf = RandomForestClassifier(random_state=2).fit(X_train, y_train)
rf_y_pred = rf.predict(X_test)

# Overall accuracy first, followed by the per-class precision/recall/F1 report.
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_y_pred)
print(f"Random Forest Model Accuracy: {rf_accuracy*100:.2f}%\n")
print(classification_report(y_true=y_test, y_pred=rf_y_pred))

Random Forest Model Accuracy: 100.00%

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        20
           2       1.00      1.00      1.00        61
           3       1.00      1.00      1.00       119

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [71]:
# Gradient Boosting Classifier Model
from sklearn.ensemble import GradientBoostingClassifier

# Train on the training split, then predict the held-out test split.
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
gb_y_pred = gb.predict(X_test)

# Overall accuracy first, followed by the per-class precision/recall/F1 report.
gb_accuracy = accuracy_score(y_true=y_test, y_pred=gb_y_pred)
print(f"Gradient Boosting Model Accuracy: {gb_accuracy*100:.2f}%\n")
print(classification_report(y_true=y_test, y_pred=gb_y_pred))

Gradient Boosting Model Accuracy: 100.00%

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        20
           2       1.00      1.00      1.00        61
           3       1.00      1.00      1.00       119

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [72]:
#Decision Tree Classifier Model
from sklearn.tree import DecisionTreeClassifier

# Train on the training split, then predict the held-out test split.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_y_pred = dt.predict(X_test)

# Overall accuracy first, followed by the per-class precision/recall/F1 report.
dt_accuracy = accuracy_score(y_true=y_test, y_pred=dt_y_pred)
print(f"Decision Tree Model Accuracy: {dt_accuracy*100:.2f}%\n")
print(classification_report(y_true=y_test, y_pred=dt_y_pred))

Decision Tree Model Accuracy: 100.00%

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        20
           2       1.00      1.00      1.00        61
           3       1.00      1.00      1.00       119

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [73]:
#k-NN Model
from sklearn.neighbors import KNeighborsClassifier

# Fit a 5-nearest-neighbours classifier and predict the held-out test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_y_pred = knn.predict(X_test)

# Overall accuracy first, followed by the per-class precision/recall/F1 report.
knn_accuracy = accuracy_score(y_true=y_test, y_pred=knn_y_pred)
print(f"k-NN Model Accuracy: {knn_accuracy*100:.2f}%\n")
print(classification_report(y_true=y_test, y_pred=knn_y_pred))


k-NN Model Accuracy: 83.50%

              precision    recall  f1-score   support

           1       0.65      0.55      0.59        20
           2       0.88      0.82      0.85        61
           3       0.84      0.89      0.87       119

    accuracy                           0.83       200
   macro avg       0.79      0.75      0.77       200
weighted avg       0.83      0.83      0.83       200



In [74]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Build a side-by-side table of test-set metrics for the Decision Tree and
# Random Forest models.
#
# Every score is computed from the stored predictions and formatted into new
# strings rather than assigned back to `dt_accuracy` / `rf_accuracy`, so those
# variables stay numeric and this cell can be run more than once.
def _percent(score):
    """Return a 0-1 score as a percentage string with two decimal places."""
    return f"{score*100:.2f}"


# Model label -> predictions on the test split, in the table's row order.
predictions_by_model = {
    'Decision Tree': dt_y_pred,
    'Random Forest': rf_y_pred,
}

# Precision, recall and F1 are weighted by class support.
df = {
    'Model': list(predictions_by_model),
    'Accuracy Score': [
        _percent(accuracy_score(y_test, y_pred))
        for y_pred in predictions_by_model.values()
    ],
    'Recall Score': [
        _percent(recall_score(y_test, y_pred, average='weighted'))
        for y_pred in predictions_by_model.values()
    ],
    'Precision Score': [
        _percent(precision_score(y_test, y_pred, average='weighted'))
        for y_pred in predictions_by_model.values()
    ],
    'F1 Score': [
        _percent(f1_score(y_test, y_pred, average='weighted'))
        for y_pred in predictions_by_model.values()
    ],
}

comparison = pd.DataFrame(df)
comparison

Unnamed: 0,Model,Accuracy Score,Recall Score,Precision Score,F1 Score
0,Decision Tree,100.0,100.0,100.0,100.0
1,Random Forest,100.0,100.0,100.0,100.0
