# Analysing and modeling

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the monitoring dataset and replace the engagement codes 1/2/3 with readable labels.
# Series.map leaves any value that is not a key of the mapping as NaN.
raw_path = "../api/data/emotional_monitoring_dataset_with_target.csv"
engagement_names = {1: "Disengaged", 2: "Moderately Engaged", 3: "Highly Engaged"}

dataset = pd.read_csv(raw_path).assign(
    EngagementLevel=lambda frame: frame["EngagementLevel"].map(engagement_names)
)
# Preview the first rows.
dataset.head()

In [None]:
# Summarise the columns: dtype and non-null count of each one.
dataset.info()

In [None]:
# Number of missing values per column.
dataset.isnull().sum()

# EDA

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of the numeric columns.
dataset.describe()

In [None]:
# Pairwise relationships between the numeric columns, with each variable's distribution on the diagonal.
sns.pairplot(dataset)

## Engagement Level

In [None]:
# Bar chart with the number of rows in each EngagementLevel class.
sns.countplot(dataset, x="EngagementLevel")
plt.title("Count of levels of categorical variable")

In [None]:
# Frequency table: one row per EngagementLevel class, a single "count" column.
pd.crosstab(index = dataset["EngagementLevel"], columns = "count")

In [None]:
# Box plot of every measured feature, split by EngagementLevel.
boxplot_features = [
    "HeartRate", "SkinConductance", "EEG",
    "Temperature", "PupilDiameter", "SmileIntensity",
    "FrownIntensity", "CortisolLevel", "ActivityLevel",
    "AmbientNoiseLevel", "LightingLevel",
]

fig, axs = plt.subplots(4, 3, figsize=(20, 24))
panels = axs.ravel()  # row-major order: features fill the grid left-to-right, top-to-bottom

for panel, feature in zip(panels, boxplot_features):
    sns.boxplot(x="EngagementLevel", y=feature, data=dataset, ax=panel)

# The grid has more panels than features; hide the unused ones instead of showing empty frames.
for panel in panels[len(boxplot_features):]:
    panel.set_visible(False)

plt.show()

## Emotional State

In [None]:
# Bar chart with the number of rows in each EmotionalState class.
sns.countplot(dataset, x="EmotionalState")
plt.title("Count of levels of categorical variable")

In [None]:
# Frequency table: one row per EmotionalState class, a single "count" column.
pd.crosstab(index = dataset["EmotionalState"], columns = "count")

In [None]:
# Box plot of every measured feature, split by EmotionalState.
boxplot_features = [
    "HeartRate", "SkinConductance", "EEG",
    "Temperature", "PupilDiameter", "SmileIntensity",
    "FrownIntensity", "CortisolLevel", "ActivityLevel",
    "AmbientNoiseLevel", "LightingLevel",
]

fig, axs = plt.subplots(4, 3, figsize=(20, 24))
panels = axs.ravel()  # row-major order: features fill the grid left-to-right, top-to-bottom

for panel, feature in zip(panels, boxplot_features):
    sns.boxplot(x="EmotionalState", y=feature, data=dataset, ax=panel)

# The grid has more panels than features; hide the unused ones instead of showing empty frames.
for panel in panels[len(boxplot_features):]:
    panel.set_visible(False)

plt.show()

## Cognitive State

In [None]:
# Bar chart with the number of rows in each CognitiveState class.
sns.countplot(dataset, x="CognitiveState")
plt.title("Count of levels of categorical variable")

In [None]:
# Frequency table: one row per CognitiveState class, a single "count" column.
pd.crosstab(index = dataset["CognitiveState"], columns = "count")

In [None]:
# Box plot of every measured feature, split by CognitiveState.
boxplot_features = [
    "HeartRate", "SkinConductance", "EEG",
    "Temperature", "PupilDiameter", "SmileIntensity",
    "FrownIntensity", "CortisolLevel", "ActivityLevel",
    "AmbientNoiseLevel", "LightingLevel",
]

fig, axs = plt.subplots(4, 3, figsize=(20, 24))
panels = axs.ravel()  # row-major order: features fill the grid left-to-right, top-to-bottom

for panel, feature in zip(panels, boxplot_features):
    sns.boxplot(x="CognitiveState", y=feature, data=dataset, ax=panel)

# The grid has more panels than features; hide the unused ones instead of showing empty frames.
for panel in panels[len(boxplot_features):]:
    panel.set_visible(False)

plt.show()

# Modeling Engagement Level

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Features: every column except the three target labels.
X = dataset.drop(columns=["EmotionalState", "CognitiveState", "EngagementLevel"])
# Target modelled in this section.
y = dataset["EngagementLevel"]

# Stratify so train and test keep the class proportions of the full data; the models are
# compared with macro-averaged metrics, which are sensitive to classes that end up
# under-represented in the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=777, stratify=y)

## K Neighbors

In [None]:
# Tune k for a k-nearest-neighbours classifier, scored by mean cross-validated macro-F1.
# The features are standardised inside the pipeline so that distances are not dominated by
# the columns with the largest numeric range; the scaler is refit on each CV training fold.
kneighbors_params = {
    "knn__n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
kneighbors_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier()),
])
# Uses GridSearchCV's default CV (stratified k-fold for classifiers) rather than LeaveOneOut:
# with a single sample per validation fold, macro-F1 can only be 0 or 1 per fold.
kneighbors_grid = GridSearchCV(kneighbors_pipeline, param_grid=kneighbors_params, scoring="f1_macro")
kneighbors_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
kneighbors_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
kneighbors_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
kneighbors = kneighbors_grid.best_estimator_

## SVM

In [None]:
# Tune the kernel of a support-vector classifier, scored by mean cross-validated macro-F1.
# `degree` is only used by the polynomial kernel, so it is searched for "poly" alone;
# crossing it with "linear"/"rbf" would refit identical models several times.
svm_params = [
    {"svc__kernel": ["linear", "rbf"]},
    {"svc__kernel": ["poly"], "svc__degree": [3, 4, 5]},
]

# Standardise the features inside the pipeline: SVMs are sensitive to feature scale, and
# fitting the scaler per CV training fold avoids leaking validation data.
svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", SVC()),
])

svm_grid = GridSearchCV(svm_pipeline, param_grid=svm_params, scoring="f1_macro")
svm_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
svm_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
svm_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
svm = svm_grid.best_estimator_

## Random Forest

In [None]:
# Exhaustive grid search for a random forest, scored by mean cross-validated macro-F1.
forest_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20]
}

# n_jobs=-1 runs the 256 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
forest_grid = GridSearchCV(
    RandomForestClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=forest_params,
    scoring="f1_macro",
    n_jobs=-1,
)
forest_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
forest_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
forest_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
random_forest = forest_grid.best_estimator_

## GradientBoost

In [None]:
# Exhaustive grid search for gradient boosting, scored by mean cross-validated macro-F1.
boosting_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20],
    "max_features": [None, "sqrt"]
}

# n_jobs=-1 runs the 512 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
boosting_grid = GridSearchCV(
    GradientBoostingClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=boosting_params,
    scoring="f1_macro",
    n_jobs=-1,
)
boosting_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
boosting_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
boosting_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
gradient_boosting = boosting_grid.best_estimator_

## Model Selection

In [None]:
# Macro-averaged F1 on the held-out test split, one value per tuned model.
(
    kneighbors_f1_score,
    svm_f1_score,
    random_f1_forest_score,
    gradient_f1_boosting_score,
) = (
    f1_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged precision on the held-out test split, one value per tuned model.
(
    kneighbors_precision_score,
    svm_presicion_score,
    random_precision_forest_score,
    gradient_precision_boosting_score,
) = (
    precision_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged recall on the held-out test split, one value per tuned model.
(
    kneighbors_recall_score,
    svm_recall_score,
    random_recall_forest_score,
    gradient_recall_boosting_score,
) = (
    recall_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Accuracy on the held-out test split, one value per tuned model.
(
    kneighbors_accuracy_score,
    svm_accuracy_score,
    random_accuracy_forest_score,
    gradient_accuracy_boosting_score,
) = (
    accuracy_score(y_test, fitted.predict(X_test))
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Comparison table: one row per model, one column per test-set metric.
model_names = ["K nearest neighbors", "SVM", "Random Forest", "GradientBoost"]
s = dict(
    Accuracy=[kneighbors_accuracy_score, svm_accuracy_score,
              random_accuracy_forest_score, gradient_accuracy_boosting_score],
    Precision=[kneighbors_precision_score, svm_presicion_score,
               random_precision_forest_score, gradient_precision_boosting_score],
    Recall=[kneighbors_recall_score, svm_recall_score,
            random_recall_forest_score, gradient_recall_boosting_score],
    F1=[kneighbors_f1_score, svm_f1_score,
        random_f1_forest_score, gradient_f1_boosting_score],
)
scores = pd.DataFrame(s, index=model_names)
scores

# Modeling Emotional State

In [None]:
# Features: every column except the three target labels.
X = dataset.drop(columns=["EmotionalState", "CognitiveState", "EngagementLevel"])
# Target modelled in this section.
y = dataset["EmotionalState"]

# Stratify so train and test keep the class proportions of the full data; the models are
# compared with macro-averaged metrics, which are sensitive to classes that end up
# under-represented in the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=777, stratify=y)

## Random Forest

In [None]:
# Exhaustive grid search for a random forest, scored by mean cross-validated macro-F1.
forest_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20]
}

# n_jobs=-1 runs the 256 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
forest_grid = GridSearchCV(
    RandomForestClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=forest_params,
    scoring="f1_macro",
    n_jobs=-1,
)
forest_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
forest_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
forest_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
random_forest = forest_grid.best_estimator_

## GradientBoost

In [None]:
# Exhaustive grid search for gradient boosting, scored by mean cross-validated macro-F1.
boosting_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20],
    "max_features": [None, "sqrt"]
}

# n_jobs=-1 runs the 512 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
boosting_grid = GridSearchCV(
    GradientBoostingClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=boosting_params,
    scoring="f1_macro",
    n_jobs=-1,
)
boosting_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
boosting_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
boosting_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
gradient_boosting = boosting_grid.best_estimator_

## Model Selection

In [None]:
# Macro-averaged F1 on the held-out test split for both tree ensembles.
random_f1_forest_score, gradient_f1_boosting_score = (
    f1_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged precision on the held-out test split for both tree ensembles.
random_precision_forest_score, gradient_precision_boosting_score = (
    precision_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged recall on the held-out test split for both tree ensembles.
random_recall_forest_score, gradient_recall_boosting_score = (
    recall_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (random_forest, gradient_boosting)
)

In [None]:
# Accuracy on the held-out test split for both tree ensembles.
random_accuracy_forest_score, gradient_accuracy_boosting_score = (
    accuracy_score(y_test, fitted.predict(X_test))
    for fitted in (random_forest, gradient_boosting)
)

In [None]:
# Comparison table: one row per model, one column per test-set metric.
model_names = ["Random Forest", "GradientBoost"]
s = dict(
    Accuracy=[random_accuracy_forest_score, gradient_accuracy_boosting_score],
    Precision=[random_precision_forest_score, gradient_precision_boosting_score],
    Recall=[random_recall_forest_score, gradient_recall_boosting_score],
    F1=[random_f1_forest_score, gradient_f1_boosting_score],
)
scores = pd.DataFrame(s, index=model_names)
scores

# Modeling Cognitive State

In [None]:
# Features: every column except the three target labels.
X = dataset.drop(columns=["EmotionalState", "CognitiveState", "EngagementLevel"])
# Target modelled in this section.
y = dataset["CognitiveState"]

# Stratify so train and test keep the class proportions of the full data; the models are
# compared with macro-averaged metrics, which are sensitive to classes that end up
# under-represented in the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=777, stratify=y)

## K Neighbors

In [None]:
# Tune k for a k-nearest-neighbours classifier, scored by mean cross-validated macro-F1.
# The features are standardised inside the pipeline so that distances are not dominated by
# the columns with the largest numeric range; the scaler is refit on each CV training fold.
kneighbors_params = {
    "knn__n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
kneighbors_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier()),
])
# Uses GridSearchCV's default CV (stratified k-fold for classifiers) rather than LeaveOneOut:
# with a single sample per validation fold, macro-F1 can only be 0 or 1 per fold.
kneighbors_grid = GridSearchCV(kneighbors_pipeline, param_grid=kneighbors_params, scoring="f1_macro")
kneighbors_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
kneighbors_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
kneighbors_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
kneighbors = kneighbors_grid.best_estimator_

## SVM

In [None]:
# Tune the kernel of a support-vector classifier, scored by mean cross-validated macro-F1.
# `degree` is only used by the polynomial kernel, so it is searched for "poly" alone;
# crossing it with "linear"/"rbf" would refit identical models several times.
svm_params = [
    {"svc__kernel": ["linear", "rbf"]},
    {"svc__kernel": ["poly"], "svc__degree": [3, 4, 5]},
]

# Standardise the features inside the pipeline: SVMs are sensitive to feature scale, and
# fitting the scaler per CV training fold avoids leaking validation data.
svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", SVC()),
])

svm_grid = GridSearchCV(svm_pipeline, param_grid=svm_params, scoring="f1_macro")
svm_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
svm_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
svm_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
svm = svm_grid.best_estimator_

## Random Forest

In [None]:
# Exhaustive grid search for a random forest, scored by mean cross-validated macro-F1.
forest_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20]
}

# n_jobs=-1 runs the 256 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
forest_grid = GridSearchCV(
    RandomForestClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=forest_params,
    scoring="f1_macro",
    n_jobs=-1,
)
forest_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
forest_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
forest_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
random_forest = forest_grid.best_estimator_

## GradientBoost

In [None]:
# Exhaustive grid search for gradient boosting, scored by mean cross-validated macro-F1.
boosting_params = {
    "n_estimators": [25, 50, 100, 150],
    "max_depth": [None, 6, 8, 10],
    "min_impurity_decrease": [0, 0.1, 0.2, 0.3],
    "max_leaf_nodes": [None, 5, 10, 20],
    "max_features": [None, "sqrt"]
}

# n_jobs=-1 runs the 512 candidates x CV folds on all CPU cores; the fixed random_state
# keeps every individual fit deterministic, so the search results do not change.
boosting_grid = GridSearchCV(
    GradientBoostingClassifier(min_samples_leaf=3, random_state=1234),
    param_grid=boosting_params,
    scoring="f1_macro",
    n_jobs=-1,
)
boosting_grid.fit(X_train, y_train)

In [None]:
# Parameter setting that achieved the best mean cross-validated score.
boosting_grid.best_params_

In [None]:
# Mean cross-validated macro-F1 of the best parameter setting.
boosting_grid.best_score_

In [None]:
# Keep the best estimator, refit by GridSearchCV on the whole training split (default refit=True).
gradient_boosting = boosting_grid.best_estimator_

## Model Selection

In [None]:
# Macro-averaged F1 on the held-out test split, one value per tuned model.
(
    kneighbors_f1_score,
    svm_f1_score,
    random_f1_forest_score,
    gradient_f1_boosting_score,
) = (
    f1_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged precision on the held-out test split, one value per tuned model.
(
    kneighbors_precision_score,
    svm_presicion_score,
    random_precision_forest_score,
    gradient_precision_boosting_score,
) = (
    precision_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Macro-averaged recall on the held-out test split, one value per tuned model.
(
    kneighbors_recall_score,
    svm_recall_score,
    random_recall_forest_score,
    gradient_recall_boosting_score,
) = (
    recall_score(y_test, fitted.predict(X_test), average="macro")
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Accuracy on the held-out test split, one value per tuned model.
(
    kneighbors_accuracy_score,
    svm_accuracy_score,
    random_accuracy_forest_score,
    gradient_accuracy_boosting_score,
) = (
    accuracy_score(y_test, fitted.predict(X_test))
    for fitted in (kneighbors, svm, random_forest, gradient_boosting)
)

In [None]:
# Comparison table: one row per model, one column per test-set metric.
model_names = ["K nearest neighbors", "SVM", "Random Forest", "GradientBoost"]
s = dict(
    Accuracy=[kneighbors_accuracy_score, svm_accuracy_score,
              random_accuracy_forest_score, gradient_accuracy_boosting_score],
    Precision=[kneighbors_precision_score, svm_presicion_score,
               random_precision_forest_score, gradient_precision_boosting_score],
    Recall=[kneighbors_recall_score, svm_recall_score,
            random_recall_forest_score, gradient_recall_boosting_score],
    F1=[kneighbors_f1_score, svm_f1_score,
        random_f1_forest_score, gradient_f1_boosting_score],
)
scores = pd.DataFrame(s, index=model_names)
scores

The EDA also shows that the cognitive state looks essentially random with respect to the measured features, so we cannot model it reliably. Most likely it has no actual relationship with them.