In [113]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler

In [114]:
df = pd.read_csv("./data/stress_class.csv")
df["Stress_Level"] = (df["Stress_Level"]).map({
    "Low" : 1,
    "Moderate" : 0,
    "High" : -1
})
df.head()

Unnamed: 0,Study_Hours_Per_Day,Sleep_Hours_Per_Day,GPA,Stress_Level
0,6.9,8.7,2.99,0
1,5.3,8.0,2.75,1
2,5.1,9.2,2.67,1
3,6.5,7.2,2.88,0
4,8.1,6.5,3.51,-1


In [115]:
train , test = np.split(df.sample(frac=1), [int(0.7*len(df))])

  return bound(*args, **kwds)


In [116]:
train.head()

Unnamed: 0,Study_Hours_Per_Day,Sleep_Hours_Per_Day,GPA,Stress_Level
174,8.1,7.8,3.26,-1
1150,9.3,5.5,3.29,-1
1651,5.3,5.7,2.79,-1
1860,6.2,6.0,2.88,0
1730,6.1,7.2,3.29,0


In [117]:
test.head()

Unnamed: 0,Study_Hours_Per_Day,Sleep_Hours_Per_Day,GPA,Stress_Level
1933,9.3,9.4,3.69,-1
2,5.1,9.2,2.67,1
783,7.4,6.7,3.36,0
1220,7.5,7.2,3.27,0
326,5.3,5.3,2.67,-1


In [118]:
def scale_dataset(dataframe, oversample=False):
  X = dataframe[dataframe.columns[:-1]].values
  y = dataframe[dataframe.columns[-1]].values

  scaler = StandardScaler()
  X = scaler.fit_transform(X)

  if oversample:
    ros = RandomOverSampler()
    X, y = ros.fit_resample(X, y)

  return X, y

In [119]:
X_train, y_train = scale_dataset(train, oversample=True)
X_test, y_test = scale_dataset(test)

In [120]:
from sklearn.metrics import classification_report

In [121]:
from sklearn.linear_model import  LogisticRegression
log_reg = LogisticRegression()

log_reg.fit(X_train, y_train)

In [122]:
y_pred = log_reg.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          -1       0.89      0.79      0.84       323
           0       0.74      0.78      0.76       187
           1       0.75      0.99      0.86        90

    accuracy                           0.82       600
   macro avg       0.80      0.85      0.82       600
weighted avg       0.83      0.82      0.82       600



In [123]:
from sklearn.naive_bayes import  GaussianNB
gauss_nb = GaussianNB()

gauss_nb.fit(X_train, y_train)

In [124]:
y_pred = gauss_nb.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          -1       0.94      0.86      0.90       323
           0       0.87      0.84      0.86       187
           1       0.73      1.00      0.84        90

    accuracy                           0.88       600
   macro avg       0.85      0.90      0.87       600
weighted avg       0.89      0.88      0.88       600



In [125]:
from sklearn.svm import SVC
svm = SVC()

svm.fit(X_train, y_train)

In [126]:
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          -1       0.98      0.95      0.97       323
           0       0.93      0.93      0.93       187
           1       0.88      1.00      0.94        90

    accuracy                           0.95       600
   macro avg       0.93      0.96      0.94       600
weighted avg       0.95      0.95      0.95       600



In [127]:
import pickle

In [128]:
with open("stress_calculator.pkl", "wb") as file:
    pickle.dump(svm, file)

In [129]:
import joblib
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

In [130]:
model = joblib.load("stress_calculator.pkl")

In [131]:
initial_type = [('float_input', FloatTensorType([None, 3]))]

In [132]:
onnx_model = convert_sklearn(model, initial_types=initial_type)
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

In [142]:
svm.predict(([[4, 0, 0]]))

array([-1])

In [137]:
log_reg.predict(([[7.4, 8.2, 3.4]]))

array([-1])

In [144]:
gauss_nb.predict(([[7400, 90, 2.98]]))

array([-1])