In [7]:
# Importing modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm

# Apply seaborn's default theme to the figures created after this point.
sns.set()

In [28]:
# Load the labelled data and split it 80/20 into training and test rows.

# Fixed seed: without it train_test_split shuffles differently on every run,
# so the split (and every score computed from it) would not be reproducible.
SPLIT_SEED = 0

data = pd.read_csv('../data/labeled_data.csv')
dataTrain, dataTest = train_test_split(data, train_size=0.8, random_state=SPLIT_SEED)

In [29]:
# Preview the training split (long frames are truncated in the rendered output).
dataTrain

Unnamed: 0,Nervousness_Label,Unable to control_Label,Worry_Label,Trouble in Relaxation_Label,Restlessness_Label,Irritability_Label,Fear_Label,Stress_Label
15424,0,0,3,2,2,0,2,1
16109,2,2,2,2,2,2,3,2
5671,1,2,3,1,0,2,3,2
3736,0,2,1,3,1,0,1,1
14654,1,1,0,2,3,0,0,1
...,...,...,...,...,...,...,...,...
3897,3,2,0,3,1,1,3,2
8737,2,2,2,3,3,2,1,2
12815,1,3,1,1,3,3,1,2
4579,3,0,2,1,3,2,0,2


In [30]:
# Preview the held-out test split (long frames are truncated in the rendered output).
dataTest

Unnamed: 0,Nervousness_Label,Unable to control_Label,Worry_Label,Trouble in Relaxation_Label,Restlessness_Label,Irritability_Label,Fear_Label,Stress_Label
15512,1,2,2,1,3,0,3,2
5426,0,1,1,2,1,0,2,1
13098,0,3,3,3,2,2,1,2
4486,0,0,0,1,1,3,2,1
1321,3,2,0,3,0,1,2,2
...,...,...,...,...,...,...,...,...
9576,0,1,3,1,3,3,3,2
1022,0,3,0,3,3,3,1,2
6178,2,2,1,1,2,1,2,2
11001,2,1,3,1,3,0,0,1


## Function to score each model

In [31]:
def scoreModel(clf, data=None, target='Stress_Label'):
    """Score a fitted estimator on a labelled DataFrame.

    Parameters
    ----------
    clf
        Any fitted object exposing ``score(X, y)``.
    data : pandas.DataFrame, optional
        Frame holding the feature columns plus the ``target`` column.
        When omitted, the notebook-level ``dataTest`` split is used, which
        keeps existing ``scoreModel(clf)`` calls working unchanged.
    target : str, default 'Stress_Label'
        Name of the label column; every other column is passed as a feature.

    Returns
    -------
    Whatever ``clf.score`` returns for the given features and labels.
    """
    if data is None:
        # Fall back to the held-out split created when the data was loaded.
        data = dataTest
    features = data.drop(columns=[target])
    return clf.score(features, data[target])

## Using SVM Linear

`LinearSVC` is usually the fastest SVM variant to train, but it may also be the least accurate of the models tried here.

In [32]:
# Baseline model: standardise the features, then fit a LinearSVC on them.
clf = make_pipeline(
    StandardScaler(),
    svm.LinearSVC(tol=1e-5, random_state=0),
)
clf.fit(
    dataTrain.drop(columns=['Stress_Label']),  # every column except the target
    dataTrain['Stress_Label'],                 # the target labels
)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

In [33]:
scoreModel(clf)

0.9591089411046689

## Training other models

These `SVC` models (linear, RBF and polynomial kernels) take longer to train than `LinearSVC` because kernel SVM training has a higher time complexity.

In [34]:
### SVM Training Constant
# C is passed as the `C` argument of every svm.SVC model built in the next
# cell. In scikit-learn it is the regularization parameter: the strength of
# the regularization is inversely proportional to C.

C = 1.0

In [35]:
# Kernel SVMs: linear, RBF and degree-4 polynomial, each behind a StandardScaler

def _fitScaledSVC(**svcParams):
    """Fit a StandardScaler -> svm.SVC pipeline on the training split.

    Parameters
    ----------
    **svcParams
        Keyword arguments forwarded to ``svm.SVC`` (``kernel``, ``degree``,
        ``gamma``, ...). ``C`` always comes from the notebook-level constant
        ``C`` and must not be passed here.

    Returns
    -------
    The fitted pipeline (``Pipeline.fit`` returns the pipeline itself).
    """
    model = make_pipeline(StandardScaler(), svm.SVC(C=C, **svcParams))
    return model.fit(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

clfLIN = _fitScaledSVC(kernel="linear")

# RBF kernel; the variable name is kept as-is because later cells refer to it.
clfRGF = _fitScaledSVC(kernel="rbf", gamma='auto')

# NOTE(review): degree=4 with SVC's default coef0=0 gives an even, homogeneous
# kernel, which cannot tell a standardised sample x apart from -x. This may be
# why clfPOLY scores far below the other models - consider coef0=1 or an odd
# degree, and confirm by re-scoring.
clfPOLY = _fitScaledSVC(kernel="poly", degree=4, gamma='auto')

clfPOLY  # display the last fitted pipeline, as this cell did before

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(degree=4, gamma='auto', kernel='poly'))])

In [36]:
### Score of each kernel SVM on the test split, keyed by the model's variable name

scores = {
    label: scoreModel(model)
    for label, model in (
        ('clfLIN', clfLIN),
        ('clfRGF', clfRGF),
        ('clfPOLY', clfPOLY),
    )
}

# One "<name>: <score>" line per model, in insertion order.
for label, result in scores.items():
    print(f"{label}: {result}")

clfLIN: 1.0
clfRGF: 1.0
clfPOLY: 0.4342386328959414
