In [7]:
# Importing modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm

sns.set()

In [8]:
# importing data

data = pd.read_csv('../data/labeled_data.csv')
dataTrain, dataTest = train_test_split(data, train_size=0.8)

In [9]:
dataTrain

Unnamed: 0.1,Unnamed: 0,Nervousness_Label,Unable to control_Label,Worry_Label,Trouble in Relaxation_Label,Restlessness_Label,Irritability_Label,Fear_Label,Stress_Label
13503,13503,2,0,3,1,2,3,1,2
12929,12929,2,1,0,3,2,3,2,2
9555,9555,2,1,1,1,2,3,3,2
8901,8901,0,3,0,0,0,3,0,1
9303,9303,0,2,2,0,2,2,0,1
...,...,...,...,...,...,...,...,...,...
5773,5773,1,2,2,1,0,3,0,1
5428,5428,2,2,0,3,0,3,3,2
6948,6948,0,2,0,0,1,2,2,1
14590,14590,1,2,1,3,1,1,3,2


In [10]:
dataTest

Unnamed: 0.1,Unnamed: 0,Nervousness_Label,Unable to control_Label,Worry_Label,Trouble in Relaxation_Label,Restlessness_Label,Irritability_Label,Fear_Label,Stress_Label
15425,15425,0,1,1,3,2,0,3,1
8663,8663,2,2,1,0,3,2,0,1
3801,3801,3,0,0,2,3,3,3,2
12790,12790,2,3,3,0,1,0,1,1
7374,7374,2,0,2,3,0,3,2,2
...,...,...,...,...,...,...,...,...,...
1762,1762,2,3,2,1,2,2,2,2
7712,7712,2,2,2,0,2,0,3,2
10832,10832,3,1,3,1,0,3,3,2
1476,1476,0,0,0,2,0,1,1,0


## Function to score each model

In [19]:
def scoreModel(clf):
    return clf.score(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

## Using SVM Linear

SVM Linear is the fastest SVM model but may have the lowest level of accuracy.

In [11]:
clf = make_pipeline(StandardScaler(), svm.LinearSVC(random_state=0, tol=1e-5))
clf.fit(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

In [20]:
scoreModel(clf)

0.9613946746013581

## Training other models

These models take longer to train as they have a higher time complexity.

In [15]:
### SVM Training Constant

C = 1.0

In [24]:
# SVM Linear Kernel

clfLIN = make_pipeline(StandardScaler(), svm.SVC(kernel="linear", C=C))
clfLIN.fit(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

clfRGF = make_pipeline(StandardScaler(), svm.SVC(kernel="rbf", gamma='auto', C=C))
clfRGF.fit(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

clfPOLY = make_pipeline(StandardScaler(), svm.SVC(kernel="poly", degree=4, gamma='auto', C=C))
clfPOLY.fit(dataTrain.drop(['Stress_Label'], axis=1), dataTrain['Stress_Label'])

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(degree=4, gamma='auto', kernel='poly'))])

In [25]:
### Determining Score for SVM models


scores = {
    'clfLIN': scoreModel(clfLIN),
    'clfRGF': scoreModel(clfRGF),
    'clfPOLY': scoreModel(clfPOLY),
}

for name, val in scores.items():
    print(f"{name}: {val}")

clfLIN: 1.0
clfRGF: 0.9999237048905165
clfPOLY: 0.5454337376974135
