In [1]:
import pandas as pd
import numpy as np
import pickle
import scipy

In [2]:
# Load the dataset from 'Stress.csv'; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('Stress.csv')

In [3]:
# Peek at five random rows. A fixed random_state makes the preview show the
# same rows on every run, so the displayed output stays reproducible.
df.sample(5, random_state=42)

Unnamed: 0,sr,rr,t,lm,bo,rem,sr.1,hr,sl
77,56.48,19.296,95.296,9.296,93.944,83.24,6.296,58.24,1
267,84.8,23.28,90.64,13.6,88.64,96.6,0.64,68.2,3
452,52.48,18.496,94.496,8.496,92.744,81.24,5.496,56.24,1
434,84.08,23.088,90.544,13.36,88.544,96.36,0.544,67.72,3
365,49.24,17.696,98.544,7.392,96.696,76.96,8.696,54.24,0


In [4]:
# Swap the terse CSV headers for descriptive names; rename returns a new frame,
# which is bound back to `df`.
df = df.rename(columns={
    'sr': 'snoring rate',
    'rr': 'respiration rate',
    't': 'body temperature',
    'lm': 'limb movement',
    'bo': 'blood oxygen',
    'rem': 'eye movement',
    'sr.1': 'sleeping hours',
    'hr': 'heart rate',
    'sl': 'stress level',
})

# Show the first rows to confirm the new headers.
df.head()

Unnamed: 0,snoring rate,respiration rate,body temperature,limb movement,blood oxygen,eye movement,sleeping hours,heart rate,stress level
0,93.8,25.68,91.84,16.6,89.84,99.6,1.84,74.2,3
1,91.64,25.104,91.552,15.88,89.552,98.88,1.552,72.76,3
2,60.0,20.0,96.0,10.0,95.0,85.0,7.0,60.0,1
3,85.76,23.536,90.768,13.92,88.768,96.92,0.768,68.84,3
4,48.12,17.248,97.872,6.496,96.248,72.48,8.248,53.12,0


In [5]:
# Remove the columns that are not used as model features further down.
# The result is rebound instead of mutating in place, and errors='ignore'
# lets the cell be re-run safely: a second execution would otherwise raise
# KeyError because the columns are already gone.
df = df.drop(columns=['snoring rate','limb movement','eye movement','blood oxygen'],
             errors='ignore')

In [6]:
# Display the frame after the unused columns were dropped (pandas truncates the
# middle rows in the rendered output).
df

Unnamed: 0,respiration rate,body temperature,sleeping hours,heart rate,stress level
0,25.680,91.840,1.840,74.20,3
1,25.104,91.552,1.552,72.76,3
2,20.000,96.000,7.000,60.00,1
3,23.536,90.768,0.768,68.84,3
4,17.248,97.872,8.248,53.12,0
...,...,...,...,...,...
625,20.960,92.960,3.440,62.40,2
626,17.376,98.064,8.376,53.44,0
627,27.504,86.880,0.000,78.76,4
628,19.728,95.728,6.728,59.32,1


In [7]:
# Count missing entries per column and list the counts in ascending order.
df.isnull().sum().sort_values(ascending=True)

respiration rate    0
body temperature    0
sleeping hours      0
heart rate          0
stress level        0
dtype: int64

In [8]:
# Importing library
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler instance; it is fitted below and kept in `scaler`.
scaler = MinMaxScaler()

# Fit the scaler on the four feature columns and rescale them in one step.
# NOTE(review): the scaler is fitted on every row before the train/test split
# that happens later, so the test rows' minima/maxima influence the scaling.
# Consider fitting on the training rows only.
scaled = scaler.fit_transform(
    df.loc[:, ['respiration rate', 'body temperature',
               'sleeping hours', 'heart rate']]
)
print(scaled)


[[0.69142857 0.48857143 0.20444444 0.69142857]
 [0.65028571 0.468      0.17244444 0.65028571]
 [0.28571429 0.78571429 0.77777778 0.28571429]
 ...
 [0.82171429 0.13428571 0.         0.82171429]
 [0.26628571 0.76628571 0.74755556 0.26628571]
 [0.38514286 0.59942857 0.45422222 0.38514286]]


In [9]:
# Wrap the scaled array back into a labelled frame. Reusing df's index keeps the
# rows aligned with `df`, so index-aligned column assignments taken from `df`
# match row-for-row even if `df` ever carries a non-default index.
newdf = pd.DataFrame(scaled, index=df.index,
                     columns=['respiration rate', 'body temperature',
                              'sleeping hours', 'heart rate'])

In [10]:
# Attach the (unscaled) target column from `df`; pandas aligns it on the index.
newdf = newdf.assign(**{'stress level': df['stress level']})

In [11]:
from sklearn.model_selection import train_test_split as tts
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score as cvs

In [12]:
# Separate the scaled feature columns from the target column.
X = newdf[['respiration rate', 'body temperature', 
       'sleeping hours', 'heart rate']]
y = newdf['stress level']

# Hold out 40% of the rows for testing. random_state pins the shuffle so the
# split (and every metric computed from it) is reproducible across runs, and
# stratify=y keeps the stress-level class proportions equal in both parts.
x_train, x_test, y_train, y_test = tts(
    X, y, test_size=0.4, random_state=42, stratify=y
)
print('Dimensions of train dataset:',x_train.shape)
print('Dimensions of test dataset:',x_test.shape)

# Result stores keyed by model name, used for comparing models:
#   sc -> mean cross-validation score
#   rn -> predicted labels and how often each was predicted
sc = {}
rn = {}

Dimensions of train dataset: (378, 4)
Dimensions of test dataset: (252, 4)


In [13]:
# Fit a logistic-regression classifier on the training split and report its
# accuracy on the held-out rows.
model = LogisticRegression()
model.fit(x_train, y_train)
model_pred = model.predict(x_test)
print('accuracy_score:', metrics.accuracy_score(y_test, model_pred))

# 10-fold cross-validated accuracy over the full feature matrix, averaged
# across the folds.
r = np.mean(cvs(model, X, y, scoring='accuracy', cv=10))
sc['Logistic Regression'] = r
# Row 0 holds the distinct predicted labels, row 1 how often each was predicted.
rn['Logistic Regression'] = np.vstack(np.unique(model_pred, return_counts=True))
print('cross val score:', r)

accuracy_score: 1.0
cross val score: 1.0


In [14]:
# Single hand-written sample to classify (values presumably already min-max
# scaled, since they lie in [0, 1] like the training features).
# It is built as a one-row DataFrame with the same column names the model was
# fitted on: passing a bare ndarray to an estimator trained on a named-column
# frame makes newer scikit-learn versions warn about missing feature names and
# gives no protection against a wrong column order.
x1 = pd.DataFrame(
    [[0.819429, 0.131429, 0.000000, 0.819429]],
    columns=['respiration rate', 'body temperature',
             'sleeping hours', 'heart rate'],
)


In [26]:
# Classify the single hand-written sample; predict returns one label per input row.
prediction=model.predict(x1)

# Print the predicted stress level for that one row.
print(prediction[0])

4




In [16]:
# Display the held-out feature rows; the index shows which original rows landed
# in the test split.
x_test

Unnamed: 0,respiration rate,body temperature,sleeping hours,heart rate
368,0.270857,0.770857,0.754667,0.270857
618,0.097143,0.931429,0.928889,0.097143
194,0.878857,0.205714,0.000000,0.878857
1,0.650286,0.468000,0.172444,0.650286
424,0.100571,0.936571,0.934222,0.100571
...,...,...,...,...
199,0.595429,0.440571,0.129778,0.595429
20,0.411429,0.625714,0.515556,0.411429
31,0.732571,0.022857,0.000000,0.732571
348,0.664000,0.474857,0.183111,0.664000


In [17]:
# Predicted stress level for every row of the test split (one label per row).
model.predict(x_test)

array([1, 0, 4, 3, 0, 2, 2, 1, 1, 2, 2, 4, 0, 4, 3, 1, 3, 0, 4, 1, 1, 4,
       2, 3, 4, 4, 3, 3, 2, 1, 3, 0, 2, 0, 0, 4, 4, 4, 2, 1, 1, 1, 1, 1,
       1, 4, 4, 4, 0, 1, 0, 3, 3, 4, 1, 2, 1, 3, 3, 3, 4, 2, 3, 1, 2, 2,
       4, 1, 2, 2, 1, 4, 2, 2, 1, 4, 0, 3, 0, 2, 0, 4, 2, 3, 4, 4, 0, 4,
       1, 0, 2, 3, 2, 4, 1, 4, 1, 1, 1, 3, 3, 3, 3, 4, 0, 2, 1, 0, 0, 3,
       4, 1, 0, 1, 1, 0, 1, 4, 4, 2, 2, 2, 3, 1, 1, 3, 2, 3, 1, 0, 3, 4,
       4, 4, 0, 3, 4, 0, 4, 0, 0, 0, 0, 2, 0, 1, 2, 4, 1, 2, 3, 3, 2, 3,
       0, 3, 1, 4, 3, 3, 0, 2, 4, 4, 0, 3, 0, 4, 4, 3, 3, 1, 1, 2, 3, 1,
       4, 0, 3, 3, 0, 1, 0, 3, 0, 4, 4, 1, 0, 0, 2, 2, 2, 2, 3, 2, 0, 3,
       2, 0, 0, 1, 1, 1, 1, 1, 4, 1, 0, 1, 0, 2, 2, 1, 3, 1, 1, 0, 0, 1,
       2, 3, 3, 4, 2, 1, 3, 2, 0, 3, 0, 3, 2, 4, 2, 2, 3, 1, 1, 2, 2, 3,
       3, 2, 0, 2, 0, 3, 2, 4, 3, 3], dtype=int64)

In [18]:
# Persist the trained classifier so it can be loaded without retraining.
with open('model.pkl','wb') as files:
    pickle.dump(model,files)

# The classifier was trained on min-max-scaled features, so raw readings must
# pass through the same fitted scaler before prediction. Save it next to the
# model; without it the pickled model cannot be used on unscaled input.
with open('scaler.pkl','wb') as scaler_file:
    pickle.dump(scaler, scaler_file)