In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Load the prepared training table (path is relative to the notebook's folder).
# The HRR_* feature columns already lie in [0, 1] and the class label is in
# `Stress` (see the describe()/unique() outputs in the following cells).
df = pd.read_csv("../data/final_training_data.csv")

In [5]:
df.head()

Unnamed: 0,HRR_Mean,HRR_Min,HRR_Max,HRR_Std,HRR_RMS,Stress
0,0.641552,0.639566,0.643243,0.100525,0.235069,0.0
1,0.642973,0.639566,0.643243,0.08927,0.229293,0.0
2,0.643921,0.640921,0.645946,0.083372,0.157093,0.0
3,0.645952,0.644986,0.645946,0.041375,0.131024,0.0
4,0.646764,0.644986,0.648649,0.066093,0.11347,0.0


In [6]:
df['Stress'].unique()

array([0., 1., 2.])

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12445 entries, 0 to 12444
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   HRR_Mean  12445 non-null  float64
 1   HRR_Min   12445 non-null  float64
 2   HRR_Max   12445 non-null  float64
 3   HRR_Std   12445 non-null  float64
 4   HRR_RMS   12445 non-null  float64
 5   Stress    12445 non-null  float64
dtypes: float64(6)
memory usage: 583.5 KB


In [8]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
HRR_Mean,12445.0,0.53133,0.236169,0.0,0.328595,0.573044,0.740726,1.0
HRR_Min,12445.0,0.529921,0.236633,0.0,0.323848,0.571816,0.739837,1.0
HRR_Max,12445.0,0.532379,0.235474,0.0,0.32973,0.575676,0.740541,1.0
HRR_Std,12445.0,0.058898,0.030202,0.0,0.043969,0.054869,0.068181,1.0
HRR_RMS,12445.0,0.137165,0.048432,0.0,0.103584,0.131024,0.160471,1.0
Stress,12445.0,0.949779,0.903741,0.0,0.0,1.0,2.0,2.0


In [9]:
df.shape

(12445, 6)

In [10]:
# Separate the feature matrix from the target: every column except `Stress`
# is a feature, `Stress` itself is the label to predict.
y = df['Stress']
X = df.drop(columns=['Stress'])

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((9956, 5), (9956,), (2489, 5), (2489,))

In [13]:
# Seed the forest as well as the split: without `random_state` the bootstrap
# samples and per-split feature subsets change on every run, so the reported
# accuracy and the pickled model would not be reproducible after a kernel
# restart. The same seed as train_test_split is used for consistency.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [14]:
# Predict a class label for every row of the held-out test set.
y_pred = model.predict(X_test)

In [37]:
pd.Series(y_pred).unique()

array([0., 1., 2.])

In [38]:
y_pred

array([0., 1., 2., ..., 2., 0., 0.])

In [15]:
# Fraction of test rows whose predicted label equals the true label
# (accuracy_score takes the true labels first, then the predictions).
accuracy = accuracy_score(y_test, y_pred)

In [16]:
accuracy

0.5335476094817195

In [22]:
import pickle

# Persist the fitted classifier so it can be reused without retraining.
with open("../models/hr_rf_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [23]:
import pickle

# Read the classifier back from disk, replacing the in-memory `model`.
# pickle.load can execute arbitrary code, so only load files you trust.
with open("../models/hr_rf_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

In [24]:
# Raw (unscaled) sample readings used to try out the saved model.
# NOTE(review): presumably heart-rate values in bpm — confirm source and units.
new_data = [88.50,117.00,108.50,97.20,92.17,89.14,87.38,86.44,84.80,83.36,81.92,80.54,79.36]

In [25]:
import numpy as np
from scipy.stats import kurtosis, skew
def statistical_features(arr):
    """Summarise a sequence of numeric samples.

    Parameters
    ----------
    arr : array-like
        The samples to summarise.

    Returns
    -------
    tuple
        ``(minimum, maximum, mean, standard deviation)`` — in that order.
        The standard deviation uses NumPy's default (``ddof=0``).
    """
    return np.min(arr), np.max(arr), np.mean(arr), np.std(arr)

def shape_features(arr):
    """Describe the shape of a sample distribution.

    Parameters
    ----------
    arr : array-like
        The samples to analyse.

    Returns
    -------
    tuple
        ``(skewness, kurtosis)`` as computed by ``scipy.stats.skew`` and
        ``scipy.stats.kurtosis`` with their default settings.
    """
    return skew(arr), kurtosis(arr)

In [26]:
# Unpack in the order statistical_features returns: (min, max, mean, std).
# NOTE(review): these names are not used again in the visible notebook; the
# feature dict built in a later cell recomputes the same values.
hr_min, hr_max, hr_mean, hr_std = statistical_features(new_data)

In [27]:
# Root mean square of the differences between consecutive readings
# (np.ediff1d yields new_data[i+1] - new_data[i]).
hr_rms = np.sqrt(np.mean(np.square(np.ediff1d(new_data))))

In [29]:
# Collect the features for the new sample, keyed by column name.
# statistical_features returns (min, max, mean, std), hence this key order.
data = dict(zip(
    ('HRR_Min', 'HRR_Max', 'HRR_Mean', 'HRR_Std'),
    statistical_features(new_data),
))
# Root mean square of the differences between consecutive readings.
data['HRR_RMS'] = np.sqrt(np.mean(np.square(np.ediff1d(new_data))))

In [30]:
data

{'HRR_Min': 79.36,
 'HRR_Max': 117.0,
 'HRR_Mean': 90.48538461538459,
 'HRR_Std': 10.692847800415016,
 'HRR_RMS': 9.402068034923662}

In [31]:
# Wrap the feature dict in a list so it becomes a single-row DataFrame with
# one column per key.
# NOTE(review): `data` is rebound from a dict to a DataFrame here, so this
# cell is not idempotent — re-run the dict-building cell before re-running it.
data = pd.DataFrame([data])

In [32]:
data

Unnamed: 0,HRR_Min,HRR_Max,HRR_Mean,HRR_Std,HRR_RMS
0,79.36,117.0,90.485385,10.692848,9.402068


In [33]:
from sklearn.preprocessing import MinMaxScaler

# Column order the model was trained on (see the df.info() output earlier in
# this notebook). The feature dict was built as Min, Max, Mean, ..., so the
# frame must be re-ordered; otherwise each value is fed to the wrong feature.
feature_order = ['HRR_Mean', 'HRR_Min', 'HRR_Max', 'HRR_Std', 'HRR_RMS']
data = data[feature_order]

# FIXME(review): fitting a *new* MinMaxScaler on this single row maps every
# feature to 0 (min == max for each column), so the model sees an all-zero
# input regardless of the readings. The scaler that produced
# final_training_data.csv must be persisted during data preparation and
# applied here with `.transform(...)` instead of `.fit_transform(...)`.
# It is not available in this notebook, so the call is left in place.
scalar = MinMaxScaler()
x_scaled = scalar.fit_transform(data)

# Keep the column names so the model receives the same feature names it was
# fitted with (a bare ndarray-backed frame would have columns 0..4).
data = pd.DataFrame(x_scaled, columns=feature_order).fillna(0)

pred_t = model.predict(data)



In [34]:
pred_t

array([0.])

In [35]:
type(pred_t)

numpy.ndarray

In [36]:
pred_t[0]

0.0

In [None]:
# NOTE: stray local path left in an unexecuted cell (not valid Python as written): /Users/harshpreetsingh/Downloads/WESAD/S4/S4_E4_Data/HR.csv