In [0]:
import pandas as pd
import numpy as np
import os
import pickle
import sys
import math
from sklearn.metrics import confusion_matrix,classification_report
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from matplot

In [0]:
from google.colab import drive 
drive.mount ("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
os.chdir("/content/gdrive/My Drive/Recovery")

In [0]:
def normalize(df):
  a = df.drop(columns = ['0'])
  for i in a.columns.to_list():
    mean = df[i].mean()
    std = df[i].std()
    df[i] = (df[i]-mean)/std
  return df

def rfe(model,X_train,y_train):
  from sklearn.feature_selection import RFE
  selector = RFE(model,60,1,1)
  selector.fit(X_train,y_train)
  selector.ranking_

  y_p = selector.predict(X_test)
  print (confusion_matrix(y_test,y_p))

  print (classification_report(y_test,y_p,digits = 5))
  
def getsize(model):
  p = pickle.dumps(model)
  return sys.getsizeof(p)

In [0]:
def getfeatures(model,X_train,y_train):
  from sklearn.feature_selection import RFE
  model = DecisionTreeClassifier()
  selector = RFE(model,60,1)
  selector.fit(X_train,y_train)
  col = []
  for i in range(len(selector.ranking_)):
    if selector.ranking_[i] == 1:
      col.append(X_train.columns[i])
  return col
      
      
      

# Class selection :  
The preprocessed data from Datapreprocessing.ipynb results in contains 3 classes with 10 features. However, we consider only the FOG and Normal classes modelling the problem as a binary classification problem.


In [0]:
featurePath = os.getcwd()+"/dataset_fog_release/dataset/features"

windowLength = 1
fs = 64

readTime = featurePath +"/time_" +str(windowLength)+".csv"
readFreq = featurePath + "/freq_"+str(windowLength)+".csv"

timeDom = pd.read_csv(readTime)
freqDom = pd.read_csv(readFreq)

print (timeDom.shape,freqDom.shape)

df = pd.concat([timeDom,freqDom],axis = 1)

print (df['0'].value_counts())

df = df[df['0'] != 2]
print (df['0'].value_counts())

df = normalize(df)

(14093, 46) (14093, 45)
0    12148
1     1690
2      255
Name: 0, dtype: int64
0    12148
1     1690
Name: 0, dtype: int64


# Train-Test Split.
There exists a class imbalance between FOG and Normal classes. This is solved using SMOTE over sampling. Random Undersampling also works with considerable efficiency.


In [0]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE
X = df.drop(columns = ["0"])
y = df['0']

X_resampled, y_resampled = SMOTE().fit_resample(X,y)

print (X_resampled.shape,y_resampled.shape)

X_train,X_test,y_train,y_test = train_test_split(X_resampled,y_resampled, train_size = 0.7, stratify = y_resampled)

print (X_train.shape,X_test.shape,y_train.shape,y_test.shape)


(24296, 90) (24296,)
(17007, 90) (7289, 90) (17007,) (7289,)


# Standard ML Model Experiments:
The tests are conducted under various standard ML models and thier corresponding efficiencies and model sizes are reported.

#PrunedDecisiontrees

In [0]:
from sklearn.tree import DecisionTreeClassifier
  
# X_train = X_train[getfeatures(model,X_train,y_train)]
# X_test = X_test[getfeatures(model,X_train,y_train)]
for i in range(1,25,2): 
  model = DecisionTreeClassifier(max_depth=i)
  model.fit(X_train,y_train)
  y_pred = model.predict(X_test)

  # scores = cross_val_score(model,X_resampled,y_resampled,cv=10)
  # print("Accuracy: %0.5f (+/- %0.5f)" % (scores.mean(), scores.std() * 2))
  # print (confusion_matrix(y_test,y_pred))

  print (classification_report(y_test,y_pred,digits=5))
  print ("Model size is: ",getsize(model),"bytes")

              precision    recall  f1-score   support

           0    0.93329   0.40302   0.56294      3645
           1    0.61925   0.97119   0.75628      3644

    accuracy                        0.68706      7289
   macro avg    0.77627   0.68710   0.65961      7289
weighted avg    0.77629   0.68706   0.65960      7289

Model size is:  1488 bytes
              precision    recall  f1-score   support

           0    0.90966   0.63539   0.74818      3645
           1    0.71980   0.93688   0.81412      3644

    accuracy                        0.78612      7289
   macro avg    0.81473   0.78614   0.78115      7289
weighted avg    0.81474   0.78612   0.78115      7289

Model size is:  2355 bytes
              precision    recall  f1-score   support

           0    0.92479   0.77586   0.84380      3645
           1    0.80690   0.93688   0.86705      3644

    accuracy                        0.85636      7289
   macro avg    0.86584   0.85637   0.85542      7289
weighted avg    0.86

In [0]:
# rfe(model,X_train,y_train)
# print ("Model size is: ",getsize(model),"bytes")

# Random Forest


In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score
for i in range(1,25,2):
  model = RandomForestClassifier(max_depth=i)
  model.fit(X_train,y_train)
  y_pred = model.predict(X_test)

# scores = cross_val_score(model,X_resampled,y_resampled,cv=10)
# print("Accuracy: %0.5f (+/- %0.5f)" % (scores.mean(), scores.std() * 2))
  # print (confusion_matrix(y_test,y_pred))
  print ("depth = %d",i)
  print (classification_report(y_test,y_pred,digits=5))
  # print (recall_score(y_test,y_pred))
  print ("Model size is: ",getsize(model),"bytes")

depth = %d 1
              precision    recall  f1-score   support

           0    0.88729   0.45569   0.60214      3645
           1    0.63375   0.94210   0.75775      3644

    accuracy                        0.69886      7289
   macro avg    0.76052   0.69889   0.67995      7289
weighted avg    0.76053   0.69886   0.67994      7289

Model size is:  7283 bytes
depth = %d 3
              precision    recall  f1-score   support

           0    0.88850   0.75638   0.81713      3645
           1    0.78786   0.90505   0.84240      3644

    accuracy                        0.83070      7289
   macro avg    0.83818   0.83071   0.82977      7289
weighted avg    0.83819   0.83070   0.82976      7289

Model size is:  15809 bytes
depth = %d 5
              precision    recall  f1-score   support

           0    0.93290   0.78189   0.85075      3645
           1    0.81223   0.94374   0.87306      3644

    accuracy                        0.86281      7289
   macro avg    0.87257   0.86282 

In [0]:
# rfe(model,X_train,y_train)


Model size is:  7283 bytes


#SVM - RBF

In [0]:
from sklearn.svm import SVC

# X_train = X_train[getfeatures(X_train,y_train)]
# X_test = X_test[getfeatures(X_train,y_train)]

model = SVC(kernel='rbf')
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
scores = cross_val_score(model,X_train,y_train,cv=10)
print("Accuracy: %0.5f (+/- %0.5f)" % (scores.mean(), scores.std() * 2))
print (confusion_matrix(y_test,y_pred))
print (classification_report(y_test,y_pred,digits=5))
print ("Model size is: ",getsize(model),"bytes")

# rfe(model)

Accuracy: 0.90998 (+/- 0.01520)
[[3243  401]
 [ 195 3450]]
              precision    recall  f1-score   support

           0    0.94328   0.88996   0.91584      3644
           1    0.89587   0.94650   0.92049      3645

    accuracy                        0.91823      7289
   macro avg    0.91958   0.91823   0.91817      7289
weighted avg    0.91957   0.91823   0.91817      7289

Model size is:  3973711 bytes


# KNN


In [0]:
from sklearn.neighbors import KNeighborsClassifier

# X_train = X_train[getfeatures(X_train,y_train)]
# X_test = X_test[getfeatures(X_train,y_train)]

model = KNeighborsClassifier()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

print (confusion_matrix(y_test,y_pred))

print (classification_report(y_test,y_pred,digits=5))

print ("Model size is: ",getsize(model),'bytes')
# rfe(model)

[[3137  507]
 [  12 3633]]
              precision    recall  f1-score   support

           0    0.99619   0.86087   0.92360      3644
           1    0.87754   0.99671   0.93333      3645

    accuracy                        0.92880      7289
   macro avg    0.93686   0.92879   0.92847      7289
weighted avg    0.93685   0.92880   0.92847      7289

Model size is:  14024367 bytes


# Adaboost


In [0]:
from sklearn.ensemble import AdaBoostClassifier

model = RandomForestClassifier()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)

print (confusion_matrix(y_test,y_pred))

print (classification_report(y_test,y_pred,digits=5))


# rfe(model,X_train,y_train)

print ("Model size is: ",getsize(model),'bytes')


[[3463  181]
 [  89 3556]]
              precision    recall  f1-score   support

           0    0.97494   0.95033   0.96248      3644
           1    0.95157   0.97558   0.96342      3645

    accuracy                        0.96296      7289
   macro avg    0.96325   0.96296   0.96295      7289
weighted avg    0.96325   0.96296   0.96295      7289

Model size is:  1053522 bytes
