In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection  import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import neighbors
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
# Single seed reused as `random_state` by the train/test split and the decision trees below.
SEED=42

In [13]:
# Read the cleaned dataset from disk.
df = pd.read_csv(filepath_or_buffer="../../datasets/clean-ds.csv")
# Report (rows, columns), then let the notebook render the first five rows.
print(df.shape)
df.head(n=5)

(19, 13)


Unnamed: 0,Heart Beat 0.8~2.5hz from Avg,Peak Hz in 2.6~10hz,Peak Hz in 21~30hz,Peak Hz in 31~40hz,Peak Hz in 41~50hz,RMS avg,Zero crossing rate avg,Spectral flatness avg,Spectral rolloff avg,Spectral centroid avg,Poly features avg,Spectral bandwidth avg,Not Normal
0,69.0,5.5,23.33,31.0,43.67,0.2528,0.028,0.0202,3931.6415,3655.5029,0.8441,5149.4551,1
1,78.0,3.12,30.0,40.0,43.25,0.1949,0.0372,0.0211,5369.9529,4887.3982,0.695,6042.2007,0
2,78.0,3.0,24.71,39.5,41.5,0.206,0.0275,0.0148,5252.5443,4598.1605,0.663,6013.8062,1
3,76.0,4.0,25.0,40.0,50.0,0.2054,0.072,0.047,7429.2183,7041.157,1.1817,6818.168,0
4,83.0,5.78,22.4,38.78,45.0,0.244,0.0008,0.0,71.4086,128.8602,0.4138,605.7551,1


In [14]:
# Separate the 'Not Normal' label column (y) from the remaining feature columns (X).
y = df['Not Normal']
X = df.drop(columns=['Not Normal'])

In [15]:
# Hold out 30% of the rows first so the test rows stay unseen during preprocessing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=SEED)

# Fit the scaler on the training rows only and reuse its statistics for the test rows.
# Fitting on the full table before splitting lets test-set mean/variance leak into
# the training features. `X` itself is left unscaled so this cell can be re-run safely.
standard_scaler = StandardScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

In [16]:
# Base learner 'dt': a decision tree limited to depth 5.
dt = DecisionTreeClassifier(max_depth=5, random_state=SEED)
# AdaBoost ensemble 'adb_clf' with 100 boosting rounds built on 'dt'.
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases.
# random_state is set on the ensemble so repeated runs give the same model.
adb_clf = AdaBoostClassifier(dt, n_estimators=100, random_state=SEED)
# Fit 'adb_clf' on the training split, then predict labels for the held-out split.
adb_clf.fit(X_train, y_train)
y_pred = adb_clf.predict(X_test)

In [17]:
# Fraction of held-out samples whose predicted label equals the true label.
score = accuracy_score(y_test, y_pred)
# The predictions come from the AdaBoost ensemble, so the printed label names it
# (the previous 'Voting Classifier' text did not match the model being scored).
print('AdaBoost Classifier: {:.2f}'.format(score))

Voting Classifier: 0.67


In [18]:
print('    Classification report for AdaBoostClassifier')
print('-----------------------------------------------------------')
# Display names for label 0 and label 1 of the 'Not Normal' column, in that order.
target_names = ['Normal', 'Not Normal']
# labels=[0, 1] pins the label-to-name mapping even if one class is absent from this split.
# zero_division=0 reports 0.0 without a warning when a class is never predicted.
print(metrics.classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    digits=3,
    target_names=target_names,
    zero_division=0,
))

    Classification report for AdaBoostClassifier
-----------------------------------------------------------
              precision    recall  f1-score   support

      Normal      0.667     1.000     0.800         4
  Not Normal      0.000     0.000     0.000         2

    accuracy                          0.667         6
   macro avg      0.333     0.500     0.400         6
weighted avg      0.444     0.667     0.533         6



  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

# Weak learner for boosting: an entropy-based tree of depth 2 that considers
# 5 features per split.
dt_classifier = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=2,
    max_features=5,
    splitter='best',
    random_state=SEED,
)

# Only the number of boosting rounds is searched.
parameters = {
    'n_estimators': [100, 150, 200],
}

# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases.
# random_state makes every candidate fit (and so the search result) repeatable.
adb_clf = AdaBoostClassifier(dt_classifier, random_state=SEED)

# 5-fold stratified cross-validation over the training split.
cross_validation = StratifiedKFold(n_splits=5)

grid_search = GridSearchCV(adb_clf, param_grid=parameters, cv=cross_validation)

grid_search.fit(X_train, y_train)
print('Best score: {}'.format(grid_search.best_score_))
print('Best parameters: {}'.format(grid_search.best_params_))

# Best AdaBoost configuration found by the search; the variable name is kept
# because a later cell refers to it.
best_dt_classifier = grid_search.best_estimator_


Best score: 0.4333333333333333
Best parameters: {'n_estimators': 150}


In [20]:
# GridSearchCV's default refit=True already refits `best_estimator_` on the full
# training split, so the tuned model is used as-is. A second, unseeded fit here
# would silently replace the model that the search selected.
y_pred = best_dt_classifier.predict(X_test)
score = accuracy_score(y_test, y_pred)
# Label the output after the model actually evaluated (tuned AdaBoost ensemble).
print('Tuned AdaBoost Classifier: {:.2f}'.format(score))

Voting Classifier: 0.50


In [21]:
# Keep eight selected feature columns plus the 'Not Normal' label.
df_importance = df[[
    'Zero crossing rate avg',
    'Heart Beat 0.8~2.5hz from Avg',
    'Peak Hz in 31~40hz',
    'Spectral rolloff avg',
    'Spectral centroid avg',
    'Spectral bandwidth avg',
    'Peak Hz in 21~30hz',
    'Peak Hz in 41~50hz',
    'Not Normal',
]]
X, y = df_importance.drop(['Not Normal'], axis=1), df_importance['Not Normal']

# Split before scaling so the scaler never sees the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=SEED)

# Fit on the training rows only; fitting on the full table leaks test-set
# mean/variance into the training features.
standard_scaler = StandardScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

In [11]:
# Base learner 'dt': a decision tree limited to depth 5.
dt = DecisionTreeClassifier(max_depth=5, random_state=SEED)
# AdaBoost ensemble 'adb_clf' with 100 boosting rounds built on 'dt'.
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases.
# random_state is set on the ensemble so repeated runs give the same model.
adb_clf = AdaBoostClassifier(dt, n_estimators=100, random_state=SEED)
# Fit 'adb_clf' on the training split, then score it on the held-out split.
adb_clf.fit(X_train, y_train)
y_pred = adb_clf.predict(X_test)
score = accuracy_score(y_test, y_pred)
# The printed label names the model actually evaluated (previously 'Voting Classifier').
print('AdaBoost Classifier: {:.2f}'.format(score))

Voting Classifier: 0.67


In [22]:
# NOTE(review): as saved, the output recorded for this cell shows accuracy 0.500,
# which differs from the 0.67 printed by the cell above. Re-run the notebook top
# to bottom to confirm which predictions this report describes.
print('    Classification report for AdaBoostClassifier')
print('-----------------------------------------------------------')
# Display names for label 0 and label 1 of the 'Not Normal' column, in that order.
target_names = ['Normal', 'Not Normal']
# labels=[0, 1] pins the label-to-name mapping even if one class is absent from this split.
# zero_division=0 reports 0.0 without a warning when a class is never predicted.
print(metrics.classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    digits=3,
    target_names=target_names,
    zero_division=0,
))

    Classification report for AdaBoostClassifier
-----------------------------------------------------------
              precision    recall  f1-score   support

      Normal      0.600     0.750     0.667         4
  Not Normal      0.000     0.000     0.000         2

    accuracy                          0.500         6
   macro avg      0.300     0.375     0.333         6
weighted avg      0.400     0.500     0.444         6

