In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,f1_score,classification_report,precision_recall_curve,roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize


In [2]:
def prec_recall(y_test,y_pred):
    precision = dict()
    recall = dict()
    x=range(8)
    for i in range(8):
        precision[i], recall[i], _ = precision_recall_curve(y_test[:,i],y_pred[:, i])
        plt.plot(recall[i], precision[i], lw=2, label='class {}'.format(i+1))

    plt.xlabel("recall")
    plt.ylabel("precision")
    plt.legend(loc="best")
    plt.title("precision vs. recall curve")
    plt.show()

In [3]:
def roc(y_test,y_pred):
    fpr = dict()
    tpr = dict()
    x=range(8)
    for i in range(8):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i],y_pred[:, i])
        plt.plot(fpr[i], tpr[i], lw=2, label='class {}'.format(i+1))

    plt.xlabel("false positive rate")
    plt.ylabel("true positive rate")
    plt.legend(loc="best")
    plt.title("ROC curve")
    plt.show()

In [4]:
def model(x_train,y_train,x_test,y_test):
    print("Model : Decision Tree")
    param_grid={'criterion':['gini','entropy'],'max_depth':[5,7,9,13,15,23],'min_samples_leaf':[1,2,5]}
    grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5, scoring='accuracy')
    grid.fit(x_train,y_train)
    y_test_pred=grid.predict(x_test)
    print(grid.best_params_)
    acc_test=round(accuracy_score(y_test_pred,y_test),4)*100
    print("Accuracy of Model  on test set : ",acc_test)
    print('Classification Report :')
    print(classification_report(y_test,y_test_pred,digits=8))
    con_mat=confusion_matrix(y_test,y_test_pred,labels=[1,2,3,4,5,6,7,8])
    print("Confusion Matrix of Model on test set : ")
    print(con_mat)
    print("The total correctly classified data : ",np.trace(con_mat))
    classifier=OneVsRestClassifier(DecisionTreeClassifier(criterion=grid.best_params_['criterion'],max_depth=grid.best_params_['max_depth'],min_samples_leaf=grid.best_params_['min_samples_leaf']))
    y_pred1=classifier.fit(x_train,y_train).predict_proba(x_test)
    y_test1=label_binarize(y_test, classes=[*range(8)])
    #print('Precision-Recall Curve')
    #prec_recall(y_test1,y_pred1)
    #print('ROC Curve')
    #roc(y_test1,y_pred1)




In [5]:
if __name__ == '__main__':
    train=pd.read_excel('S11_emgdata/S11_trainset_P135_70.xls')  #reading the xls file into dataframe
    test=pd.read_excel('S11_emgdata/S11_testset_P12345_30.xls')
    
    x_tr=train.drop(43,axis=1)    #separating the target values
    y_tr=train[43]
    x_te=test.drop(43,axis=1)
    y_te=test[43]
    
    x_train=x_tr.to_numpy()        # converting dataframe to numpy array
    y_train=y_tr.to_numpy()
    x_test=x_te.to_numpy()
    y_test=y_te.to_numpy()
    
    sc=StandardScaler()
    x_train=sc.fit_transform(x_train)       #standardizing the features for better traing process
    x_test=sc.fit_transform(x_test)
    model(x_train,y_train,x_test,y_test)

    
    

Model : Decision Tree
{'criterion': 'entropy', 'max_depth': 9, 'min_samples_leaf': 1}
Accuracy of Model  on test set :  94.39
Classification Report :
              precision    recall  f1-score   support

           1  0.91719745 0.97406990 0.94477857       887
           2  0.99658703 0.99207248 0.99432463       883
           3  0.90238612 0.94760820 0.92444444       878
           4  0.93390453 0.85826772 0.89449004       889
           5  0.98996656 0.99663300 0.99328859       891
           6  0.86777778 0.89461627 0.88099267       873
           7  0.95278450 0.89431818 0.92262603       880
           8  0.99649942 0.99417928 0.99533800       859

    accuracy                      0.94389205      7040
   macro avg  0.94463792 0.94397063 0.94378537      7040
weighted avg  0.94462361 0.94389205 0.94373637      7040

Confusion Matrix of Model on test set : 
[[864   0   0  18   0   0   4   1]
 [  0 876   1   0   5   1   0   0]
 [  9   1 832  34   0   1   1   0]
 [ 69   0  20 763   1 

  Y /= np.sum(Y, axis=1)[:, np.newaxis]
