# Classification
2021.6.8

In [1]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix
from mlxtend.plotting import plot_decision_regions

from sklearn import tree


import joblib

import matplotlib.pyplot as plt
import seaborn as sns

## Process data

In [5]:
# Load the pre-computed feature table; the first CSV column becomes the row index.
# The relative path uses a forward slash so it resolves on Windows, Linux and
# macOS alike (a backslash is only a path separator on Windows).
feature = pd.read_csv('data/feature.csv', index_col=0)

# Preview the first ten rows.
feature.head(10)

Unnamed: 0,ApEn,peek,RMS,mean frequency,label
0,0.067594,9,472.682642,3.5,1
1,0.037022,10,155.184031,5.0,1
2,0.009688,9,304.301872,5.5,1
3,0.029392,12,558.420084,7.5,1
4,0.052558,7,151.881867,3.0,1
5,0.014919,11,138.696101,3.5,1
6,0.028359,11,434.538039,11.5,1
7,0.017113,13,231.930831,3.0,1
8,0.029919,9,177.993019,1.5,1
9,0.029906,16,531.765711,15.5,1


## PCA

In [6]:
# Principal component analysis over every column except the label.
# NOTE(review): the columns are passed to PCA without standardization; PCA is
# scale-sensitive, so a column with much larger magnitudes may dominate the
# explained variance -- confirm this is intended.
feature_matrix = feature.drop(columns='label').to_numpy()

pca = PCA()
newX = pca.fit_transform(feature_matrix)

# Fraction of the total variance captured by each principal component.
print(pca.explained_variance_ratio_)

[9.98322580e-01 1.23689664e-03 4.40513438e-04 9.65971015e-09]


## SVM

In [10]:
# Hold out 30% of the samples for testing; the SVM only uses the ApEn and RMS
# columns. random_state is fixed so the split is reproducible.
svm_inputs = feature[['ApEn', 'RMS']]
svm_targets = feature['label']
x_train, x_test, y_train, y_test = train_test_split(
    svm_inputs,
    svm_targets,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Write each part of the SVM split to its own CSV file in the working directory.
for split_part, csv_name in (
    (x_train, 'x_train.csv'),
    (y_train, 'y_train.csv'),
    (x_test, 'x_test.csv'),
    (y_test, 'y_test.csv'),
):
    split_part.to_csv(csv_name)

In [None]:
# Standardize the training features (zero mean, unit variance per column).
# The fitted scaler stays available as `scaler`.
# Caution: x_train is overwritten with its scaled version, so running this cell
# a second time refits the scaler on already-scaled data.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

In [None]:
# Fit an RBF-kernel SVM on the standardized training features.
svm = SVC(kernel='rbf')
svm.fit(x_train, y_train)

# Decision regions of the fitted classifier, drawn over the training points.
plt.figure(figsize = (10,6))
plot_decision_regions(np.array(x_train), np.array(y_train), clf=svm, legend=2)

plt.xlabel('ApEn')
plt.ylabel('RMS')
plt.title('Classification with SVM method.', y=-0.15)  # negative y places the title below the axes
plt.savefig('SVM.png', dpi=600)

# Persist the fitted classifier to disk.
joblib.dump(svm, "EEG_svm.m")

# The classifier was trained on standardized inputs, so the test set must be
# standardized with the *training* statistics before prediction. x_train has
# already been replaced by its scaled version, therefore the raw training rows
# are looked up again in `feature` through the index that y_train still carries.
assert feature.index.is_unique, "feature index must be unique to recover the training rows"
train_raw = feature.loc[y_train.index, x_test.columns]
test_scaler = StandardScaler().fit(train_raw)
x_test_scaled = test_scaler.transform(x_test)

print(svm.score(x_train, y_train))  # mean accuracy on the training set
print('Accuracy:', accuracy_score(y_test, svm.predict(x_test_scaled)))

## Grid Search

In [None]:
# Grid search over C and gamma for the RBF-kernel SVM, using 4-fold
# cross-validation on the standardized training data.
param_grid = {'C':[0.1, 1, 10], 'gamma':[1, 0.1, 0.01]}
search_model = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=4)
search_model.fit(x_train, y_train)

# Mean cross-validated score of every parameter combination.
for p, s in zip(search_model.cv_results_['params'], search_model.cv_results_['mean_test_score']):
    print(p, s)

print(search_model.best_params_)

print(search_model.best_score_)
print(search_model.best_estimator_)

# Evaluate on the held-out test set. The test features must be standardized
# with the *training* statistics, not with a scaler fitted on the test data.
# x_train has already been replaced by its scaled version, therefore the raw
# training rows are looked up again in `feature` through the index that
# y_train still carries.
assert feature.index.is_unique, "feature index must be unique to recover the training rows"
train_raw = feature.loc[y_train.index, x_test.columns]
x_test_scaled = StandardScaler().fit(train_raw).transform(x_test)

print(classification_report(y_test, search_model.predict(x_test_scaled)))

In [None]:
{'C': 0.1, 'gamma': 1} 0.9595029796383152
{'C': 0.1, 'gamma': 0.1} 0.9521737557763207
{'C': 0.1, 'gamma': 0.01} 0.9351558927606739
{'C': 1, 'gamma': 1} 0.9607452774658556
{'C': 1, 'gamma': 0.1} 0.9591302162188321
{'C': 1, 'gamma': 0.01} 0.9511800903471374
{'C': 10, 'gamma': 1} 0.9599999358049418
{'C': 10, 'gamma': 0.1} 0.9604967685195336
{'C': 10, 'gamma': 0.01} 0.9563977906036212
{'C': 1, 'gamma': 1}
0.9607452774658556
SVC(C=1, gamma=1)

In [None]:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98      2785
           1       0.90      0.92      0.91       665

    accuracy                           0.96      3450
   macro avg       0.94      0.95      0.94      3450
weighted avg       0.96      0.96      0.96      3450


### Grid search with RMS and mean frequency as features

In [None]:
{'C': 0.1, 'gamma': 1} 0.9592546558700454
{'C': 0.1, 'gamma': 0.1} 0.9481987854294995
{'C': 0.1, 'gamma': 0.01} 0.9325468574543795
{'C': 1, 'gamma': 1} 0.9636025130883847
{'C': 1, 'gamma': 0.1} 0.9586331983261879
{'C': 1, 'gamma': 0.01} 0.9475776982417468
{'C': 10, 'gamma': 1} 0.9648448726419427
{'C': 10, 'gamma': 0.1} 0.9599997506268895
{'C': 10, 'gamma': 0.01} 0.9540372642436488
{'C': 10, 'gamma': 1}
0.9648448726419427
SVC(C=10, gamma=1)
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2785
           1       0.90      0.92      0.91       665

    accuracy                           0.96      3450
   macro avg       0.94      0.95      0.94      3450
weighted avg       0.96      0.96      0.96      3450

## Decision tree

In [None]:
# The decision tree uses all four feature columns; same 70/30 split settings
# (test_size=0.3, random_state=0) as the SVM section.
dt_inputs = feature[['ApEn', 'peek', 'RMS', 'mean frequency']]
dt_targets = feature['label']
x_dttrain, x_dttest, y_dttrain, y_dttest = train_test_split(
    dt_inputs,
    dt_targets,
    test_size=0.3,
    random_state=0,
)

# Write each part of the split to its own CSV file in the working directory.
for split_part, csv_name in (
    (x_dttrain, 'x_dttrain.csv'),
    (y_dttrain, 'y_dttrain.csv'),
    (x_dttest, 'x_dttest.csv'),
    (y_dttest, 'y_dttest.csv'),
):
    split_part.to_csv(csv_name)

In [None]:
# Instantiate a shallow decision tree (depth limit 3, at least 100 samples per
# split and per leaf) and fit it on the training split; fit() returns the
# estimator itself.
dtree = tree.DecisionTreeClassifier(
    random_state=30,
    max_depth=3,
    min_samples_leaf=100,
    min_samples_split=100,
).fit(x_dttrain, y_dttrain)

# Mean accuracy on the held-out test split.
result = dtree.score(x_dttest, y_dttest)
# recorded value: 0.9637681159420289

In [None]:
# Feature names, listed in the same column order used to train the tree.
fn = ['ApEn', 'peek', 'RMS', 'mean frequency']

# Single high-resolution axes to draw the fitted tree on.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(4, 4), dpi=600)
tree.plot_tree(
    dtree,
    feature_names=fn,
    # NOTE(review): assumes label 0 means normal and label 1 means epilepsy -- confirm
    class_names=['Normal', 'Epilepsy'],
    filled=True,
    rounded=True,
    ax=axes,
)

# Export the figure as a raster image and as a PDF.
fig.savefig('Decision tree.png', dpi=600)
fig.savefig('Decision tree.pdf')

In [None]:
# Pair every training column with its importance in the fitted tree.
# The names are taken from the training frame itself, so they always match the
# order of dtree.feature_importances_.
list(zip(x_dttrain.columns, dtree.feature_importances_))

In [None]:
[('ApEn', 0.0),
 ('peek', 0.03885787561867085),
 ('RMS', 0.9611421243813291),
 ('mean frequency', 0.0)]

In [None]:
# Per-class precision / recall / F1 of the decision tree on the test split.
dt_predictions = dtree.predict(x_dttest)
dt_report = classification_report(y_dttest, dt_predictions)
print(dt_report)

In [None]:

              precision    recall  f1-score   support

           0       0.97      0.98      0.98      2785
           1       0.93      0.88      0.90       665

    accuracy                           0.96      3450
   macro avg       0.95      0.93      0.94      3450
weighted avg       0.96      0.96      0.96      3450

## Decision tree excluding the `peek` feature

In [None]:
0.9585507246376812

In [None]:
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      2785
           1       0.92      0.86      0.89       665

    accuracy                           0.96      3450
   macro avg       0.94      0.92      0.93      3450
weighted avg       0.96      0.96      0.96      3450

In [None]:
[('ApEn', 0.014938370928239797),
 ('RMS', 0.9548974434188772),
 ('mean frequency', 0.030164185652883146)]