# Naive Bayes classification

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
eeg_data = pd.read_csv("data.csv")

In [3]:
eeg_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11500 entries, 0 to 11499
Columns: 180 entries, Signal to y
dtypes: int64(179), object(1)
memory usage: 15.8+ MB


In [4]:
eeg_data.head(1)

Unnamed: 0,Signal,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4


In [5]:
# Check the lables of the Class
eeg_data['y'].unique()

array([4, 1, 5, 2, 3])

In [6]:
# Convert the 5 classes into binary class
def convert_binary_class(y):
    if y == 2 or y == 3 or y == 4 or y == 5 :
        return 0
    else :
        return 1

In [7]:
# Apply above function to convert into binary class
eeg_data['y'] = eeg_data['y'].apply(convert_binary_class)
eeg_data.head()

Unnamed: 0,Signal,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,0
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,0
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,0
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,0


In [8]:
# Remove unnecessory columns that is the name of the signal
del eeg_data['Signal']

In [9]:
eeg_data.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,135,190,229,223,192,125,55,-9,-33,-38,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,0
1,386,382,356,331,320,315,307,272,244,232,...,164,150,146,152,157,156,154,143,129,1
2,-32,-39,-47,-37,-32,-36,-57,-73,-85,-94,...,57,64,48,19,-12,-30,-35,-35,-36,0
3,-105,-101,-96,-92,-89,-95,-102,-100,-87,-79,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,0
4,-9,-65,-98,-102,-78,-48,-16,0,-21,-59,...,4,2,-12,-32,-41,-65,-83,-89,-73,0


### Standardize the Variables

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [11]:
scaler.fit(eeg_data.drop('y',axis=1))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [12]:
scaled_features = scaler.transform(eeg_data.drop('y',axis=1))

In [13]:
eeg_feature = pd.DataFrame(scaled_features)
eeg_feature.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,168,169,170,171,172,173,174,175,176,177
0,0.885051,1.209929,1.462764,1.439539,1.242366,0.818262,0.38091,-0.014267,-0.163196,-0.198415,...,0.105907,-0.04163,-0.020282,-0.107131,-0.375743,-0.535944,-0.693858,-0.634144,-0.43329,-0.235399
1,2.400577,2.36619,2.239441,2.109257,2.037441,1.996033,1.94166,1.719102,1.546403,1.484569,...,1.074264,1.0577,0.972841,0.943023,0.982911,1.018615,1.029295,1.023429,0.954241,0.856537
2,-0.123287,-0.169154,-0.225131,-0.172743,-0.149016,-0.179743,-0.312756,-0.409056,-0.484131,-0.547478,...,0.233004,0.40782,0.455214,0.361582,0.193824,0.008151,-0.103237,-0.136872,-0.138593,-0.144405
3,-0.564057,-0.54253,-0.524794,-0.513803,-0.503073,-0.545472,-0.591462,-0.575607,-0.496475,-0.453979,...,-0.426689,-0.436417,-0.417531,-0.397851,-0.375743,-0.428321,-0.389414,-0.364021,-0.347337,-0.320328
4,0.015586,-0.325731,-0.537025,-0.575814,-0.434746,-0.254129,-0.058825,0.04125,-0.089134,-0.329314,...,0.118012,0.085916,0.08204,0.005598,-0.108758,-0.165242,-0.316347,-0.431551,-0.470127,-0.368858


## Train Test Split

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(eeg_feature,eeg_data['y'],
                                                    test_size=0.30)

In [16]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB(priors=None)

In [17]:
predictions = classifier.predict(X_test)

### Predictions and Evaluations

In [22]:
from sklearn.metrics import confusion_matrix, classification_report
# Confusion Matrix
cm = confusion_matrix(y_test, predictions)
cm

array([[2700,   68],
       [  79,  603]])

In [23]:
# Accuracy = TP + TN / TP + TN + FN + FP
accuracy = (cm[0][0] + cm[1][1]) / (cm[0][0] + cm [0][1] + cm [1][0] + cm[1][1])
# Accuracy
accuracy * 100

95.73913043478261

In [24]:
target_names = ['Seizure Activity', 'Non Seizure Activity ']
print(classification_report(y_test, predictions, target_names=target_names))

                       precision    recall  f1-score   support

     Seizure Activity       0.97      0.98      0.97      2768
Non Seizure Activity        0.90      0.88      0.89       682

          avg / total       0.96      0.96      0.96      3450

