# Seizure Prediction Procedure
1.   Preprocessing of the dataset
2.   Balancing the dataset
3.   Filtering with band-pass filters to increase the SNR
4.   Feature extraction
5.   Normalization
6.   SVM vs. KNN for classification
7.   Testing & validation using K-fold cross-validation



# Importing and installing the required libraries & mounting the Google Drive folder


In [None]:
import pandas as pd
import numpy as np
import librosa, librosa.display

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# stored in Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the dataset CSV from the mounted Drive folder into a DataFrame.
df= pd.read_csv('/content/drive/My Drive/data.csv')

In [None]:
# Convert the target into a binary label: 1 where y == 1, otherwise 0.
df["OUTPUT_LABEL"] = (df.y == 1).astype(int)
# The original target column is not needed once the binary label exists.
del df["y"]
# Drop the first column in place.
# NOTE(review): presumably a row-identifier column - confirm against data.csv.
df.drop(columns=df.columns[0], inplace=True)

In [None]:
def calc_prevalence(y_actual):
    """Return the sum of the labels divided by the number of labels.

    For 0/1 labels this is the share of samples in the positive class
    (label = 1).
    """
    positive_count = sum(y_actual)
    sample_count = len(y_actual)
    return positive_count / sample_count


# Report the share of positive labels in the loaded dataset, to three decimals.
positive_rate = calc_prevalence(df["OUTPUT_LABEL"].values)
print("prevalence of the positive class: %.3f" % positive_rate)

prevalence of the positive class: 0.200


# Filtering

In [None]:
from scipy.signal import butter, lfilter

def butter_bandpass(lowcut, highcut, fs, order=4):
    """Design a Butterworth band-pass filter.

    Both cutoff frequencies are divided by the Nyquist frequency (half of
    ``fs``) before being passed to ``scipy.signal.butter``.

    Args:
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Order argument forwarded to ``butter``.

    Returns:
        The ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = 0.5 * fs
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, normalized_band, btype='band')
    return numerator, denominator


def butter_bandpass_filter(data, lowcut, highcut, fs, order=4):
    """Band-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.lfilter``. No ``axis`` argument is passed, so lfilter's
    default axis is used (the last axis, per the SciPy documentation).

    Args:
        data: Samples to filter.
        lowcut: Lower cutoff frequency.
        highcut: Upper cutoff frequency.
        fs: Sampling frequency.
        order: Filter order forwarded to ``butter_bandpass``.

    Returns:
        The filtered samples as returned by ``lfilter``.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numerator, denominator, data)

# Feature Extraction

In [None]:
#Statistical
def statisticalMean(signal):
  """Return the absolute value of the arithmetic mean of ``signal``."""
  average = np.mean(signal)
  return abs(average)

def statisticalSTD(signal):
  """Return the standard deviation of ``signal`` as computed by ``np.std``."""
  spread = np.std(signal)
  return spread

def statisticalSkewness(signal):
  """Return the skewness of ``signal`` via its own ``skew()`` method.

  ``signal`` must therefore be an object that provides ``skew()``, such as
  a pandas Series.
  """
  skewness = signal.skew()
  return skewness

def statisticalKurtosis(signal):
  """Return the kurtosis of ``signal`` via its own ``kurtosis()`` method.

  ``signal`` must therefore be an object that provides ``kurtosis()``, such
  as a pandas Series.
  """
  kurtosis_value = signal.kurtosis()
  return kurtosis_value


In [None]:
#Spectral
def averageEnergy(signal):
  """Return the average energy (mean of the squared samples) of ``signal``.

  The samples are converted to a float NumPy array first, so the result
  does not depend on a pandas index (values are used positionally) and
  small integer dtypes cannot overflow when squared. The squares are summed
  along the first axis and divided by the number of entries on that axis.

  Args:
    signal: Sequence, NumPy array or pandas Series of numeric samples.

  Returns:
    The mean squared value as a NumPy float (an array for 2-D input).

  Raises:
    ZeroDivisionError: If ``signal`` is empty.
  """
  samples = np.asarray(signal, dtype=float)
  n_samples = len(samples)
  if n_samples == 0:
    # Keep the exception type that dividing by a zero length produced before.
    raise ZeroDivisionError("averageEnergy() requires a non-empty signal")
  return np.sum(np.square(samples), axis=0) / n_samples

def spectralCentroid(signal, fs=178):
  """Return the spectral centroid of ``signal`` in Hz.

  The centroid is the weighted mean of the frequencies present in the
  signal, with the magnitudes of the real FFT used as weights.

  Args:
    signal: 1-D sequence, NumPy array or pandas Series of real samples.
    fs: Sampling frequency in Hz. Defaults to 178, the rate that was
      previously hard-coded in this function.

  Returns:
    The centroid frequency in Hz, or 0.0 when the spectrum is all zeros
    (for example an all-zero signal), where the weighted mean is undefined.
  """
  samples = np.asarray(signal, dtype=float)
  magnitudes = np.abs(np.fft.rfft(samples))
  total_magnitude = magnitudes.sum()
  if total_magnitude == 0:
    # Avoid 0/0 (NaN plus a RuntimeWarning) for a signal without spectral content.
    return 0.0
  # Frequency in Hz of every rFFT bin; weighting these directly replaces the
  # former "normalise to [0, 1], then multiply by the top frequency" detour.
  frequencies = np.fft.rfftfreq(len(samples), d=1.0 / fs)
  return np.sum(frequencies * magnitudes) / total_magnitude


In [None]:
def signalFilter(data, fs):
  """Split ``data`` into four band-pass filtered copies.

  Each band is produced by ``butter_bandpass_filter`` with order 4:
  delta 0.5-4, theta 4-8, alpha 8-12 and beta 12-25 (same units as ``fs``).

  Returns:
    A 4-tuple ``(delta, theta, alpha, beta)``.
  """
  band_edges = (
    (0.5, 4),   # delta
    (4, 8),     # theta
    (8, 12),    # alpha
    (12, 25),   # beta
  )
  return tuple(
    butter_bandpass_filter(data, low, high, fs, order=4)
    for low, high in band_edges
  )

In [None]:
def filter_extract(dataframe):
  """Band-pass filter every input row and build a table of its features.

  All columns except 'OUTPUT_LABEL' are treated as signal samples. They are
  band-pass filtered (0.5-25, fs=178, order 4) and the result is transposed,
  so that each column of the working frame holds the filtered signal of one
  input row. One feature record is then computed per column.

  Args:
    dataframe: DataFrame holding the signal columns plus 'OUTPUT_LABEL'.

  Returns:
    A new DataFrame with the columns 'Standard Deviation', 'Skewness',
    'Kurtosis', 'Spectral Centroid', 'delta', 'theta', 'alpha' and 'beta'
    (the last four are the average energies of the band-filtered signal).
    NOTE(review): the result carries a fresh default integer index, so it
    only lines up with ``dataframe`` when that also has a default index.
  """
  sampling_rate = 178
  feature_names = [
    'Standard Deviation', 'Skewness', 'Kurtosis', 'Spectral Centroid',
    'delta', 'theta', 'alpha', 'beta',
  ]

  raw_signals = dataframe.drop(columns=['OUTPUT_LABEL'])
  filtered = butter_bandpass_filter(
    raw_signals, lowcut=0.5, highcut=25, fs=sampling_rate, order=4)
  signals = pd.DataFrame(filtered.T)

  records = []
  for column in signals:
    current = signals[column]
    delta, theta, alpha, beta = signalFilter(current, sampling_rate)
    records.append({
      'Standard Deviation': statisticalSTD(current),
      'Skewness': statisticalSkewness(current),
      'Kurtosis': statisticalKurtosis(current),
      'Spectral Centroid': spectralCentroid(current),
      'delta': averageEnergy(delta),
      'theta': averageEnergy(theta),
      'alpha': averageEnergy(alpha),
      'beta': averageEnergy(beta),
    })

  return pd.DataFrame(records, columns=feature_names)

# Normalization (Feature Scaling)

In [None]:
from sklearn.preprocessing import MinMaxScaler
def normalizeData(dataFrame):
  """Min-max scale every column of ``dataFrame`` to the range 0-100.

  A new ``MinMaxScaler`` is fitted on, and applied to, the whole input.

  Returns:
    A new DataFrame with the scaled values and the same column labels and
    index as ``dataFrame``.
  """
  scaler = MinMaxScaler(feature_range=(0, 100), copy=True)
  scaled_values = scaler.fit_transform(dataFrame)
  return pd.DataFrame(
    scaled_values, index=dataFrame.index, columns=dataFrame.columns)

# Testing

In [None]:
# Band-pass filter the dataset and compute one feature row per input row.
features= filter_extract(df)
# Rescale every feature column to the 0-100 range.
# NOTE(review): the scaler is fitted on all rows before the K-fold split used
# later, so test-fold values influence the scaling - confirm this is acceptable.
result= normalizeData(features)

In [None]:
# Attach the binary target to the feature table (pandas aligns the assignment
# on the index, so both frames are expected to share the same index).
result['Label']= df['OUTPUT_LABEL']

# SVM vs. KNN Classification
using the proposed algorithm

In [None]:
#svm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold 
from sklearn.metrics import accuracy_score

# Linear-kernel SVM evaluated with 5-fold cross-validation on the scaled features.
svclassifier = SVC(kernel='linear', C=1)
X = result.drop(columns=['Label'])
y = result['Label']

# KFold cross-validation without shuffling: folds are taken in row order.
kf = KFold(n_splits=5, shuffle=False)

accuracy_model = []  # accuracy of each fold, in percent
cm_holder = []       # confusion matrix of each fold

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected by position for
    # both X and y (plain y[...] would be a label-based lookup).
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    # Train the model on this fold and predict its held-out rows once.
    model = svclassifier.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    # Pin the label order so every fold yields a 2x2 matrix, even when one
    # class is missing from a fold.
    fold_matrix = confusion_matrix(y_test, y_predict, labels=[0, 1])
    print("new Matrix")
    print(fold_matrix)
    cm_holder.append(fold_matrix)
    accuracy_model.append(accuracy_score(y_test, y_predict, normalize=True)*100)

# Accumulate from zeros: np.empty returns uninitialised memory, so summing
# onto it could silently corrupt the total confusion matrix.
total = np.zeros([2, 2])
for fold_matrix in cm_holder:
    total = total + fold_matrix
print("the total confusion matrix is", total)   
#print(accuracy_model)

new Matrix
[[1818   26]
 [  56  400]]
new Matrix
[[1832   18]
 [  46  404]]
new Matrix
[[1813   19]
 [  37  431]]
new Matrix
[[1815   24]
 [  48  413]]
new Matrix
[[1813   22]
 [  50  415]]
the total confusion matrix is [[9091.  109.]
 [ 237. 2063.]]


In [None]:
Results of SVM (computed from the total confusion matrix above)
Accuracy = 0.970
Specificity = 0.988
Sensitivity = 0.897

In [None]:
#KNN
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold 
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier evaluated with 5-fold cross-validation on
# the scaled features.
classifier = KNeighborsClassifier(n_neighbors=5)
X = result.drop(columns=['Label'])
y = result['Label']

# KFold cross-validation without shuffling: folds are taken in row order.
kf = KFold(n_splits=5, shuffle=False)

accuracy_model = []  # accuracy of each fold, in percent
cm_holder = []       # confusion matrix of each fold

for train_index, test_index in kf.split(X):
  # KFold yields positional indices, so rows are selected by position for
  # both X and y (plain y[...] would be a label-based lookup).
  X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  # Train the model on this fold and predict its held-out rows once.
  model = classifier.fit(X_train, y_train)
  y_predict = model.predict(X_test)
  # Pin the label order so every fold yields a 2x2 matrix, even when one
  # class is missing from a fold.
  fold_matrix = confusion_matrix(y_test, y_predict, labels=[0, 1])
  print("new Matrix")
  print(fold_matrix)
  cm_holder.append(fold_matrix)
  accuracy_model.append(accuracy_score(y_test, y_predict, normalize=True)*100)

# Accumulate from zeros: np.empty returns uninitialised memory, so summing
# onto it could silently corrupt the total confusion matrix.
total = np.zeros([2, 2])
for fold_matrix in cm_holder:
  total = total + fold_matrix
print("the total confusion matrix is", total)

new Matrix
[[1823   21]
 [  32  424]]
new Matrix
[[1829   21]
 [  27  423]]
new Matrix
[[1805   27]
 [  28  440]]
new Matrix
[[1817   22]
 [  38  423]]
new Matrix
[[1808   27]
 [  40  425]]
the total confusion matrix is [[9082.  118.]
 [ 165. 2135.]]


In [None]:
Results of KNN (computed from the total confusion matrix above)
Accuracy = 0.975
Specificity = 0.987
Sensitivity = 0.928