<a href="https://colab.research.google.com/github/aryankotru/MINERVA/blob/main/MINERVA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import scipy.io
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pywt
from scipy.stats import entropy
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.signal import butter, sosfilt, sosfreqz, lfilter, iirnotch, spectrogram

global variable declaration

In [79]:
# --- Epoching ---------------------------------------------------------------
epoch = 710          # number of samples in one epoch
num_epochs = 12
blocks = 2

# --- Filtering / decomposition ----------------------------------------------
# Sample rate and desired cutoff frequencies (in Hz)
fs = 250
lowcut = 1
highcut = 30
filter_order = 9                 # order handed to the Butterworth band-pass
notch_quality_factor = 0.693     # Q handed to the notch filter
wavelet = 'sym9'                 # wavelet name handed to pywt.wavedec

# --- Lookup tables ----------------------------------------------------------
# Band name -> [lower edge, upper edge].
freq_bands = {
    "delta": [0.3, 4],
    "theta": [4, 8],
    "alpha": [8, 13],
    "beta": [13, 30],
    "gamma": [30, 50],
}

# Twelve keypad symbols.
chars = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "*", "#"]

# Stimulation frequency -> second value.
# NOTE(review): the values look like phase offsets (perhaps in units of pi) - confirm.
stim_freqs = {
    9.25: 0,
    11.25: 0,
    13.25: 0,
    9.75: 0.5,
    11.75: 0.5,
    13.75: 0.5,
    10.25: 1,
    12.25: 1,
    14.25: 1,
    10.75: 1.5,
    12.75: 1.5,
    14.75: 1.5,
}

# Column labels applied to the recording, in column order.
channel_names = ["POz", "PO3", "PO4", "PO5", "PO6", "Oz", "O1", "O2"]

# --- Time axis --------------------------------------------------------------
T = 2.84
nsamples = T * fs              # T multiplied by the sample rate (a float)
t = np.arange(epoch) / fs      # time stamp of every sample within one epoch

input data

In [3]:
# Read the EEG recording from the CSV file (it has no header row) and
# transpose it, so the file's rows become the frame's columns.
df = pd.read_csv("S001.csv", header=None).T

In [80]:
# Label the columns with the names listed in channel_names, then display the frame.
df.columns = pd.Index(channel_names)
df

Unnamed: 0,POz,PO3,PO4,PO5,PO6,Oz,O1,O2
0,0.123467,0.114257,0.124512,0.111462,0.113582,0.107444,0.111674,0.109825
1,0.112893,0.110927,0.111607,0.110476,0.112080,0.106956,0.109674,0.109044
2,0.083783,0.114444,0.098959,0.116667,0.116939,0.113104,0.113031,0.114177
3,0.077442,0.119146,0.103133,0.121078,0.121045,0.117413,0.117155,0.117924
4,0.100905,0.119532,0.119197,0.118198,0.119499,0.114409,0.116559,0.115945
...,...,...,...,...,...,...,...,...
170395,0.944572,0.684438,0.637771,0.627494,0.603922,0.560036,0.739756,0.649078
170396,0.943351,0.683937,0.635483,0.627203,0.601312,0.559548,0.740082,0.648803
170397,0.943422,0.683703,0.635636,0.627071,0.601352,0.559758,0.740370,0.649163
170398,0.944416,0.684216,0.637584,0.627319,0.603440,0.559777,0.739707,0.648679


testing, validation and training data

In [81]:
# Contiguous 60 % / 20 % / 20 % split by row position.
# Plain .iloc slices are used instead of np.split(df, ...): np.split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning on recent pandas. The cut points are the same as before.
num_rows, num_cols = df.shape
train_end = int(0.6 * num_rows)
valid_end = int(0.8 * num_rows)

train = df.iloc[:train_end]
valid = df.iloc[train_end:valid_end]
test = df.iloc[valid_end:]

# Number of whole epochs in the full recording (a trailing partial epoch is not counted).
num_epochs = num_rows // epoch

data filtering

In [6]:
def notch_filter(data, rate, freq, quality):
    """Apply an IIR notch centred on ``freq`` to ``data``, forwards and backwards.

    Parameters
    ----------
    data : array-like
        Signal to filter.
    rate : float
        Sampling rate; ``freq`` is normalised by ``rate / 2``.
    freq : float
        Frequency to remove, in the same unit as ``rate``.
    quality : float
        Quality factor handed to ``scipy.signal.iirnotch``.

    Returns
    -------
    The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    # Reference: https://neuraldatascience.io/7-eeg/erp_filtering.html
    nyquist = rate / 2
    numerator, denominator = scipy.signal.iirnotch(freq / nyquist, quality)
    return scipy.signal.filtfilt(numerator, denominator, data)

def butter_bandpass(lowcut, highcut, fs, order=filter_order):
    """Design a digital Butterworth band-pass filter in second-order sections.

    Parameters
    ----------
    lowcut, highcut : float
        Lower and upper band edges, in the same unit as ``fs``.
    fs : float
        Sampling rate; both edges are normalised by ``fs / 2``.
    order : int
        Order passed to ``scipy.signal.butter`` (defaults to the module-level
        ``filter_order`` as it was when this function was defined).

    Returns
    -------
    The ``sos`` array produced by ``butter(..., output='sos')``.
    """
    nyquist = fs / 2
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band', analog=False, output='sos')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=filter_order):
    """Band-pass ``data`` with the filter designed by ``butter_bandpass``.

    The second-order sections are applied with ``sosfilt``, i.e. in a single
    forward pass.

    Parameters
    ----------
    data : array-like
        Signal to filter.
    lowcut, highcut, fs, order
        Forwarded unchanged to ``butter_bandpass``.

    Returns
    -------
    The filtered signal as returned by ``sosfilt``.
    """
    sections = butter_bandpass(lowcut, highcut, fs, order=order)
    return sosfilt(sections, data)

data normalization

In [7]:
def dataset_scaler(dataframe):
    """Min-max scale ``dataframe`` with a freshly fitted ``MinMaxScaler``.

    The scaler is fitted on the frame that is passed in and applied to that
    same frame. MinMaxScaler works per column, so every column is rescaled
    independently of the others.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Numeric data to scale.

    Returns
    -------
    pandas.DataFrame
        The scaled values in a new frame. The new frame has default integer
        row and column labels; the labels of ``dataframe`` are not copied.
    """
    # TODO (from the original author): scaling happens column-wise; make it
    # row-wise or change the data layout.
    scaler = MinMaxScaler()
    minmax_scaled_data = scaler.fit_transform(dataframe)

    # The earlier version also called np.reshape / np.vstack and discarded the
    # results, and opened an empty matplotlib figure on every call; none of
    # that affected the returned frame, so it has been removed.
    return pd.DataFrame(minmax_scaled_data)

In [8]:
# Alternative kept for reference: scale only the training split.
#train = dataset_scaler(train)
# NOTE(review): the argument here is the whole frame `df`, not the training
# split, so the scaler is fitted on every row. An earlier comment on this line
# said "only transform training data" - confirm which of the two is intended.
df = dataset_scaler(df)

<Figure size 640x480 with 0 Axes>

In [13]:
def calculate_features(coeffs):
    """Summarise one coefficient array with four statistics.

    Parameters
    ----------
    coeffs : array-like
        Values to summarise.

    Returns
    -------
    tuple
        ``(mean, standard deviation, skewness, entropy)``, where the entropy
        is ``scipy.stats.entropy`` of the absolute values of ``coeffs``.
    """
    magnitudes = np.abs(coeffs)
    return (
        np.mean(coeffs),
        np.std(coeffs),
        scipy.stats.skew(coeffs),
        entropy(magnitudes),
    )


# Names of the four values returned by calculate_features, in the same order.
feature_names = ['mean', 'std', 'skewness', 'entropy']

In [None]:

# Plot the wavelet decomposition of the first epoch, one figure per channel.
for n, channel_name in enumerate(channel_names):
    # Take column n. The earlier version always read column 0, so every
    # figure showed the same channel, and it stopped after seven channels.
    data = df.iloc[:epoch, n]
    coeffs = pywt.wavedec(data, wavelet)

    # pywt.wavedec returns [cA_L, cD_L, cD_(L-1), ..., cD_1], so the deepest
    # level L is len(coeffs) - 1 and coeffs[k] (k >= 1) is detail level L + 1 - k.
    top_level = len(coeffs) - 1

    # One figure per channel: the first row holds the approximation, the
    # remaining rows hold coeffs[2:]. As before, coeffs[1] is not drawn.
    fig, axes = plt.subplots(top_level, 1, figsize=(10, 8), squeeze=False)
    axes = axes[:, 0]

    axes[0].plot(coeffs[0])
    axes[0].set_title(f'Approximation Coefficients Level {top_level} for Channel {channel_name}')

    for row, coeff in enumerate(coeffs[2:], start=1):
        axes[row].plot(coeff)
        axes[row].set_title(f'Detail Coefficients Level {top_level - row}')

    fig.tight_layout()
    plt.show()
#https://drive.google.com/file/d/1Axi2NdCyutM_eVkewdMsQgotnqzJAWNP/view
#ON HOLD FOR TIME BEING, FINISH FIRST PASS IMPLEMENTATION, THEN COME BACK, FIGURE THIS OUT AND FINE TUNE

In [71]:
# Epoch length in samples, and how many whole epochs fit into the frame.
epoch_len = 710
num_epochs = len(df) // epoch_len
num_epochs

240

wavelet transform & feature extraction

In [None]:
def feature_generator(df, channel_names, epoch_len, wavelet_name=None):
    """Build a per-channel table of wavelet-coefficient statistics.

    The frame is cut into consecutive epochs of ``epoch_len`` rows; rows left
    over after the last whole epoch are ignored. Every epoch of every channel
    is decomposed with ``pywt.wavedec`` and each coefficient array that is
    used is summarised by ``calculate_features``.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with one column per channel. Columns are read by position,
        in the order of ``channel_names``.
    channel_names : sequence of str
        Names used as the columns of the result.
    epoch_len : int
        Number of rows per epoch.
    wavelet_name : str, optional
        Wavelet passed to ``pywt.wavedec``. When omitted, the module-level
        ``wavelet`` setting is used, as before.

    Returns
    -------
    pandas.DataFrame
        One column per channel. Within a column the values are laid out epoch
        by epoch; inside an epoch come the four statistics of each array in
        ``coeffs[2:]`` followed by the four statistics of ``coeffs[0]``.
        ``coeffs[1]`` is not used. The frame is empty when ``channel_names``
        is empty.
    """
    if wavelet_name is None:
        wavelet_name = wavelet

    num_epochs = len(df.index) // epoch_len

    channel_dict = {}
    for channel, channel_name in enumerate(channel_names):
        channel_list = []
        for n in range(num_epochs):
            data = df.iloc[n * epoch_len:(n + 1) * epoch_len, channel]
            coeffs = pywt.wavedec(data, wavelet_name)
            for coeff in coeffs[2:]:
                channel_list.extend(calculate_features(coeff))
            channel_list.extend(calculate_features(coeffs[0]))
        channel_dict[channel_name] = channel_list

    # Build the frame once, after all channels are collected. Previously it
    # was rebuilt inside the loop and was undefined for an empty channel list.
    return pd.DataFrame.from_dict(channel_dict)

    #print(f'Approx. Coefficients Level {i} for Channel {channel} epoch {n}')

  #print(channel_list)
  #channel_dict = {key: val for val, key in enumerate(channel_dict.keys(), channel_list)}

#feature_generator(train, channel_names, 710)


main function

In [None]:
def preprocessing(df, epoch_len):
    """Notch- and band-pass-filter every epoch of every column of ``df``.

    The frame is cut into consecutive epochs of ``epoch_len`` rows and each
    epoch of each column is filtered on its own: first ``notch_filter`` at
    50 Hz with ``notch_quality_factor``, then ``butter_bandpass_filter``
    between ``lowcut`` and ``highcut`` with ``filter_order``. The sample rate
    is the module-level ``fs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric recording, one column per channel.
    epoch_len : int
        Number of rows per epoch.

    Returns
    -------
    pandas.DataFrame
        The filtered whole epochs, with the column labels of ``df`` and the
        row labels of the rows that were filtered. Rows after the last whole
        epoch are not included. ``df`` itself is left unmodified.
    """
    num_epochs = len(df.index) // epoch_len
    n_rows = num_epochs * epoch_len

    # Work on a float copy so the caller's frame is never modified.
    values = df.iloc[:n_rows].to_numpy(dtype=float, copy=True)

    # Use the frame's own width and the epoch_len argument; the earlier
    # version read the globals `num_cols` and `epoch` and dropped the result.
    for channel in range(values.shape[1]):
        for n in range(num_epochs):
            start, stop = n * epoch_len, (n + 1) * epoch_len
            data = values[start:stop, channel]
            data = notch_filter(data, fs, 50, notch_quality_factor)
            data = butter_bandpass_filter(data, lowcut, highcut, fs, order=filter_order)
            values[start:stop, channel] = data

    return pd.DataFrame(values, index=df.index[:n_rows], columns=df.columns)




# Run the filtering stage on the full recording and keep whatever it returns.
# The earlier call passed `data`, which at this point is a single-column slice
# left over from the wavelet plotting cell, not the recording.
df_filtered = preprocessing(df, epoch_len)

In [None]:
# Write the frame to sheet "sheet1" of test.xlsx, leaving out the row index.
# NOTE(review): `df_temp` is not assigned anywhere in the visible part of the
# notebook - confirm which frame is meant to be exported before running this.
df_temp.to_excel('test.xlsx', sheet_name='sheet1', index=False)