In [1]:
from source import data_import
import pyarrow as pa
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import pandas as pd
from scipy.signal import butter, lfilter

def filter_eeg_channels(df, channels, fs=256, exclude_ranges=None, Q=30,
                        margin=1.0, order=4):
    """
    Remove narrow frequency bands from EEG channels with Butterworth band-stop filters.

    Each entry of `exclude_ranges` is widened by `margin` Hz on both sides and
    turned into one band-stop filter. Every listed channel is passed through
    all of those filters in sequence. Filtering is forward-only
    (scipy.signal.lfilter), so the start of each channel contains the filter's
    start-up transient and the output is phase-shifted relative to the input.

    Parameters:
    - df: DataFrame containing EEG data. It is not modified.
    - channels: Iterable of column names to filter. Other columns are copied
      through unchanged.
    - fs: Sampling frequency in Hz (default is 256 Hz).
    - exclude_ranges: List of [low_hz, high_hz] bands to suppress,
      e.g., [[58, 62], [118, 122]]. None or an empty list means no filtering.
    - Q: Unused. Accepted only so existing calls keep working; the band-stop
      design has no quality-factor input.
    - margin: Extra width in Hz added below and above every band before the
      filter is designed (default 1.0, the value previously hard-coded).
    - order: Butterworth order handed to scipy.signal.butter (default 4, the
      value previously hard-coded).

    Returns:
    - filtered_df: Copy of `df` with the selected channels filtered.

    Raises:
    - ValueError: if a widened band does not lie strictly between 0 Hz and
      the Nyquist frequency (fs / 2), or its lower edge is not below its
      upper edge.
    """
    filtered_df = df.copy()

    if exclude_ranges is None:
        exclude_ranges = []

    nyquist = 0.5 * fs

    # The coefficients depend only on fs and the band, not on the channel, so
    # design every filter once instead of once per channel.
    designs = []
    for exclude_range in exclude_ranges:
        low = (exclude_range[0] - margin) / nyquist
        high = (exclude_range[1] + margin) / nyquist
        # butter() needs normalised edges strictly inside (0, 1); check here so
        # the error names the offending band.
        if not 0.0 < low < high < 1.0:
            raise ValueError(
                f"exclude range {list(exclude_range)} widened by {margin} Hz "
                f"must lie strictly between 0 and the Nyquist frequency "
                f"({nyquist} Hz)"
            )
        designs.append(butter(order, [low, high], btype='bandstop'))

    if not designs:
        return filtered_df

    for channel in channels:
        signal = filtered_df[channel]
        # Apply the filters in the order the ranges were given.
        for b, a in designs:
            signal = lfilter(b, a, signal)
        filtered_df[channel] = signal

    return filtered_df

In [3]:
# Load the recording table through the project's data_import helper, which
# reports reading data/my_data.arrow.
# NOTE(review): the helper's return type is not visible here; the following
# cells use the result as a pandas DataFrame (.columns, .copy(), column access).
p_df_mem = data_import.load_pyarrow(file_name="my_data")

/Users/tassiloheinrich/Documents/DS/ai-seizure-detectives/source/../data/my_data.arrow was loaded.


In [4]:
# Columns that must not be band-stop filtered. Everything else in the table
# is treated as an EEG channel.
# NOTE(review): presumably labels, non-EEG signals and placeholder columns — confirm.
# Index.drop raises KeyError if any of these names is missing from the table.
non_eeg_columns = [
    "is_seizure",
    "before_seizure",
    "ECG",
    "VNS",
    "--0", "--1", "--2", "--3", "--4",
    ".-0", ".-1", ".-2", ".-3", ".-4",
]
channels = p_df_mem.columns.drop(non_eeg_columns)
channels

Index(['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3',
       'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8',
       'T8-P8-0', 'P8-O2', 'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10',
       'FT10-T8', 'T8-P8-1'],
      dtype='object', name='channel')

In [5]:
# Bands (in Hz) to suppress in every EEG channel.
# NOTE(review): these look like 60 Hz mains interference and its 120 Hz harmonic — confirm.
exclude_ranges = [
    [58, 62],
    [118, 122],
]
df = p_df_mem
# Q is forwarded for completeness, but filter_eeg_channels never reads it.
fit_df = filter_eeg_channels(
    df,
    channels,
    fs=256,
    exclude_ranges=exclude_ranges,
    Q=30,
)

In [6]:
# Preview the first five rows of the filtered table.
fit_df.head(n=5)

channel,FP1-F7,F7-T7,T7-P7,P7-O1,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,...,--0,--1,--2,--3,--4,.-0,.-1,.-2,.-3,.-4
0 days 00:00:00,-12.094205,26.713574,-2.525164,5.714844,-0.39871,2.79097,25.650347,-10.233558,-11.828398,-30.700675,...,,,,,,,,,,
0 days 00:00:00.003906250,-3.984894,9.228259,-0.726857,2.078676,-0.002848,1.083164,8.866254,-3.351387,-3.894393,-10.319968,...,,,,,,,,,,
0 days 00:00:00.007812500,-0.758705,2.24748,-0.017454,0.620845,0.147268,0.394352,2.165119,-0.614573,-0.738115,-2.200027,...,,,,,,,,,,
0 days 00:00:00.011718750,-4.978767,12.188861,-0.945918,2.755701,0.009338,1.442223,11.97704,-4.674531,-5.125166,-13.337263,...,,,,,,,,,,
0 days 00:00:00.015625,6.179067,-12.652102,1.46758,-2.511603,0.440694,-1.099635,-12.048158,5.09954,5.960205,15.164319,...,,,,,,,,,,


In [7]:
# Keep only the EEG channels and the is_seizure label for the per-class comparison.
fit_df2 = fit_df.drop(
    columns=[
        "VNS",
        "--0", "--1", "--2", "--3", "--4",
        ".-0", ".-1", ".-2", ".-3", ".-4",
        "before_seizure",
        "ECG",
    ]
)

In [8]:
# Preview the first five rows after dropping the non-EEG columns.
fit_df2.head(n=5)

channel,FP1-F7,F7-T7,T7-P7,P7-O1,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,...,T8-P8-0,P8-O2,FZ-CZ,CZ-PZ,P7-T7,T7-FT9,FT9-FT10,FT10-T8,T8-P8-1,is_seizure
0 days 00:00:00,-12.094205,26.713574,-2.525164,5.714844,-0.39871,2.79097,25.650347,-10.233558,-11.828398,-30.700675,...,-40.80133,122.669795,-12.625819,58.078766,2.79097,14.220659,-6.246458,-26.979381,-40.80133,False
0 days 00:00:00.003906250,-3.984894,9.228259,-0.726857,2.078676,-0.002848,1.083164,8.866254,-3.351387,-3.894393,-10.319968,...,-13.759008,41.899137,-4.165896,19.907382,1.083164,4.974709,-1.993871,-9.052953,-13.759008,False
0 days 00:00:00.007812500,-0.758705,2.24748,-0.017454,0.620845,0.147268,0.394352,2.165119,-0.614573,-0.738115,-2.200027,...,-2.982458,9.680581,-0.799886,4.677136,0.394352,1.279735,-0.305718,-1.911762,-2.982458,False
0 days 00:00:00.011718750,-4.978767,12.188861,-0.945918,2.755701,0.009338,1.442223,11.97704,-4.674531,-5.125166,-13.337263,...,-18.140539,56.623853,-5.217581,26.544704,1.442223,7.374148,-2.883424,-12.197177,-18.140539,False
0 days 00:00:00.015625,6.179067,-12.652102,1.46758,-2.511603,0.440694,-1.099635,-12.048158,5.09954,5.960205,15.164319,...,19.951526,-57.208789,6.435789,-27.708167,-1.099635,-6.61345,3.174129,13.186266,19.951526,False


In [9]:
# Mean of every channel per is_seizure class. reset_index() turns the class
# label back into an ordinary first column, giving one row per class.
grouped_mean = (
    fit_df2
    .groupby(by=["is_seizure"])
    .mean()
    .reset_index()
)

channel,is_seizure,FP1-F7,F7-T7,T7-P7,P7-O1,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,...,F8-T8,T8-P8-0,P8-O2,FZ-CZ,CZ-PZ,P7-T7,T7-FT9,FT9-FT10,FT10-T8,T8-P8-1
0,False,0.208967,0.233454,0.14556,0.19594,0.211175,0.187434,0.180436,0.196407,0.232616,...,0.232709,0.146044,0.18969,0.220264,0.194291,0.24516,0.149136,0.193162,0.228142,0.146044
1,True,0.309889,0.238079,0.157038,0.21046,0.326969,0.192633,0.187327,0.20175,0.240095,...,0.158835,0.192709,0.216875,0.16685,0.235753,0.233682,0.143404,0.204088,0.186607,0.192709


In [34]:
# Reshape the per-class means to one row per channel with the columns
# channel / seizure_false / seizure_true.
#
# is_seizure is moved into the index *before* transposing: transposing a frame
# that still holds the boolean label column next to the float means turns
# every value into `object` dtype. Indexing by the label also lets the classes
# be renamed by value (False/True) rather than by row position, and removes
# the need to slice off a leading label row.
x = (
    grouped_mean
    .set_index("is_seizure")
    .T
    .rename(columns={False: "seizure_false", True: "seizure_true"})
    .rename_axis(index="channel", columns=None)
    .reset_index()
)

In [41]:
# Signed difference of the class means (seizure minus non-seizure), largest first.
x = (
    x
    .assign(diff=x["seizure_true"] - x["seizure_false"])
    .sort_values(by="diff", ascending=False)
)
x

Unnamed: 0,channel,seizure_false,seizure_true,diff
4,FP1-F3,0.211175,0.326969,0.115794
0,FP1-F7,0.208967,0.309889,0.100922
14,T8-P8-0,0.146044,0.192709,0.046666
22,T8-P8-1,0.146044,0.192709,0.046666
17,CZ-PZ,0.194291,0.235753,0.041462
12,FP2-F8,0.210426,0.24749,0.037064
15,P8-O2,0.18969,0.216875,0.027185
3,P7-O1,0.19594,0.21046,0.01452
2,T7-P7,0.14556,0.157038,0.011478
20,FT9-FT10,0.193162,0.204088,0.010927


In [42]:
# Names of the ten channels at the top of the current ordering of x.
first_ten_channels = x["channel"].iloc[:10].tolist()
first_ten_channels

['FP1-F3',
 'FP1-F7',
 'T8-P8-0',
 'T8-P8-1',
 'CZ-PZ',
 'FP2-F8',
 'P8-O2',
 'P7-O1',
 'T7-P7',
 'FT9-FT10']