In [1]:
import pandas as pd

from source import data_import
from source.calculate_mean_psd import calculate_mean_psd
from source.constants import CHANNELS, FREQUENCY_RANGES
from source.data_import import save_pyarrow

# Show the channel list provided by source.constants; these are the columns
# that the feature-extraction cell below aggregates.
print(f"loaded channel names: {CHANNELS}")

loaded channel names: ['F4-C4', 'F3-C3', 'FT9-FT10', 'FZ-CZ', 'F7-T7', 'FP2-F4', 'T8-P8-1', 'T8-P8-0', 'FP1-F3', 'CZ-PZ']


In [2]:
# Load the preprocessed DataFrame stored under the name "preprocessed_df"
# (the loader prints the resolved .arrow path it read from).
# Later cells expect the columns `seizure_id`, `segment_id`, `is_seizure`
# and one column per entry of CHANNELS.
df_pp = data_import.load_pyarrow(file_name="preprocessed_df")

/home/weasel/reps/ai-seizure-detectives/source/../data/preprocessed_df.arrow was loaded.


## Feature Extraction

In [3]:
### Aggregate functions for mean PSD
def _band_power_agg(band):
    """Build a groupby aggregation for a single frequency band.

    Args:
        band: Key into ``FREQUENCY_RANGES`` (e.g. ``'Delta'``).

    Returns:
        A function that takes the Series ``groupby(...).agg`` passes for one
        column of one group and returns the value ``calculate_mean_psd``
        reports for that column and ``band``.
    """
    def band_power(x):
        # Lookups stay inside the inner function so they happen at call time.
        # The result of calculate_mean_psd is indexed by the Series name
        # (the column being aggregated) first, then by the band label.
        psd = calculate_mean_psd(x, frequency_ranges={band: FREQUENCY_RANGES[band]})
        return psd[x.name][band]

    return band_power


# One aggregation function per frequency band of interest.
delta = _band_power_agg('Delta')
theta = _band_power_agg('Theta')
gamma = _band_power_agg('Gamma')

# These objects are placed in a per-column list of aggregations, where the
# `column` field ends up as the label of the resulting output column
# (the `delta` / `theta` / `gamma` headers in the aggregated frame), not as
# the name of an input column.
delta_agg = pd.NamedAgg(column='delta', aggfunc=delta)
theta_agg = pd.NamedAgg(column='theta', aggfunc=theta)
gamma_agg = pd.NamedAgg(column='gamma', aggfunc=gamma)

In [4]:
# Statistics computed for every channel within each (seizure_id, segment_id)
# group: three built-in reducers plus the three labelled band aggregations.
per_channel_aggs = ('mean', 'std', 'var', delta_agg, theta_agg, gamma_agg)

# Every channel gets its own copy of the same list of aggregations.
agg_spec = {channel: list(per_channel_aggs) for channel in CHANNELS}
# Target column: just keep the first element of each group.
agg_spec['is_seizure'] = 'first'

segments = df_pp.groupby(['seizure_id', 'segment_id'])
df_features = segments.agg(agg_spec)
df_features.head()

Unnamed: 0_level_0,channel,F4-C4,F4-C4,F4-C4,F4-C4,F4-C4,F4-C4,F3-C3,F3-C3,F3-C3,F3-C3,...,FP1-F3,FP1-F3,FP1-F3,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,is_seizure
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,var,delta,theta,gamma,mean,std,var,delta,...,delta,theta,gamma,mean,std,var,delta,theta,gamma,first
seizure_id,segment_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,1,0.642823,25.278278,638.991344,99.724544,33.651173,0.131576,-0.13638,28.577184,816.655466,123.466899,...,115.680103,29.233559,0.343249,0.75887,35.447713,1256.540374,194.36462,76.886992,0.174189,False
0,2,0.459926,24.908361,620.426452,80.467433,30.73937,0.113581,0.607864,22.789379,519.355782,69.215516,...,98.999296,24.519002,0.36393,0.040398,33.861154,1146.577756,187.41729,34.401955,0.181605,False
0,3,-0.336786,25.078313,628.921793,97.815139,19.12002,0.108965,0.020483,24.20861,586.056809,92.561775,...,76.273119,21.891098,0.347473,0.139786,30.472674,928.583879,117.60652,31.99868,0.155509,False
0,4,0.57155,23.796227,566.260437,78.057846,22.859691,0.112154,0.170771,27.05993,732.239825,112.071663,...,111.418632,24.58484,0.32645,0.347514,30.803236,948.839359,158.377845,31.605055,0.181355,False
0,5,-0.156618,28.030262,785.695596,102.786435,31.697164,0.123064,0.019398,28.681243,822.613674,124.450518,...,115.905093,37.75188,0.323275,-0.032786,39.120852,1530.441051,215.122132,60.283108,0.170928,False


## Flatten DataFrame columns

In [5]:
# Collapse the two-level (channel, statistic) header into single
# "<channel>_<statistic>" names, leaving the target column as 'is_seizure'.
# The target is renamed where it sits instead of being filtered out and
# appended at the end, so labels cannot shift onto the wrong columns if the
# column order ever changes.
# The MultiIndex guard makes re-running this cell a no-op; without it a second
# run would join the characters of the already-flat names and reset the index
# a second time.
if isinstance(df_features.columns, pd.MultiIndex):
    df_features.columns = [
        'is_seizure' if col[0] == 'is_seizure' else '_'.join(col).strip()
        for col in df_features.columns
    ]
    # Turn the (seizure_id, segment_id) group keys back into ordinary columns.
    df_features = df_features.reset_index()
df_features.head(20)

Unnamed: 0,seizure_id,segment_id,F4-C4_mean,F4-C4_std,F4-C4_var,F4-C4_delta,F4-C4_theta,F4-C4_gamma,F3-C3_mean,F3-C3_std,...,FP1-F3_delta,FP1-F3_theta,FP1-F3_gamma,CZ-PZ_mean,CZ-PZ_std,CZ-PZ_var,CZ-PZ_delta,CZ-PZ_theta,CZ-PZ_gamma,is_seizure
0,0,1,0.642823,25.278278,638.991344,99.724544,33.651173,0.131576,-0.13638,28.577184,...,115.680103,29.233559,0.343249,0.75887,35.447713,1256.540374,194.36462,76.886992,0.174189,False
1,0,2,0.459926,24.908361,620.426452,80.467433,30.73937,0.113581,0.607864,22.789379,...,98.999296,24.519002,0.36393,0.040398,33.861154,1146.577756,187.41729,34.401955,0.181605,False
2,0,3,-0.336786,25.078313,628.921793,97.815139,19.12002,0.108965,0.020483,24.20861,...,76.273119,21.891098,0.347473,0.139786,30.472674,928.583879,117.60652,31.99868,0.155509,False
3,0,4,0.57155,23.796227,566.260437,78.057846,22.859691,0.112154,0.170771,27.05993,...,111.418632,24.58484,0.32645,0.347514,30.803236,948.839359,158.377845,31.605055,0.181355,False
4,0,5,-0.156618,28.030262,785.695596,102.786435,31.697164,0.123064,0.019398,28.681243,...,115.905093,37.75188,0.323275,-0.032786,39.120852,1530.441051,215.122132,60.283108,0.170928,False
5,0,6,0.48636,39.835651,1586.879066,252.138662,43.577212,0.121571,0.217503,33.613739,...,129.192067,43.092616,0.340392,0.785863,37.863268,1433.627065,161.301344,53.940287,0.176463,False
6,0,7,-0.493721,33.397087,1115.36545,193.698273,43.186676,0.11928,0.464669,34.488052,...,182.916197,66.94106,0.320393,-1.110479,38.161832,1456.325393,207.186974,42.867163,0.147077,False
7,0,8,0.617939,31.848664,1014.337394,139.871258,35.002992,0.123555,0.029983,28.280461,...,170.101263,45.770055,0.348241,0.608507,37.299374,1391.243264,207.511363,38.684956,0.182358,False
8,0,9,0.113113,28.832383,831.306319,116.094771,26.950793,0.136861,-0.004529,29.117649,...,76.690453,26.540649,0.347047,0.559385,32.210621,1037.524122,124.484473,36.445006,0.190132,False
9,0,10,0.245758,39.515694,1561.490081,222.233458,54.002125,0.13007,0.298746,40.212388,...,138.524686,50.43864,0.326207,-0.083512,37.897142,1436.193366,198.193238,51.875778,0.176812,False


In [6]:
# Persist the flattened feature table under the name "feature_extracted"
# (the writer prints the resolved .arrow path on success).
# `save_pyarrow` is imported together with the other imports in the first cell.
save_pyarrow(df_features, file_name="feature_extracted")

/home/weasel/reps/ai-seizure-detectives/source/../data/feature_extracted.arrow was successfully written.
