In [63]:
import pandas as pd
from source import data_import
from source.calculate_mean_psd import calculate_mean_psd
from source.constants import CHANNELS, FREQUENCY_RANGES

# Echo the channel names imported from source.constants; these are the columns
# that get aggregated per segment in the feature-extraction cell further down.
print(f"loaded channel names: {CHANNELS}")

loaded channel names: ['F4-C4', 'F3-C3', 'FT9-FT10', 'FZ-CZ', 'F7-T7', 'FP2-F4', 'T8-P8-1', 'T8-P8-0', 'FP1-F3', 'CZ-PZ']


In [64]:
# load data
# The project loader takes the file name without its extension; the log line it
# prints shows the name being resolved to data/preprocessed_df.arrow.
df_pp = data_import.load_pyarrow(file_name="preprocessed_df")
# Last expression of the cell: display (rows, columns) as a quick sanity check.
df_pp.shape

/home/weasel/reps/ai-seizure-detectives/source/../data/preprocessed_df.arrow was loaded.


(5826809, 15)

## Feature Extraction

In [65]:
# Columns that are carried through the aggregation untouched: for each of them
# only the first value per group is kept instead of the channel statistics.
# Left empty on purpose -- presumably because 'is_seizure' is already one of the
# groupby keys and therefore survives as an index level. Put 'is_seizure' (or any
# other passthrough column) in this list if the grouping keys change.
ignore_col = []

In [66]:
### aggregate functions for mean psd
def _band_psd(band):
    """Build an aggregation callable that returns the mean PSD of one band.

    The returned function receives a single named Series, asks
    ``calculate_mean_psd`` for only ``band`` and extracts that value from the
    nested result, which is indexed by the Series name first and the band
    name second.
    """
    def _aggregate(x):
        # The range is resolved lazily, i.e. FREQUENCY_RANGES is read when the
        # aggregation actually runs, not when this cell is executed.
        band_range = {band: FREQUENCY_RANGES[band]}
        psd_by_column = calculate_mean_psd(x, frequency_ranges=band_range)
        return psd_by_column[x.name][band]

    return _aggregate


delta = _band_psd('Delta')
theta = _band_psd('Theta')
gamma = _band_psd('Gamma')

# Pair every callable with a label. Used inside the per-channel list passed to
# ``.agg`` below, the ``column`` value is what shows up as the statistic name
# in the aggregated frame (delta / theta / gamma).
delta_agg = pd.NamedAgg(column='delta', aggfunc=delta)
theta_agg = pd.NamedAgg(column='theta', aggfunc=theta)
gamma_agg = pd.NamedAgg(column='gamma', aggfunc=gamma)

In [67]:
def abs_mean(x):
    """Return the mean of the absolute values of one column (a Series).

    ``Series.abs`` is vectorized; it yields the same values as applying the
    builtin ``abs`` to every element but avoids a Python-level call per sample,
    which matters because this runs once per channel and group.
    """
    return x.abs().mean()


# Label/callable pair consumed by the per-channel aggregation list; 'abs_mean'
# becomes the statistic name in the aggregated frame.
abs_mean_agg = pd.NamedAgg(column='abs_mean', aggfunc=abs_mean)

In [68]:
# Statistics computed for every channel within each group: three pandas
# built-ins followed by the custom label/callable pairs defined above.
channel_stats = ['std', 'var', 'mean', abs_mean_agg, delta_agg, theta_agg, gamma_agg]

# One entry per channel, each with its own copy of the statistics list.
agg_spec = {channel: list(channel_stats) for channel in CHANNELS}
# Passthrough columns (if any) only keep their first value per group.
agg_spec.update({passthrough: ['first'] for passthrough in ignore_col})

# One output row per (seizure_id, segment_id, is_seizure) combination; the
# resulting columns form a two-level (channel, statistic) index.
segment_groups = df_pp.groupby(['seizure_id', 'segment_id', "is_seizure"])
df_features = segment_groups.agg(agg_spec)
df_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,channel,F4-C4,F4-C4,F4-C4,F4-C4,F4-C4,F4-C4,F4-C4,F3-C3,F3-C3,F3-C3,...,FP1-F3,FP1-F3,FP1-F3,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ,CZ-PZ
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,std,var,mean,abs_mean,delta,theta,gamma,std,var,mean,...,delta,theta,gamma,std,var,mean,abs_mean,delta,theta,gamma
seizure_id,segment_id,is_seizure,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
0,1,False,25.278278,638.991344,0.642823,19.767362,99.724544,33.651173,0.131576,28.577184,816.655466,-0.13638,...,115.680103,29.233559,0.343249,35.447713,1256.540374,0.75887,27.466555,194.36462,76.886992,0.174189
0,2,False,24.908361,620.426452,0.459926,19.018136,80.467433,30.73937,0.113581,22.789379,519.355782,0.607864,...,98.999296,24.519002,0.36393,33.861154,1146.577756,0.040398,24.488472,187.41729,34.401955,0.181605
0,3,False,25.078313,628.921793,-0.336786,18.864175,97.815139,19.12002,0.108965,24.20861,586.056809,0.020483,...,76.273119,21.891098,0.347473,30.472674,928.583879,0.139786,23.040842,117.60652,31.99868,0.155509
0,4,False,23.796227,566.260437,0.57155,18.298645,78.057846,22.859691,0.112154,27.05993,732.239825,0.170771,...,111.418632,24.58484,0.32645,30.803236,948.839359,0.347514,23.812672,158.377845,31.605055,0.181355
0,5,False,28.030262,785.695596,-0.156618,21.895794,102.786435,31.697164,0.123064,28.681243,822.613674,0.019398,...,115.905093,37.75188,0.323275,39.120852,1530.441051,-0.032786,29.786483,215.122132,60.283108,0.170928


## Flatten Dataframe

In [69]:
# Flatten the two-level (channel, statistic) column index into single
# "<channel>_<statistic>" names and turn the groupby keys back into columns.
# A passthrough 'is_seizure' column, if one was aggregated, keeps its plain name.
#
# The MultiIndex guard makes the cell safe to re-run: on an already flattened
# frame the join would otherwise run over the characters of each name and
# reset_index would add a stray 'index' column.
if isinstance(df_features.columns, pd.MultiIndex):
    # Rename every column (never filter) so the number of new names always
    # matches the number of columns.
    df_features.columns = [
        'is_seizure' if col[0] == 'is_seizure' else '_'.join(col).strip()
        for col in df_features.columns
    ]
    # Rebind instead of mutating in place; seizure_id / segment_id / is_seizure
    # move from the index into regular columns.
    df_features = df_features.reset_index()
df_features.tail(30)


Unnamed: 0,seizure_id,segment_id,is_seizure,F4-C4_std,F4-C4_var,F4-C4_mean,F4-C4_abs_mean,F4-C4_delta,F4-C4_theta,F4-C4_gamma,...,FP1-F3_delta,FP1-F3_theta,FP1-F3_gamma,CZ-PZ_std,CZ-PZ_var,CZ-PZ_mean,CZ-PZ_abs_mean,CZ-PZ_delta,CZ-PZ_theta,CZ-PZ_gamma
1108,58,7,False,18.902304,357.297092,-0.129867,14.613813,43.050805,27.537749,0.187851,...,151.193533,19.045718,0.343143,22.489849,505.793304,-0.01632,17.600577,38.784601,43.715104,0.142116
1109,58,8,False,17.562436,308.439155,0.768381,13.545881,33.648257,21.871237,0.181689,...,40.342519,20.953553,0.300189,24.665634,608.3935,0.5393,19.100735,43.943237,61.133626,0.145097
1110,58,9,False,17.866366,319.20703,0.055482,14.015521,32.340037,22.670158,0.162112,...,71.5306,31.945473,0.309519,26.121613,682.338657,0.039282,19.493444,62.158532,71.514952,0.132452
1111,58,10,False,29.447813,867.173674,-0.110975,19.383158,40.970434,28.339451,6.148782,...,72.005667,26.83409,0.898667,23.714588,562.381698,0.191656,18.390121,49.758678,50.889884,0.233121
1112,58,11,False,15.692893,246.26688,0.41173,12.375227,28.056838,18.262682,0.16653,...,46.062897,27.492809,0.213767,18.091766,327.31198,0.173616,14.004999,27.480825,34.086131,0.149297
1113,58,12,False,33.805748,1142.828615,0.698367,25.411591,79.006955,21.153684,4.131121,...,257.892151,23.174721,1.26616,39.510151,1561.052071,0.63987,29.054309,140.890543,30.209887,0.643544
1114,58,13,False,49.455717,2445.867908,0.105825,31.285434,109.618573,36.115524,24.873477,...,204.952106,33.671004,8.519916,25.087019,629.358499,0.124016,18.997094,64.126837,44.241002,0.587274
1115,58,14,False,24.568884,603.630037,-1.073148,17.124432,39.848499,26.372584,3.444726,...,116.835295,20.371412,0.912723,19.933175,397.331484,-0.461689,15.708234,39.078317,35.143902,0.186331
1116,58,15,False,26.150749,683.861694,0.82102,18.560529,78.725586,35.386311,1.601313,...,318.915036,114.556493,1.303635,24.846112,617.329278,0.448621,18.753394,68.143516,54.886177,0.265559
1117,59,1,False,22.991423,528.605555,0.144024,17.439767,41.134349,16.197523,3.893597,...,52.493464,15.564249,7.029091,20.56303,422.838189,0.176004,15.276064,54.545782,23.015469,0.319742


In [70]:
# NOTE(review): this import sits at its point of use; the same module is already
# imported as `data_import` in the first cell, so consider moving this line into
# the top import cell to keep all dependencies in one place.
from source.data_import import save_pyarrow

# Persist the flattened feature table; the helper's log line shows it being
# written to data/feature_extracted.arrow.
save_pyarrow(df_features, file_name="feature_extracted")

/home/weasel/reps/ai-seizure-detectives/source/../data/feature_extracted.arrow was successfully written.
