In [30]:
import pandas as pd
from source import data_import
from source.constants import CHANNELS

# Echo the channel list imported from the project constants as a quick sanity check.
print("loaded channel names:", CHANNELS)

loaded channel names: ['F4-C4', 'F3-C3', 'FT9-FT10', 'FZ-CZ', 'F7-T7', 'FP2-F4', 'T8-P8-1', 'T8-P8-0', 'FP1-F3', 'CZ-PZ']


In [31]:
# Load the stored "preprocessed_df" table through the project loader.
# The helper reports the resolved path of the file it read (see the cell output).
# Presumably returns a pandas DataFrame (it is grouped and aggregated below) - TODO confirm.
df_pp = data_import.load_pyarrow(file_name="preprocessed_df")

/home/weasel/reps/ai-seizure-detectives/source/../data/preprocessed_df.arrow was loaded.


## Feature Extraction

In [32]:
# Summarise every (seizure_id, segment_id) group with one row of features:
# the mean and standard deviation of each channel, plus the segment's label.
channel_stats = {channel: ['mean', 'std'] for channel in CHANNELS}
# The target is carried over by taking the first value seen in the segment.
target_spec = {'is_seizure': 'first'}

# NOTE(review): in the displayed output 'T8-P8-1' and 'T8-P8-0' carry identical
# statistics - confirm whether these are duplicated channels upstream.
segments = df_pp.groupby(['seizure_id', 'segment_id'])
df_features = segments.agg({**channel_stats, **target_spec})
df_features.head()

Unnamed: 0_level_0,channel,F4-C4,F4-C4,F3-C3,F3-C3,FT9-FT10,FT9-FT10,FZ-CZ,FZ-CZ,F7-T7,F7-T7,...,FP2-F4,T8-P8-1,T8-P8-1,T8-P8-0,T8-P8-0,FP1-F3,FP1-F3,CZ-PZ,CZ-PZ,is_seizure
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,std,mean,std,mean,std,mean,std,mean,std,first
seizure_id,segment_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,1,0.642823,25.278278,-0.13638,28.577184,0.10927,27.872715,0.072943,40.156441,1.195346,20.135111,...,25.380978,0.365832,22.583062,0.365832,22.583062,-0.383942,26.546966,0.75887,35.447713,False
0,2,0.459926,24.908361,0.607864,22.789379,0.984358,23.485323,0.457697,32.960306,0.657212,18.265025,...,23.32469,0.455336,21.314656,0.455336,21.314656,1.242494,25.584776,0.040398,33.861154,False
0,3,-0.336786,25.078313,0.020483,24.20861,-0.237448,20.733267,-0.281956,32.82972,-0.433444,17.771789,...,18.870577,-0.051306,20.050884,-0.051306,20.050884,-0.092616,22.492845,0.139786,30.472674,False
0,4,0.57155,23.796227,0.170771,27.05993,-0.130328,23.920272,0.457751,34.066019,0.129564,17.220239,...,22.235792,0.214385,19.831823,0.214385,19.831823,0.136081,24.970415,0.347514,30.803236,False
0,5,-0.156618,28.030262,0.019398,28.681243,0.157825,25.146513,0.097109,42.098599,0.096355,17.888231,...,29.556387,-0.188304,21.520493,-0.188304,21.520493,-0.279403,28.959603,-0.032786,39.120852,False


## Flatten DataFrame

In [33]:
# Collapse the two-level (column, aggregation) header produced by .agg() into
# flat "<column>_<agg>" names, keeping the target column as plain 'is_seizure'.
#
# Labels are built position by position, so the target keeps its own name
# wherever it sits in the frame (the previous filter-then-append approach
# only labelled columns correctly when 'is_seizure' happened to be last).
#
# The MultiIndex guard makes the cell safe to re-run: once the header is flat
# there is nothing left to join, and the index has already been reset.
if isinstance(df_features.columns, pd.MultiIndex):
    df_features.columns = [
        col[0] if col[0] == 'is_seizure' else '_'.join(col)
        for col in df_features.columns
    ]
    # Turn the (seizure_id, segment_id) group keys back into ordinary columns.
    df_features = df_features.reset_index()
df_features.head(20)

Unnamed: 0,seizure_id,segment_id,F4-C4_mean,F4-C4_std,F3-C3_mean,F3-C3_std,FT9-FT10_mean,FT9-FT10_std,FZ-CZ_mean,FZ-CZ_std,...,FP2-F4_std,T8-P8-1_mean,T8-P8-1_std,T8-P8-0_mean,T8-P8-0_std,FP1-F3_mean,FP1-F3_std,CZ-PZ_mean,CZ-PZ_std,is_seizure
0,0,1,0.642823,25.278278,-0.13638,28.577184,0.10927,27.872715,0.072943,40.156441,...,25.380978,0.365832,22.583062,0.365832,22.583062,-0.383942,26.546966,0.75887,35.447713,False
1,0,2,0.459926,24.908361,0.607864,22.789379,0.984358,23.485323,0.457697,32.960306,...,23.32469,0.455336,21.314656,0.455336,21.314656,1.242494,25.584776,0.040398,33.861154,False
2,0,3,-0.336786,25.078313,0.020483,24.20861,-0.237448,20.733267,-0.281956,32.82972,...,18.870577,-0.051306,20.050884,-0.051306,20.050884,-0.092616,22.492845,0.139786,30.472674,False
3,0,4,0.57155,23.796227,0.170771,27.05993,-0.130328,23.920272,0.457751,34.066019,...,22.235792,0.214385,19.831823,0.214385,19.831823,0.136081,24.970415,0.347514,30.803236,False
4,0,5,-0.156618,28.030262,0.019398,28.681243,0.157825,25.146513,0.097109,42.098599,...,29.556387,-0.188304,21.520493,-0.188304,21.520493,-0.279403,28.959603,-0.032786,39.120852,False
5,0,6,0.48636,39.835651,0.217503,33.613739,0.122458,25.873762,0.156761,47.048994,...,35.851282,0.550318,22.05828,0.550318,22.05828,0.689219,33.831806,0.785863,37.863268,False
6,0,7,-0.493721,33.397087,0.464669,34.488052,0.721143,26.909594,-0.044024,45.575342,...,32.604831,0.026332,23.627218,0.026332,23.627218,-0.170747,35.702901,-1.110479,38.161832,False
7,0,8,0.617939,31.848664,0.029983,28.280461,-0.549961,26.22652,0.593422,37.852693,...,35.244524,0.500679,23.041928,0.500679,23.041928,0.197563,35.163717,0.608507,37.299374,False
8,0,9,0.113113,28.832383,-0.004529,29.117649,0.163063,24.426227,-0.08398,40.408078,...,30.023711,0.135233,21.409069,0.135233,21.409069,0.166556,26.039075,0.559385,32.210621,False
9,0,10,0.245758,39.515694,0.298746,40.212388,0.597114,25.445978,0.553471,54.590007,...,42.920719,0.141164,22.471298,0.141164,22.471298,0.098476,37.538723,-0.083512,37.897142,False


In [34]:
# NOTE(review): mid-notebook import; `data_import` is already imported in the
# first cell, so this could live with the other imports at the top.
from source.data_import import save_pyarrow

# Persist the feature table under the name "feature_extracted" via the project
# helper, which reports the path it wrote (see the cell output).
save_pyarrow(df_features, file_name="feature_extracted")

/home/weasel/reps/ai-seizure-detectives/source/../data/feature_extracted.arrow was successfully written.
