In [None]:
import os
import pandas as pd
from matplotlib import pyplot as plt

# Params
# Vote columns that are normalized row-wise below.
TARGETS = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']

# Paths
# The project root can be redirected by setting the KAGGLE_HMS_ROOT environment
# variable; when it is unset (or empty) the original location is used.
root = os.environ.get('KAGGLE_HMS_ROOT') or '/media/latlab/MR/projects/kaggle-hms'
data_dir = os.path.join(root, 'data')
results_dir = os.path.join(root, 'results')
train_eeg_dir = os.path.join(data_dir, 'train_eegs')
train_spectrogram_dir = os.path.join(data_dir, 'train_spectrograms')

# Load data
df = pd.read_csv(os.path.join(data_dir, 'train.csv'))

# Normalize votes (for each instance): divide every row by its own total so the
# TARGETS columns of a row sum to 1. A row whose votes are all zero would
# become NaN here.
y_data = df[TARGETS].values
y_data = y_data / y_data.sum(axis=1, keepdims=True)
df[TARGETS] = y_data

# Print number of patients (distinct patient_id values; dropna=False keeps the
# count identical to len(unique()), which also counts a missing id once)
print('Number of patients: {}'.format(df['patient_id'].nunique(dropna=False)))

df

In [None]:
# Build one row per eeg_id with a single grouped aggregation:
#   spec_id    - first spectrogram_id within the group
#   min / max  - smallest / largest spectrogram_label_offset_seconds
#   patient_id - first patient_id within the group
#   TARGETS    - per-group sum of each vote column
#   target     - first expert_consensus within the group
# Keyword order fixes the resulting column order.
train = df.groupby('eeg_id').agg(
    spec_id=('spectrogram_id', 'first'),
    min=('spectrogram_label_offset_seconds', 'min'),
    max=('spectrogram_label_offset_seconds', 'max'),
    patient_id=('patient_id', 'first'),
    **{vote_col: (vote_col, 'sum') for vote_col in TARGETS},
    target=('expert_consensus', 'first'),
)

# Rescale the summed votes so the TARGETS columns of every row add up to 1
y_data = train[TARGETS].values
y_data = y_data / y_data.sum(axis=1, keepdims=True)
train[TARGETS] = y_data

# Move eeg_id from the index back into a regular column
train = train.reset_index()
print('Train non-overlapp eeg_id shape:', train.shape)
train.head()

In [None]:
# Select a single recording by its position among the distinct eeg_id values
eeg_idx = 0
distinct_eeg_ids = df['eeg_id'].unique()
eeg_id = distinct_eeg_ids[eeg_idx]

# Read that recording's parquet file from the training EEG directory
eeg_path = os.path.join(train_eeg_dir, f'{eeg_id}.parquet')
eeg = pd.read_parquet(eeg_path)

# Report the duration: row count divided by 200 (rows per second at 200 Hz)
eeg_length_s = len(eeg) / 200
print('EEG length: {} s'.format(eeg_length_s))
eeg


In [None]:
# Select a single spectrogram by its position among the distinct spectrogram_id values
spectrogram_idx = 0
distinct_spectrogram_ids = df['spectrogram_id'].unique()
spectrogram_id = distinct_spectrogram_ids[spectrogram_idx]

# Read that spectrogram's parquet file from the training spectrogram directory
spectrogram_path = os.path.join(train_spectrogram_dir, f'{spectrogram_id}.parquet')
spectrogram = pd.read_parquet(spectrogram_path)
spectrogram

In [None]:
# Show the column labels of the loaded spectrogram frame
spectrogram.columns