In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [2]:
class CFG:
    verbose = 1
    seed = 42
    preset = "efficientnetv2_b2_imagenet"
    image_size = [400, 300]
    epochs = 20
    batch_size = 64
    lr_mode = 'cos'
    drop_reminder = True
    num_classes = 6
    fold = 0
    class_names = ['Seizure', 'LPD', 'GPD', 'LRDA','GRDA', 'Other']
    label2name = dict(enumerate(class_names))
    name2label = {v:k for k, v in label2name.items()}

In [3]:
cfg = CFG()

In [4]:
cfg.name2label['Seizure']

0

In [5]:
root = './hms-harmful-brain-activity-classification'

In [49]:
data_df = pd.read_csv(root + 'train.csv')

In [50]:
data_df.head(10)

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,1628180742,0,0.0,353733,0,0.0,127492639,42516,Seizure,3,0,0,0,0,0
1,1628180742,1,6.0,353733,1,6.0,3887563113,42516,Seizure,3,0,0,0,0,0
2,1628180742,2,8.0,353733,2,8.0,1142670488,42516,Seizure,3,0,0,0,0,0
3,1628180742,3,18.0,353733,3,18.0,2718991173,42516,Seizure,3,0,0,0,0,0
4,1628180742,4,24.0,353733,4,24.0,3080632009,42516,Seizure,3,0,0,0,0,0
5,1628180742,5,26.0,353733,5,26.0,2413091605,42516,Seizure,3,0,0,0,0,0
6,1628180742,6,30.0,353733,6,30.0,364593930,42516,Seizure,3,0,0,0,0,0
7,1628180742,7,36.0,353733,7,36.0,3811483573,42516,Seizure,3,0,0,0,0,0
8,1628180742,8,40.0,353733,8,40.0,3388718494,42516,Seizure,3,0,0,0,0,0
9,2277392603,0,0.0,924234,0,0.0,1978807404,30539,GPD,0,0,5,0,1,5


In [51]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106800 entries, 0 to 106799
Data columns (total 15 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   eeg_id                            106800 non-null  int64  
 1   eeg_sub_id                        106800 non-null  int64  
 2   eeg_label_offset_seconds          106800 non-null  float64
 3   spectrogram_id                    106800 non-null  int64  
 4   spectrogram_sub_id                106800 non-null  int64  
 5   spectrogram_label_offset_seconds  106800 non-null  float64
 6   label_id                          106800 non-null  int64  
 7   patient_id                        106800 non-null  int64  
 8   expert_consensus                  106800 non-null  object 
 9   seizure_vote                      106800 non-null  int64  
 10  lpd_vote                          106800 non-null  int64  
 11  gpd_vote                          106800 non-null  i

In [63]:
data_df['eeg_path'] = f'{root}/train_eegs' + data_df['eeg_id'].astype(str) + '.parquet'
data_df['spectrogram_path'] = f'{root}/train_spectrograms' + data_df['spectrogram_id'].astype(str) + '.parquet'
data_df['spectrogram_path_npy'] = f'{root}/train_spectrograms' + data_df['spectrogram_id'].astype(str) + '.npy'
data_df['class_name'] = data_df.expert_consensus.copy()
data_df['class_label'] = data_df.expert_consensus.map(CFG.name2label)

In [64]:
data_df.head(5)

Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,eeg_path,spectrogram_path,spectrogram_path_npy,class_name,class_label
0,1628180742,0,0.0,353733,0,0.0,127492639,42516,Seizure,3,0,0,0,0,0,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,Seizure,0
1,1628180742,1,6.0,353733,1,6.0,3887563113,42516,Seizure,3,0,0,0,0,0,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,Seizure,0
2,1628180742,2,8.0,353733,2,8.0,1142670488,42516,Seizure,3,0,0,0,0,0,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,Seizure,0
3,1628180742,3,18.0,353733,3,18.0,2718991173,42516,Seizure,3,0,0,0,0,0,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,Seizure,0
4,1628180742,4,24.0,353733,4,24.0,3080632009,42516,Seizure,3,0,0,0,0,0,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,./hms-harmful-brain-activity-classification/tr...,Seizure,0


In [None]:
def classify_data():
    pass

In [52]:
index = np.random.randint(0, len(data_df.axes[0]))
spectro_id = data_df.iloc[index]['spectrogram_id']
spectro_filename = f'{spectro_id}.parquet'
spectro_path = root_data + f'{spectro_filename}'

In [56]:
df = pd.read_parquet(spectro_path)

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [55]:
img.imshow()

AttributeError: 'NoneType' object has no attribute 'imshow'