In [None]:

import os
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import timm
from torch.utils.data import Dataset, DataLoader
import pywt, librosa

# Wavelet name handed to denoise(); any falsy value (None) skips denoising.
USE_WAVELET = None

# Short label per electrode chain, used in plot titles; NAMES[k] labels FEATS[k].
# NOTE(review): by the usual 10-20 naming the 'RP'/'RR' labels look swapped relative
# to FEATS[2]/FEATS[3] -- labels only affect plots; confirm before reordering anything.
NAMES = ["LL", "LP", "RP", "RR", "LZ", "RZ"]

# Electrode chains: consecutive electrodes in a chain are differenced pairwise
# (5 electrodes -> 4 differences per chain).
FEATS = [
    ["Fp1", "F7", "T3", "T5", "O1"],
    ["Fp1", "F3", "C3", "P3", "O1"],
    ["Fp2", "F8", "T4", "T6", "O2"],
    ["Fp2", "F4", "C4", "P4", "O2"],
    ["Fp1", "Fz", "Cz", "Pz", "O1"],
    ["Fp2", "Fz", "Cz", "Pz", "O2"],
]


# DENOISE FUNCTION
def maddest(d, axis=None):
    """Mean absolute deviation of ``d`` about its mean.

    Note: despite the name, this uses the mean (not the median) both for the
    centre and for the final reduction.

    Args:
        d: Array-like of numbers.
        axis: Axis (or None for all elements) along which to reduce.

    Returns:
        Scalar when ``axis`` is None, otherwise an array with ``axis`` removed.
    """
    d = np.asarray(d)
    # keepdims so the centre broadcasts against ``d`` for any axis, not only axis 0/None.
    center = np.mean(d, axis=axis, keepdims=True)
    return np.mean(np.absolute(d - center), axis=axis)

def denoise(x, wavelet='haar', level=1):
    """Hard-threshold the wavelet detail coefficients of a 1-D signal and rebuild it.

    Args:
        x: 1-D signal.
        wavelet: Wavelet name understood by pywt.
        level: Which coefficient band, counted from the end of the decomposition,
            is used to estimate the noise scale. It does not set the decomposition
            depth; ``wavedec`` is called without a level.

    Returns:
        The reconstructed signal as returned by ``pywt.waverec``.
    """
    bands = pywt.wavedec(x, wavelet, mode="per")

    # Noise scale estimated from one detail band via maddest().
    noise_scale = (1/0.6745) * maddest(bands[-level])
    cutoff = noise_scale * np.sqrt(2*np.log(len(x)))

    # The approximation band (index 0) is kept as is; every detail band is thresholded.
    thresholded = [pywt.threshold(band, value=cutoff, mode='hard') for band in bands[1:]]
    bands = [bands[0]] + thresholded

    return pywt.waverec(bands, wavelet, mode='per')

import librosa

def spectrogram_from_eeg(parquet_path, display=False):
    """Build a 6-channel log-mel spectrogram image from one EEG parquet file.

    The middle 10_000 rows of the recording are used. For every electrode chain in
    ``FEATS`` the four consecutive-electrode differences are turned into log-mel
    spectrograms and averaged into one image channel.

    Args:
        parquet_path: Path (str or path-like) of the EEG parquet file. Its stem is
            used as the EEG id in plot titles.
        display: When truthy, plot the six spectrograms and one averaged
            differential signal per chain.

    Returns:
        float32 array of shape (192, 768, 6); channel ``k`` belongs to ``FEATS[k]``.
    """
    # Take the id from the path instead of a notebook-level global, so the function
    # also works (and titles stay correct) when called outside the driver loop.
    eeg_label = pathlib.Path(parquet_path).stem

    # LOAD MIDDLE 50 SECONDS OF EEG SERIES (10_000 rows at the sr=200 used below)
    eeg = pd.read_parquet(parquet_path)
    middle = (len(eeg)-10_000)//2
    eeg = eeg.iloc[middle:middle+10_000]

    # VARIABLE TO HOLD SPECTROGRAM
    img = np.zeros((192,768,6),dtype='float32')

    if display: plt.figure(figsize=(10,12))
    signals = []  # one averaged differential signal per chain; only filled for plotting
    for k in range(6):
        COLS = FEATS[k]
        pair_signals = []

        for kk in range(4):

            # COMPUTE PAIR DIFFERENCES
            x = eeg[COLS[kk]].values - eeg[COLS[kk+1]].values

            # FILL NANS (checking all-NaN first avoids nanmean's empty-slice warning)
            if np.isnan(x).all():
                x[:] = 0
            else:
                x = np.nan_to_num(x, nan=np.nanmean(x))

            # DENOISE
            if USE_WAVELET:
                x = denoise(x, wavelet=USE_WAVELET)
            pair_signals.append(x)

            # RAW SPECTROGRAM
            mel_spec = librosa.feature.melspectrogram(y=x, sr=200, hop_length=len(x)//768,
                  n_fft=1024, n_mels=192, fmin=0, fmax=20, win_length=128)

            # LOG TRANSFORM
            # Crop the frame count to a multiple of 32; for a 10_000-sample input this
            # is expected to give the 768 columns that ``img`` holds.
            width = (mel_spec.shape[1]//32)*32
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max).astype(np.float32)[:,:width]

            # STANDARDIZE TO -1 TO 1 (assuming librosa's default 80 dB floor below the max)
            mel_spec_db = (mel_spec_db+40)/40
            img[:,:,k] += mel_spec_db

        # AVERAGE THE 4 MONTAGE DIFFERENCES
        img[:,:,k] /= 4.0

        if display:
            signals.append(np.mean(pair_signals, axis=0))
            plt.subplot(3,2,k+1)
            plt.imshow(img[:,:,k],aspect='auto',origin='lower')
            plt.title(f'EEG {eeg_label} - Spectrogram {NAMES[k]}')

    if display:
        plt.show()
        plt.figure(figsize=(10,7))
        offset = 0
        for k in range(6):
            # Stack each trace just above the previous one; trace k carries label NAMES[k].
            if k>0: offset -= signals[k].min()
            plt.plot(range(len(signals[k])),signals[k]+offset,label=NAMES[k])
            offset += signals[k].max()
        plt.legend()
        plt.title(f'EEG {eeg_label} Signals')
        plt.show()
        print(); print('#'*25); print()

    return img

class CFG:
    """Static inference settings: input locations, model identity, resize transform, labels."""

    # Competition input files
    base_dir = pathlib.Path("/kaggle/input/hms-harmful-brain-activity-classification")
    path_test = base_dir.joinpath("test.csv")
    path_submission = base_dir.joinpath("sample_submission.csv")
    spec_dir = base_dir.joinpath("test_spectrograms")

    # Architecture name passed to timm.create_model
    model_name = "tf_efficientnet_b0_ns"

    # Checkpoints matching *ft.pt under the weights directory, in sorted path order
    model_weights = sorted(pathlib.Path("zzzzzzz").glob("*ft.pt"))

    # Printed while the class body runs, so the discovered checkpoints show in the cell output
    print(model_weights)

    # Resize applied to model inputs
    transform = transforms.Resize((768, 768), antialias=False)
    batch_size = 16

    # Output columns, in the order the model's six logits are mapped to
    label_columns = [
        "seizure_vote", "lpd_vote", "gpd_vote",
        "lrda_vote", "grda_vote", "other_vote",
    ]
# Join the test metadata onto the sample submission and attach each row's spectrogram file.
test = pd.read_csv(CFG.path_test)
submission = pd.read_csv(CFG.path_submission).merge(test, how="inner", on="eeg_id")
submission["path"] = [
    CFG.spec_dir / f"{spec_id}.parquet" for spec_id in submission["spectrogram_id"]
]


# READ ALL EEG SPECTROGRAMS
PATH2 = '/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/'
DISPLAY = 1  # how many of the first EEGs get plotted
EEG_IDS2 = test['eeg_id'].unique()
all_eegs2 = {}  # eeg_id -> spectrogram image returned by spectrogram_from_eeg

print('Converting Test EEG to Spectrograms...')
print()
for i, eeg_id in enumerate(EEG_IDS2):

    # CREATE SPECTROGRAM FROM EEG PARQUET (plots only while i < DISPLAY)
    img = spectrogram_from_eeg(PATH2 + f'{eeg_id}.parquet', i < DISPLAY)
    all_eegs2[eeg_id] = img
    
from torch.utils.data import Dataset, DataLoader
def preprocess(x):
    """Standardize ``x`` with its own global mean and standard deviation.

    A 1e-6 term is added to the standard deviation so constant input does not
    divide by zero.
    """
    center = x.mean()
    spread = x.std()
    return (x - center) / (spread + 1e-6)

def preprocess2(x):
    """Clip to [e^-6, e^10], take the natural log, then standardize globally.

    Values below e^-6 (including negatives and zero) are raised to e^-6 before the
    log, so the logged values lie in [-6, 10].
    """
    logged = np.log(np.clip(x, np.exp(-6), np.exp(10)))
    center = logged.mean()
    spread = logged.std()
    return (logged - center) / (spread + 1e-6)


class SpecDataset(Dataset):
    """Dataset yielding one single-channel image and its label vector per row of ``df``.

    Each image stacks, top to bottom, the six EEG-derived spectrogram channels from
    ``all_eegs2`` and the row's parquet spectrogram (resized by ``CFG.transform``).
    """

    def __init__(self, df, transform=CFG.transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        record = self.df.iloc[index]

        # EEG-derived part: lay the six channel planes one under another, then standardize.
        eeg_img = all_eegs2[record.eeg_id]
        eeg_part = np.concatenate([eeg_img[:, :, c] for c in range(6)], axis=0)
        eeg_part = preprocess(eeg_part)

        # Parquet spectrogram part: drop the first column, transpose, log-standardize,
        # then resize so it can be stacked under the EEG part.
        spec_part = pd.read_parquet(record.path).fillna(-1).values[:, 1:].T
        spec_part = preprocess2(spec_part)
        spec_part = torch.Tensor(spec_part[None, :])
        spec_part = np.array(CFG.transform(spec_part))[0]

        stacked = np.concatenate([eeg_part, spec_part])
        image = torch.Tensor(stacked[None, :])
        if self.transform:
            image = self.transform(image)

        # Labels in CFG.label_columns order, as float32.
        target = torch.Tensor(np.array(record.loc[CFG.label_columns].values, 'float32'))
        return image, target
    
    

In [None]:
# Inference loader. CFG.batch_size is used instead of the implicit batch size of 1.
# shuffle stays off because the prediction loop re-attaches submission.index to the
# concatenated outputs, so batches must arrive in row order.
data_ds = SpecDataset(df=submission)
data_loader = DataLoader(
    dataset=data_ds,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=os.cpu_count() or 0,  # cpu_count() may return None
)
data_loader

In [None]:
# Pull one batch from the loader to inspect the input tensor's shape and values.
x, y = next(iter(data_loader))
x.shape, x

In [None]:
# Visual check of the first sample's single channel from the batch pulled above.
plt.figure(figsize=(10, 10))
plt.imshow(x[0][0])
plt.show()

In [None]:

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"DEVICE: {DEVICE}")
model = timm.create_model(model_name=CFG.model_name, pretrained=False, num_classes=6, in_chans=1)
model.to(DEVICE)
num_parameter = sum(p.numel() for p in model.parameters())
print(f"Model has {num_parameter} parameters.")

# Without checkpoints the average below would be 0.0 / 0 and silently write NaNs.
if not CFG.model_weights:
    raise FileNotFoundError(
        "CFG.model_weights is empty: no checkpoint matched, refusing to write an all-NaN submission."
    )

# Running sum of per-checkpoint softmax outputs; divided by the checkpoint count at the end.
prediction = pd.DataFrame(0.0, columns=CFG.label_columns, index=submission.index)
for i, path_weight in enumerate(CFG.model_weights):
    print(f"Model {i}: {path_weight}")
    # map_location lets checkpoints saved on GPU load on a CPU-only kernel.
    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    model.load_state_dict(torch.load(path_weight, map_location=DEVICE))
    model.eval()
    with torch.no_grad():
        res = []
        for x, y in data_loader:
            x = x.to(DEVICE)
            pred = F.softmax(model(x), dim=1)
            res.append(pred.cpu().numpy())
        # Loader order equals submission row order, so the index can be re-attached directly.
        res = pd.DataFrame(np.concatenate(res), columns=CFG.label_columns, index=submission.index)

        prediction = prediction + res
        print("\n")

prediction = prediction / len(CFG.model_weights)

submission[CFG.label_columns] = prediction
submission = submission[["eeg_id"] + CFG.label_columns]
submission.to_csv("submission.csv", index=False)


In [None]:
# Display the final submission table (eeg_id plus the six label columns).
submission

In [None]:
# Sanity check: each row averages softmax outputs, so the six label columns should sum to ~1.
submission[CFG.label_columns].sum(axis=1)
