In [67]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from typing import Union, Dict, List
import numpy as np
from sklearn import preprocessing
from torch.utils.data import Dataset
from datetime import datetime
import matplotlib.pyplot as plt
from scipy import signal
from scipy.signal import argrelmax
from sklearn.metrics import mean_squared_error

from data import ChildInstituteDataset, preprocess, to_list, extract_keys
from inference import inference

In [9]:
data = pd.read_parquet('./data/merged_data_labelled.parquet')
window_size, step_size = 360, 1
unique_series_id = data['series_id'].unique()
config = {}

In [86]:
cnt = 0
for series_id in unique_series_id:
    if cnt > 0:
        break
    cnt += 1
    series_data = data[data['series_id']==series_id].copy()
    series_data.reset_index(drop=True, inplace=True)

    preporcessed = preprocess(series_data)
    d_list = to_list(preporcessed, window_size, config, step_size)
    d_keys = extract_keys(preporcessed, window_size, step_size)

    for i, key in enumerate(d_keys):
        key['X'] = d_list[i]
    d_list = d_keys

    d_dataset = ChildInstituteDataset(d_list)
    d_dataloader = DataLoader(d_dataset,
                              batch_size = 128,
                              shuffle=False
                              )
    
    model = torch.jit.load('./tensorboard/new_label,cross_entropy,all,window_size=360,batch_size=512,NAdam/version_0/saved_models/best_model.pth',
                           map_location=torch.device("cpu"))
    model.to(torch.device("cpu"))
    model.eval()

    all_outputs = []
    all_series_ids = []
    with torch.no_grad():
        with tqdm(d_dataloader, leave=True) as pbar:
            for i, batch in enumerate(pbar):
                inputs = batch['X']
                steps = batch['step']
                series_ids = batch['series_id']

                outputs = model({'X':inputs})
                all_outputs.append(outputs.sigmoid().numpy().astype(np.float32))

  0%|          | 0/2075 [00:00<?, ?it/s]

In [35]:
def lpf(wave, fs=12*60*24, fe=60, n=3):
    nyq = fs / 2.0
    b, a = signal.butter(1, fe/nyq, btype='low')
    for i in range(0, n):
        wave = signal.filtfilt(b, a, wave)
    return wave

In [None]:
submission = pd.DataFrame()
onset_candis = []
wakeup_candis = []
for i, output in tqdm(enumerate(all_outputs), total=len(all_outputs)):
    series_id = i
    before_RMSE = np.sqrt(mean_squared_error(output, np.zeros_like(output)))

    output[:, 0]= torch.from_numpy(lpf(output[:,0]).copy())
    output[:, 1]= torch.from_numpy(lpf(output[:,1]).copy())

    after_RMSE = np.sqrt(mean_squared_error(output, np.zeros_like(output)))

    decay_ratio = before_RMSE/after_RMSE
    outputs *= decay_ratio

    onset_candi = argrelmax(output[:, 0], order=12*60*6)
    wakeup_candi = argrelmax(output[:, 1], order=12*60*6)
    try:                
        onset_candi = onset_candi[0]
        onset = pd.DataFrame({'series_id': [series_id]*len(onset_candi),
                            'step': onset_candi,
                            'score': output[onset_candi, 0].reshape(-1),
                            'event': ['onset']*len(onset_candi)})
    except:
        onset = pd.DataFrame()

    try:
        wakeup_candi = wakeup_candi[0]
        wakeup = pd.DataFrame({'series_id': [series_id]*len(wakeup_candi),
                            'step': wakeup_candi,
                            'score': output[wakeup_candi, 1].reshape(-1),
                            'event': ['wakeup']*len(wakeup_candi)})
    except:
        wakeup = pd.DataFrame()

    submission = pd.concat([submission, onset, wakeup], axis=0)

In [88]:
submission

Unnamed: 0,series_id,step,score,event
0,3.0,15,0.001735,onset
0,20.0,49,0.045914,onset
0,27.0,99,0.009282,onset
0,30.0,91,0.967838,wakeup
0,50.0,3,0.040047,wakeup
...,...,...,...,...
0,2036.0,88,0.912018,wakeup
0,2046.0,32,0.021753,onset
0,2047.0,96,0.000247,onset
0,2051.0,79,0.011105,onset


In [89]:
submission = submission.sort_values(['step']).reset_index(drop=True)

In [90]:
submission

Unnamed: 0,series_id,step,score,event
0,801.0,1,0.030924,onset
1,1910.0,1,0.006192,onset
2,666.0,2,0.000879,onset
3,50.0,3,0.040047,wakeup
4,311.0,6,0.011401,wakeup
...,...,...,...,...
263,1534.0,114,0.001454,wakeup
264,221.0,117,0.001034,onset
265,171.0,122,0.106004,wakeup
266,1495.0,125,0.027746,onset


In [None]:
submission = submission.sort_values(['series_id','step']).reset_index(drop=True)
submission['row_id'] = submission.index.astype(int)
submission['score'] = submission['score'].fillna(submission['score'].mean())
submission = submission[['row_id','series_id','step','event','score']]
submission['step'] = submission['step']
submission.to_csv('submission.csv',index=False)

In [None]:
def inference(model_path: str, test_dataloader: DataLoader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.jit.load(model_path, map_location=device)
    model.to(device)
    model.eval()

    all_series_ids = []
    all_steps = []
    all_events = []
    all_scores = []
    all_dates = []

    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc='Inference'):
            inputs = batch['X'].to(device)
            series_ids = batch['series_id']
            steps = batch['step'].cpu().numpy()
            dates = batch['date'].cpu().numpy()

            outputs = model({'X': inputs})
            probabilities = torch.softmax(outputs, dim=1)
            predictions = torch.argmax(probabilities, dim=1)
            scores = probabilities.detach().cpu().numpy()

            all_series_ids.extend(series_ids.tolist())
            all_steps.extend(steps.tolist())
            all_dates.extend(dates.tolist())

            batch_predictions = predictions.cpu().numpy().astype(int).tolist()
            for pred in batch_predictions:
                all_events.append(pred)

            for score in scores:
                all_scores.append(max(score))

    submission = pd.DataFrame({
        'series_id': all_series_ids,
        'step': all_steps,
        'date': all_dates,
        'event': all_events,
        'score': all_scores
    })

    return submission