# DSHS 연합 Inference 코드

#### README에 data, weights 폴더 세팅을 공유하였으니 참고 부탁드립니다.
#### Run All만 하면 outputs/submission1.csv가 생성되어야 합니다.
#### 만약 재현에 문제가 있다면 언제든지 cytotoxicity8@kaist.ac.kr으로 연락 부탁드립니다.

In [2]:
!pip install -r requirements.txt

[0m

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch

from tqdm import tqdm

from tsai.all import *

from dataset import SNUH_Dataset
from resnet import ResNet1d
from utils import flattening_prediction, validate_child, validate_adult
import config

import os
import joblib

In [3]:
# Per-model prediction columns: six ensemble members for adults, three for children.
adult_model_columns = [
    'adult_resnet1', 'adult_resnet2',
    'adult_inception1', 'adult_inception2',
    'adult_rocket1', 'adult_rocket2',
]
child_model_columns = ['child_resnet', 'child_inception', 'child_rocket']

# Read the info table (first CSV column becomes the index) and append one
# empty (None) column per model: adult columns first, then child columns.
prediction_df = pd.read_csv(
    os.path.join(config.DATA_PATH, config.INFO_FILENAME), index_col=0
).assign(**{name: None for name in adult_model_columns + child_model_columns})

# Device used for the networks below, as configured in the project config.
device = config.DEVICE

# Inference of a child dataset

### 저희는 .zip 파일을 인풋으로 받고 있습니다. 대회에서 제공된 ecg_child_numpy_valid.zip 형태를 활용해주시면 감사하겠습니다.

In [5]:
# Build the child ECG dataset from the archive named in the project config.
# butterworth=True is only used for the child data in this notebook (the adult
# dataset passes False); presumably it enables a Butterworth filtering step
# inside SNUH_Dataset -- confirm in dataset.py.
child_dataset = SNUH_Dataset(data_path=config.DATA_PATH, ecg_filename=config.CHILD_ECG_FILENAME, butterworth=True)

Butterworth filter


100%|██████████| 1100/1100 [00:03<00:00, 337.70it/s]


In [6]:
# Deterministic pass over the child dataset: fixed order, one worker process,
# and the final partial batch is kept so every record receives a prediction.
child_loader = torch.utils.data.DataLoader(
    dataset=child_dataset,
    batch_size=config.BATCH_SIZE,
    num_workers=1,
    shuffle=False,
    drop_last=False,
)

In [7]:
# Child ResNet: 12 input channels x 5000 samples; each blocks_dim entry is a
# (first value, second value) pair taken from the two original lists
# [64, 128, 196, 256, 320] and [5000, 1250, 250, 50, 10].
resnet = ResNet1d(
    input_dim=(12, 5000),
    blocks_dim=[(64, 5000), (128, 1250), (196, 250), (256, 50), (320, 10)],
    kernel_size=17,
    dropout_rate=0.5,
).to(device)

# Child InceptionTimePlus regressor with a single output value.
inception = InceptionTimePlus(
    c_in=12,
    c_out=1,
    seq_len=5000,
    nf=32,
    fc_dropout=0.5,
    nb_filters=None,
    depth=5,
).to(device)

In [8]:
# Restore the trained child weights. Each checkpoint is a dict whose 'model'
# entry holds the state dict. map_location remaps the stored tensors onto the
# configured device, so checkpoints saved on a GPU also load on a CPU-only host.
resnet.load_state_dict(
    torch.load('weights/child/resnet_child_69_model.pth', map_location=device)['model']
)
inception.load_state_dict(
    torch.load('weights/child/inception_child_79_model.pth', map_location=device)['model']
)

<All keys matched successfully>

In [9]:
# Load the serialized child ROCKET model; the loop below only calls `.predict` on it.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load weight files that come from a trusted source.
rocket = joblib.load('weights/child/rocket_child_ensemble5.pkl')

In [10]:
# Switch both networks to evaluation mode before predicting.
resnet.eval()
inception.eval()

for ecg, idx in tqdm(child_loader):
    ecg = ecg.float()
    # ROCKET consumes a NumPy array, so take the host-side copy before the
    # batch is moved to the accelerator (avoids a device -> host round trip).
    ecg_np = ecg.detach().cpu().numpy()
    # Use the configured device (same one the models were moved to) instead
    # of a hard-coded .cuda(), so the notebook also runs without a GPU.
    ecg = ecg.to(device)

    with torch.no_grad():
        pred_age_resnet1 = resnet(ecg)
        pred_age_inception1 = inception(ecg)
        pred_age_rocket1 = rocket.predict(ecg_np)

        # Network outputs go through the project helper before being stored;
        # the ROCKET output is stored as returned by `.predict`.
        pred_age_resnet1 = flattening_prediction(pred_age_resnet1, idx)
        pred_age_inception1 = flattening_prediction(pred_age_inception1, idx)

    # Write each model's batch predictions into its own column; the order of
    # the list must match child_model_columns.
    for model, pred in zip(child_model_columns, [pred_age_resnet1, pred_age_inception1, pred_age_rocket1]):
        prediction_df.loc[idx, model] = pred

  0%|          | 0/5 [00:00<?, ?it/s]OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
100%|██████████| 5/5 [05:27<00:00, 65.46s/it]


In [11]:
# Unweighted mean of the three child models. Rows without child predictions
# come out as missing and are dropped, so only child records update AGE.
child_pred = (
    (
        prediction_df["child_resnet"]
        + prediction_df['child_inception']
        + prediction_df["child_rocket"]
    )
    / 3.0
).dropna()

prediction_df.loc[child_pred.index, "AGE"] = child_pred

In [12]:
# Bind a second name to the current DataFrame object (an alias, not a copy).
# NOTE(review): `tmp` is not read in any of the visible cells -- presumably a
# debugging leftover; confirm before removing.
tmp = prediction_df

In [13]:
# Run the project's child validation helper and keep an independent copy of
# the frame it returns. The recorded output shows it reports ECGs that contain
# an all-zero channel; presumably it also adjusts AGE for such records --
# confirm in utils.validate_child.
prediction_df = validate_child(child_dataset, prediction_df).copy()

ecg_child_8791 includes a channel with only zeros.


In [14]:
# Sanity check: no predicted AGE may be below 1/12 (one month, if AGE is in years).
assert not (prediction_df["AGE"] < 1/12).any()

# Inference of an adult dataset

### 저희는 .zip 파일을 인풋으로 받고 있습니다. 대회에서 제공된 ecg_adult_numpy_valid.zip 형태를 활용해주시면 감사하겠습니다.

In [15]:
# Build the adult ECG dataset from the archive named in the project config.
# Unlike the child dataset, butterworth=False is passed here.
adult_dataset = SNUH_Dataset(data_path=config.DATA_PATH, ecg_filename=config.ADULT_ECG_FILENAME, butterworth=False)

In [16]:
# Deterministic pass over the adult dataset: fixed order, one worker process,
# and the final partial batch is kept so every record receives a prediction.
adult_loader = torch.utils.data.DataLoader(
    dataset=adult_dataset,
    batch_size=config.BATCH_SIZE,
    num_workers=1,
    shuffle=False,
    drop_last=False,
)

In [17]:
# The two adult ResNets are built with identical hyper-parameters; they are
# two separate instances, created in the order resnet1 then resnet2.
resnet1, resnet2 = (
    ResNet1d(
        input_dim=(12, 5000),
        blocks_dim=[(64, 5000), (128, 1250), (196, 250), (256, 50), (320, 10)],
        kernel_size=17,
        dropout_rate=0.5,
    ).to(device)
    for _ in range(2)
)

# First adult InceptionTimePlus: conv dropout 0.2 and fc dropout 0.4.
inception1 = InceptionTimePlus(
    c_in=12,
    c_out=1,
    seq_len=5000,
    nf=32,
    conv_dropout=0.2,
    fc_dropout=0.4,
    nb_filters=None,
    depth=5,
).to(device)

# Second adult InceptionTimePlus: no conv_dropout argument, fc dropout 0.5.
inception2 = InceptionTimePlus(
    c_in=12,
    c_out=1,
    seq_len=5000,
    nf=32,
    fc_dropout=0.5,
    nb_filters=None,
    depth=5,
).to(device)

In [18]:
# Restore the trained adult weights. Each checkpoint is a dict whose 'model'
# entry holds the state dict. map_location remaps the stored tensors onto the
# configured device, so checkpoints saved on a GPU also load on a CPU-only host.
resnet1.load_state_dict(
    torch.load('weights/adult/resnet1_39_model.pth', map_location=device)['model']
)
resnet2.load_state_dict(
    torch.load('weights/adult/resnet2_39_model.pth', map_location=device)['model']
)

inception1.load_state_dict(
    torch.load('weights/adult/inception1_39_model.pth', map_location=device)['model']
)
inception2.load_state_dict(
    torch.load('weights/adult/inception2_39_model.pth', map_location=device)['model']
)

<All keys matched successfully>

In [19]:
# Load the two serialized adult ROCKET models; the loop below only calls
# `.predict` on them.
# NOTE(review): joblib.load unpickles the files, which can execute arbitrary
# code -- only load weight files that come from a trusted source.
rocket1 = joblib.load('weights/adult/rocket1_features10000_dilation32_ensemble5.pkl')
rocket2 = joblib.load('weights/adult/rocket2_features12000_dilation36_ensemble3.pkl')

In [20]:
# Switch all four networks to evaluation mode before predicting.
resnet1.eval()
resnet2.eval()
inception1.eval()
inception2.eval()

for ecg, idx in tqdm(adult_loader):
    ecg = ecg.float()
    # Both ROCKET models consume the same NumPy array: convert once, on the
    # host, before the batch is moved to the accelerator.
    ecg_np = ecg.detach().cpu().numpy()
    # Use the configured device (same one the models were moved to) instead
    # of a hard-coded .cuda(), so the notebook also runs without a GPU.
    ecg = ecg.to(device)

    with torch.no_grad():

        pred_age_resnet1 = resnet1(ecg)
        pred_age_resnet2 = resnet2(ecg)

        pred_age_inception1 = inception1(ecg)
        pred_age_inception2 = inception2(ecg)

        pred_age_rocket1 = rocket1.predict(ecg_np)
        pred_age_rocket2 = rocket2.predict(ecg_np)

        # Network outputs go through the project helper before being stored;
        # the ROCKET outputs are stored as returned by `.predict`.
        pred_age_resnet1 = flattening_prediction(pred_age_resnet1, idx)
        pred_age_resnet2 = flattening_prediction(pred_age_resnet2, idx)
        pred_age_inception1 = flattening_prediction(pred_age_inception1, idx)
        pred_age_inception2 = flattening_prediction(pred_age_inception2, idx)

    # Write each model's batch predictions into its own column; the order of
    # the list must match adult_model_columns.
    for model, pred in zip(adult_model_columns, [pred_age_resnet1, pred_age_resnet2, pred_age_inception1,
                 pred_age_inception2, pred_age_rocket1, pred_age_rocket2]):

        prediction_df.loc[idx, model] = pred

100%|██████████| 18/18 [28:04<00:00, 93.58s/it]


In [21]:
# Unweighted mean of the six adult models. Rows without adult predictions
# come out as missing and are dropped, so only adult records update AGE.
adult_pred = (
    (
        prediction_df["adult_resnet1"]
        + prediction_df["adult_resnet2"]
        + prediction_df["adult_inception1"]
        + prediction_df['adult_inception2']
        + prediction_df['adult_rocket1']
        + prediction_df['adult_rocket2']
    )
    / 6.0
).dropna()

prediction_df.loc[adult_pred.index, "AGE"] = adult_pred

In [22]:
# Run the project's adult validation helper and keep an independent copy of
# the frame it returns. The recorded output shows it reports ECGs that contain
# an all-zero channel; presumably it also adjusts AGE for such records --
# confirm in utils.validate_adult.
prediction_df = validate_adult(adult_dataset, prediction_df).copy()

ecg_adult_36819 includes a channel with only zeros.


In [23]:
# Sanity check: no predicted AGE may exceed 105.
assert not (prediction_df["AGE"] > 105).any()

In [24]:
# AGE must not contain any missing values.
assert not prediction_df["AGE"].isna().any()

In [5]:
# Create the output directory if needed so a fresh checkout does not fail on
# the first write (skipped when OUTPUT_PATH is empty, i.e. the current directory).
if config.OUTPUT_PATH:
    os.makedirs(config.OUTPUT_PATH, exist_ok=True)

# Full table, including every per-model prediction column, for inspection.
prediction_df.to_csv(os.path.join(config.OUTPUT_PATH, "submission_check1.csv"))
# Submission file: the index plus the GENDER and AGE columns only.
prediction_df[["GENDER", "AGE"]].to_csv(os.path.join(config.OUTPUT_PATH, "submission1.csv"))