In [52]:
import os
import gc
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import matplotlib
import glob
import sklearn
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, KFold

from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchinfo
from torchinfo import summary
from torch.utils.data import DataLoader, Dataset

In [53]:
## 많은 라이브러릴 사용하지 않았기 때문에, 아래의 라이브러리 버전만을 출력합니다.
## 아래의 버전만 맞추어 주신다면 충분히 코드의 재현이 될 것이지만, 문제 생길경우 문의주시면 감사하겠습니다.
print(f"sklearn ver : {sklearn.__version__}" )
print(f"torch ver : {torch.__version__}" )
print(f"pandas ver : {pd.__version__}" )
print(f"numpy ver : {np.__version__}" )
print(f"torchinfo ver : {torchinfo.__version__}" )
print(f"matplotlib ver : {matplotlib.__version__}" )
print("OS : M1 Pro 14inch Ventura")

sklearn ver : 1.2.2
torch ver : 2.0.1
pandas ver : 2.0.3
numpy ver : 1.23.2
torchinfo ver : 1.7.2
matplotlib ver : 3.7.1
OS : M1 Pro 14inch Ventura


In [54]:
device = torch.device("mps" if  torch.backends.mps.is_available() else "cpu")
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
'''
cuda사용시 2번째 줄의 코드를 주석 해제하시면 됩니다.
'''

'\ncuda사용시 2번째 줄의 코드를 주석 해제하시면 됩니다.\n'

In [55]:
TIMESTEP = 5000
EPOCHS = 70
LR = 1e-3
BS = 32
SEED = 42
MODEL_NAME ='Conv1d_ver_7'
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
seed_everything(SEED) # Seed 고정

In [56]:
#Reference : https://doheon.github.io/%EC%BD%94%EB%93%9C%EA%B5%AC%ED%98%84/time-series/ci-3.lstm-post/
class Model(nn.Module):
    def __init__(self, input_size):
        super(Model, self).__init__()
        self.input_size = input_size
        print
        self.conv1 = self.return_conv(self.input_size, 16)
        self.conv2 = self.return_conv(16, 32)
        self.conv3 = self.return_conv(32, 64)
        self.conv4 = self.return_conv(64, 128)
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.output = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(128+1,1)
        )
            
        
        self.init_weight()
        
    def forward(self, x, sex):
        x = x.transpose(1,2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap(x)
        x = x.transpose(1,2)
        x = x.squeeze(1)
        x = self.output(torch.cat((x, sex.reshape(-1,1)), dim=1))
        return x
    
    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.GroupNorm):
                nn.init.constant(m.weight, 1)
                nn.init.constant(m.bias, 0)
            elif isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)
                
    def return_conv(self, input_s, output_s):
        layer = nn.Sequential(
            nn.Conv1d(in_channels =  input_s, out_channels = output_s, kernel_size = 3, padding='same'),
            nn.ReLU(),
            nn.GroupNorm(1,output_s),
            nn.Conv1d(in_channels =  output_s, out_channels = output_s, kernel_size = 3, padding='same'),
            nn.ReLU(),
            nn.GroupNorm(1,output_s),
            nn.Conv1d(in_channels =  output_s, out_channels = output_s, kernel_size = 3, padding='same'),
            nn.ReLU(),
            nn.GroupNorm(1,output_s),
            nn.AvgPool1d(2)
        )
        return layer
            

In [57]:
class TestDataset(Dataset):
    def __init__(self, origin_x, sex):
        self.data = origin_x
        self.sex = sex
    def __getitem__(self, index):
        self.x = self.data[index]
        return self.x, self.sex[index]
    def __len__(self):
        return len(self.data)


In [58]:
ADULT_DATA_FOLDER = "ECG_adult_numpy_valid" # ex. ECG_adult_numpy_valid 이런식으로 문자열 입력
CHILD_DATA_FOLDER = "ECG_child_numpy_valid" # ex. ECG_adult_numpy_valid 이런식으로 문자열 입력
TEST_CSV = "submission.csv" #대회에서 주어진 submission.csv와 같은 .csv 파일명 입력

'''
경로

--- inference.ipynb
--- submission.csv
--- models
    --- .pth 파일들..
    --- .pth 파일들..
--- ECG_adult_numpy_test
    --- .npy 파일들.. 
    --- .npy 파일들..
--- ECG_child_numpy_test
    --- .npy 파일들.. 
    --- .npy 파일들..
'''

'''
예시 코드입니다. 아래와 같이 경로가 설정될 수 있께 위의 변수들을 설정하여주세요.
adult_data_test_num = len(os.listdir('./ECG_adult_numpy_valid'))
child_data_test_num = len(os.listdir('./ECG_child_numpy_valid'))

test_csv= pd.read_csv('./submission.csv')
'''


"\n예시 코드입니다. 아래와 같이 경로가 설정될 수 있께 위의 변수들을 설정하여주세요.\nadult_data_test_num = len(os.listdir('./ECG_adult_numpy_valid'))\nchild_data_test_num = len(os.listdir('./ECG_child_numpy_valid'))\n\ntest_csv= pd.read_csv('./submission.csv')\n"

In [59]:
adult_data_test_num = len(os.listdir(f'./{ADULT_DATA_FOLDER}'))
child_data_test_num = len(os.listdir(f'./{CHILD_DATA_FOLDER}'))

test_csv= pd.read_csv(f'./{TEST_CSV}')
test_csv

Unnamed: 0,FILENAME,GENDER,AGE
0,ecg_child_8781,FEMALE,
1,ecg_child_8782,MALE,
2,ecg_child_8783,MALE,
3,ecg_child_8784,FEMALE,
4,ecg_child_8785,FEMALE,
...,...,...,...
5470,ecg_adult_39536,FEMALE,
5471,ecg_adult_39537,FEMALE,
5472,ecg_adult_39538,FEMALE,
5473,ecg_adult_39539,FEMALE,


In [61]:
test_csv.loc[test_csv['GENDER'] == 'MALE',  'GENDER'] = 0
test_csv.loc[test_csv['GENDER'] == 'FEMALE',  'GENDER'] = 1
test_csv.loc[test_csv['GENDER'] == 'unknown',  'GENDER'] = 0
test_csv

Unnamed: 0,FILENAME,GENDER,AGE
0,ecg_child_8781,1,
1,ecg_child_8782,0,
2,ecg_child_8783,0,
3,ecg_child_8784,1,
4,ecg_child_8785,1,
...,...,...,...
5470,ecg_adult_39536,1,
5471,ecg_adult_39537,1,
5472,ecg_adult_39538,1,
5473,ecg_adult_39539,1,


In [62]:
test_adult_path = [f'./{ADULT_DATA_FOLDER}/{name}.npy' for name in test_csv['FILENAME'][1100:]]
test_child_path = [f'./{CHILD_DATA_FOLDER}/{name}.npy' for name in test_csv['FILENAME'][:1100]] 
print(test_adult_path[:10])
print(len(test_adult_path))
print(len(test_child_path))

'''
Test csv 형식이 주어진 submission.csv와 동일하다고 가정합니다.
Test csv가 Child 1100명 -> adult 4375명 순으로 인덱싱이 되어있다고 가정하였고,
만약 실제 테스트 데이터의 child 인원수가 다르다면 이 셀의 1100의 숫자와, 바로 아래의셀의 adult_sex_tmp=test_csv['GENDER'][1100+i]부분의 
1100숫자를 child의 인원수에맞게 수정해주시면 됩니다.
'''

['./ECG_adult_numpy_valid/ecg_adult_35141.npy', './ECG_adult_numpy_valid/ecg_adult_35142.npy', './ECG_adult_numpy_valid/ecg_adult_35143.npy', './ECG_adult_numpy_valid/ecg_adult_35144.npy', './ECG_adult_numpy_valid/ecg_adult_35145.npy', './ECG_adult_numpy_valid/ecg_adult_35146.npy', './ECG_adult_numpy_valid/ecg_adult_35147.npy', './ECG_adult_numpy_valid/ecg_adult_35148.npy', './ECG_adult_numpy_valid/ecg_adult_35149.npy', './ECG_adult_numpy_valid/ecg_adult_35150.npy']
4375
1100


"\nTest csv 형식이 주어진 submission.csv와 동일하다고 가정합니다.\nTest csv가 Child 1100명 -> adult 4375명 순으로 인덱싱이 되어있다고 가정하였고,\n만약 실제 테스트 데이터의 child 인원수가 다르다면 이 셀의 1100의 숫자와, 바로 아래의셀의 adult_sex_tmp=test_csv['GENDER'][1100+i]부분의 \n1100숫자를 child의 인원수에맞게 수정해주시면 됩니다.\n"

In [63]:
adult_test_np = []
adult_test_sex =[]
for i, path in tqdm(enumerate(test_adult_path)):
    adult_test_np_tmp = np.load(path).reshape(5000,-1)[:,:]
    adult_sex_tmp = test_csv['GENDER'][1100+i]
    adult_test_np.append(adult_test_np_tmp)
    adult_test_sex.append(adult_sex_tmp)
adult_test_np = np.array(adult_test_np)
adult_test_sex = np.array(adult_test_sex)
adult_test_np.shape

0it [00:00, ?it/s]

(4375, 5000, 12)

In [64]:
child_test_np = []
child_test_sex = []
for i, path in tqdm(enumerate(test_child_path)):
    child_test_np_tmp = np.load(path).reshape(5000,-1)[:,:]
    child_sex_tmp = test_csv['GENDER'][i]
    child_test_np.append(child_test_np_tmp)
    child_test_sex.append(child_sex_tmp)
child_test_np = np.array(child_test_np)
child_test_sex = np.array(child_test_sex)
child_test_np.shape

0it [00:00, ?it/s]

(1100, 5000, 12)

In [65]:
test_adult_dataset = TestDataset(torch.tensor(adult_test_np), torch.tensor(adult_test_sex))
test_adult_loader = DataLoader(test_adult_dataset, batch_size=BS, shuffle=False)
test_child_dataset = TestDataset(torch.tensor(child_test_np), torch.tensor(child_test_sex))
test_child_loader = DataLoader(test_child_dataset, batch_size=BS, shuffle=False)

In [66]:
child_pred_list = []
adult_pred_list = []
for fold in tqdm(range(1,11)):
    adult_pred = []
    child_pred = []
    pretrained_adult_model = torch.load('./model/' + MODEL_NAME + f"_adult_{fold}.pth")
    pretrained_adult_model = pretrained_adult_model.to(device)
    pretrained_adult_model.eval()
    pretrained_child_model = torch.load('./model/' + MODEL_NAME + f"_child_{fold}.pth")
    pretrained_child_model = pretrained_child_model.to(device)
    pretrained_child_model.eval()
    with torch.no_grad():
        for x, sex in iter(test_adult_loader):
            x = x.float().to(device)
            sex = sex.to(device)
            batch_pred = pretrained_adult_model(x,sex).squeeze(1)    
            adult_pred += batch_pred.cpu().detach().tolist()
    with torch.no_grad():
        for x, sex in iter(test_child_loader):
            x = x.float().to(device)
            sex = sex.to(device)
            batch_pred = pretrained_child_model(x,sex).squeeze(1) 
            child_pred += batch_pred.cpu().detach().tolist()
    adult_pred_list.append(adult_pred)
    child_pred_list.append(child_pred)

adult_pred_arr = np.array(adult_pred_list)
child_pred_arr = np.array(child_pred_list)
print(adult_pred_arr.shape)
print(child_pred_arr.shape)

  0%|          | 0/10 [00:00<?, ?it/s]

(10, 4375)
(10, 1100)


In [67]:
adult_pred = adult_pred_arr.mean(axis=0).tolist()
child_pred = child_pred_arr.mean(axis=0).tolist()
len(adult_pred)

4375

In [None]:
pred = child_pred+adult_pred

test_csv['AGE']=pred
test_csv

In [47]:
test_csv.to_csv(f'./{MODEL_NAME}.csv')