In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset

# 데이터셋 구성

In [2]:
class SequentialHmData(Dataset):
    """Dataset that yields, per study, the label rows and feature rows of its files.

    Two CSV files are loaded at construction time:

    * ``feature_path`` -- a table with a ``filename`` column; positional
      columns 1..6 are returned as the label block and columns 7.. as the
      feature block.
    * ``df_path`` -- a reference table with ``study_instance_uid`` and
      ``filename`` columns that maps each file to its study.

    One dataset item corresponds to one distinct ``study_instance_uid``.
    """

    def __init__(self, feature_path, df_path):
        """Read both CSV files and cache the distinct study ids.

        Args:
            feature_path: Path of the CSV holding per-file labels and features.
            df_path: Path of the reference CSV (study id <-> filename).
        """
        self.feature_df = pd.read_csv(feature_path)
        self.ref_df = pd.read_csv(df_path)
        # Distinct study ids in order of first appearance; both __getitem__
        # and __len__ are driven by this single array.
        self.person_ids = self.ref_df.study_instance_uid.unique()

    def __getitem__(self, index):
        """Return ``(label, features)`` DataFrames for the study at ``index``.

        Rows come back in the order they have in the feature table.
        NOTE(review): if slice order within a study matters downstream, confirm
        the feature CSV is already sorted accordingly.
        """
        current_person_id = self.person_ids[index]

        # All filenames that belong to the selected study.
        in_study = self.ref_df.study_instance_uid == current_person_id
        filenames = self.ref_df.loc[in_study, "filename"]

        df_current_person = self.feature_df[self.feature_df.filename.isin(filenames)]
        label = df_current_person.iloc[:, 1:7]
        features = df_current_person.iloc[:, 7:]

        return label, features

    def __len__(self):
        """Number of distinct studies (one item per study)."""
        # Use the cached ids so len() is O(1) and every index in
        # range(len(self)) is guaranteed to be valid for __getitem__.
        return len(self.person_ids)

In [3]:
# One dataset per split: each pairs the split's feature CSV with its reference CSV.
train_dataset = SequentialHmData(
    feature_path='./dataset/train_features.csv',
    df_path='./dataset/train.csv',
)
valid_dataset = SequentialHmData(
    feature_path='./dataset/valid_features.csv',
    df_path='./dataset/valid.csv',
)
test_dataset = SequentialHmData(
    feature_path='./dataset/test_features.csv',
    df_path='./dataset/test.csv',
)

In [5]:
# Show the label block (first element of the returned pair) for the first study.
train_dataset[0][0]

Unnamed: 0,label_0,label_1,label_2,label_3,label_4,label_5
0,0.001414,0.01582,0.008004,0.018745,0.013196,0.033904
1,0.001421,0.016012,0.008033,0.019219,0.013521,0.036777
2,0.001357,0.016703,0.007739,0.018858,0.013182,0.035088
3,0.00146,0.016845,0.007683,0.019069,0.013656,0.036148
4,0.001424,0.016073,0.007486,0.018949,0.013299,0.035246
5,0.001428,0.01613,0.007555,0.0188,0.013492,0.035669
6,0.001432,0.016321,0.007848,0.019178,0.013795,0.036316
7,0.001439,0.016626,0.007837,0.018888,0.013559,0.035537
8,0.00137,0.016455,0.007539,0.019107,0.013448,0.035402
9,0.00146,0.01622,0.00785,0.018771,0.013163,0.036393


# 모델 생성

In [212]:
class SequenceModel(nn.Module):
    """Two-stage sequence model over per-slice features and per-slice predictions.

    All 4-D tensors use the layout ``(N, C, 1, LenSeq)``: channels first, a
    singleton height axis, and the sequence running along the last axis.

    Stage 1 ("seq model 1") embeds the raw features with 1x1 convolutions and
    produces 6 logits per step as the sum of a point-wise head and a
    bidirectional-GRU head.

    Stage 2 ("seq model 2") concatenates the 6-channel input predictions with
    the sigmoid of the stage-1 logits (12 channels), applies dilated
    convolutions along the sequence axis, and again sums a convolutional head
    with a bidirectional-GRU head.

    Note: the stage-2 convolution kernels are oriented ``(1, k)`` so that they
    slide along ``LenSeq``. Weights saved from a ``(k, 1)`` variant of these
    layers have a different shape and would need to be transposed to load.

    Args:
        ch_in: Number of channels of the ``features`` input.
    """

    def __init__(self, ch_in=1024):
        super(SequenceModel, self).__init__()
        drop_out = 0.5
        hidden = 96
        lstm_layers = 2
        feature_num = 1
        ratio = 1
        self.ratio = ratio

        # ---- seq model 1 -------------------------------------------------
        # Point-wise (1x1) embedding: ch_in -> 512 -> 128 channels per step.
        self.fea_conv = nn.Sequential(
            nn.Dropout2d(drop_out),
            nn.Conv2d(ch_in, 512, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0), bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Dropout2d(drop_out),
            nn.Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0), bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout2d(drop_out),
        )

        # Point-wise head: 6 logits per step straight from the embedding.
        self.fea_first_final = nn.Sequential(
            nn.Conv2d(128 * feature_num, 6, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0), bias=True)
        )

        # Bidirectional GRU over the embedded sequence. The head is a conv
        # whose kernel spans the full GRU output width (2 * hidden), i.e. a
        # per-step linear layer applied to a (N, 1, LenSeq, 2 * hidden) tensor.
        self.hidden_fea = hidden
        self.fea_lstm = nn.GRU(128 * feature_num, self.hidden_fea, num_layers=lstm_layers,
                               batch_first=True, bidirectional=True)
        self.fea_lstm_final = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=(1, self.hidden_fea * 2), stride=(1, 1), padding=(0, 0),
                      dilation=1, bias=True)
        )

        # ---- seq model 2 -------------------------------------------------
        # Input has 12 channels: 6 from `x` plus 6 from sigmoid(stage-1 logits).
        # Kernels are (1, k) because the sequence lives on the last axis; a
        # (k, 1) kernel would only ever see zero padding around the size-1
        # height axis and could not mix neighbouring steps. Padding equals
        # dilation * (k - 1) / 2, so LenSeq is preserved by every layer.
        self.conv_first = nn.Sequential(
            nn.Conv2d(12, 128 * ratio, kernel_size=(1, 5), stride=(1, 1), padding=(0, 2),
                      dilation=(1, 1), bias=False),
            nn.BatchNorm2d(128 * ratio),
            nn.ReLU(),
            nn.Conv2d(128 * ratio, 64 * ratio, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2),
                      dilation=(1, 2), bias=False),
            nn.BatchNorm2d(64 * ratio),
            nn.ReLU(),
        )

        self.conv_res = nn.Sequential(
            nn.Conv2d(64 * ratio, 64 * ratio, kernel_size=(1, 3), stride=(1, 1), padding=(0, 4),
                      dilation=(1, 4), bias=False),
            nn.BatchNorm2d(64 * ratio),
            nn.ReLU(),
            nn.Conv2d(64 * ratio, 64 * ratio, kernel_size=(1, 3), stride=(1, 1), padding=(0, 2),
                      dilation=(1, 2), bias=False),
            nn.BatchNorm2d(64 * ratio),
            nn.ReLU(),
        )

        # Single-channel convolutional head; its output is broadcast over the
        # 6 GRU-head channels when the two are summed in forward().
        self.conv_final = nn.Sequential(
            nn.Conv2d(64 * ratio, 1, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1),
                      dilation=(1, 1), bias=False)
        )

        # Bidirectional GRU + per-step linear head, mirroring stage 1.
        self.hidden = hidden
        self.lstm = nn.GRU(64 * ratio, self.hidden, num_layers=lstm_layers,
                           batch_first=True, bidirectional=True)
        self.final = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=(1, self.hidden * 2), stride=(1, 1), padding=(0, 0),
                      dilation=1, bias=True)
        )

    @staticmethod
    def _to_sequence(feature_map):
        """Convert ``(N, C, 1, LenSeq)`` to the batch-first GRU layout ``(N, LenSeq, C)``.

        A plain ``view(N, -1, C)`` would only relabel the memory and interleave
        channel and time values; the axes have to be swapped explicitly.
        """
        return feature_map.flatten(2).transpose(1, 2).contiguous()

    def forward(self, features, x):
        """Run both stages.

        Args:
            features: Tensor of shape ``(N, ch_in, 1, LenSeq)``.
            x: Tensor of shape ``(N, 6, 1, LenSeq)`` that is concatenated with
                the stage-1 probabilities to form the stage-2 input.

        Returns:
            Tuple ``(out1, out2)`` of logits, each of shape ``(N, 6, 1, LenSeq)``.
        """
        # ---- stage 1 -----------------------------------------------------
        x_fc = self.fea_conv(features)                       # (N, 128, 1, LenSeq)

        # point-wise head
        out11 = self.fea_first_final(x_fc)                   # (N, 6, 1, LenSeq)

        # GRU head
        x_lstm, _ = self.fea_lstm(self._to_sequence(x_fc))   # (N, LenSeq, 2*hidden)
        out12 = self.fea_lstm_final(x_lstm.unsqueeze(1))     # (N, 6, LenSeq, 1)
        out12 = out12.permute(0, 1, 3, 2)                    # (N, 6, 1, LenSeq)

        out1 = out11 + out12
        out1_sigmoid = torch.sigmoid(out1)

        # ---- stage 2 -----------------------------------------------------
        # Concatenate the incoming predictions with the stage-1 probabilities.
        x = torch.cat([x, out1_sigmoid], dim=1)              # (N, 12, 1, LenSeq)

        x = self.conv_first(x)
        x = self.conv_res(x)                                 # (N, 64*ratio, 1, LenSeq)

        # convolutional head (1 channel, broadcast over the 6 outputs below)
        out21 = self.conv_final(x)                           # (N, 1, 1, LenSeq)

        # GRU head
        x, _ = self.lstm(self._to_sequence(x))               # (N, LenSeq, 2*hidden)
        out22 = self.final(x.unsqueeze(1))                   # (N, 6, LenSeq, 1)
        out22 = out22.permute(0, 1, 3, 2)                    # (N, 6, 1, LenSeq)

        out2 = out21 + out22

        return out1, out2
    
# Smoke test: push all-zero dummy batches through the model and display the output shapes.
model = SequenceModel(ch_in=1024)

# Dummy inputs in (N, C, 1, LenSeq) layout: 1024-channel features and 6-channel predictions.
f, x = np.zeros((10, 1024, 1, 35)), np.zeros((10, 6, 1, 35))

# NumPy float64 arrays -> float32 tensors.
feature, inputs = (torch.from_numpy(arr).float() for arr in (f, x))

o1, o2 = model(feature, inputs)
o1.shape, o2.shape

(torch.Size([10, 6, 1, 35]), torch.Size([10, 6, 1, 35]))

In [206]:
# Standalone shape check for a 2-layer bidirectional GRU.
# batch_first=True is required for the module to read the input as
# (batch, sequence length, feature size); without it nn.GRU treats dim 0 as time.
rnn = nn.GRU(1024, 6, num_layers=2, batch_first=True, bidirectional=True)
# NOTE: `input` shadows the Python builtin; the name is kept so other cells that use it still work.
input = torch.randn(100, 35, 1024)  # (batch size, sequence length, feature size)

# output: (batch size, sequence length, 2 * hidden size)
# hn:     (num_layers * 2 directions, batch size, hidden size)
output, hn = rnn(input)

In [207]:
# Shape of the GRU output; the last dimension is 2 * hidden_size because the GRU is bidirectional.
output.shape

torch.Size([100, 35, 12])