In [1]:
import numpy as np 
import pandas as pd
import torch
import pickle


class Dataset_ECG_pytorch(torch.utils.data.Dataset):
    """
        Build ECG dataset
    """
    def __init__(self, list_id, list_age, list_sex, list_recording, list_labels_oh=None, num_classes=12):
        """
            dataset을 읽어들여 id, age, sex, recording, labels를 저장한 list를 만들어 줍니다.
        """
        self.sample_id = torch.tensor(list_id)
        self.sample_age = torch.tensor(list_age)
        self.sample_sex = torch.tensor(list_sex)
        self.sample_recording = torch.tensor(list_recording)

        length = len(list_id)
        assert length==len(self.sample_id)
        assert length==len(self.sample_age)
        assert length==len(self.sample_sex)
        assert length==len(self.sample_recording)

        if not list_labels_oh is None:
            self.train = True
            self.sample_labels = torch.tensor(list_labels_oh)
            assert length==len(self.sample_labels)
        
        self.num_samples = length
        
        print(f'Loaded {self.num_samples} samples...')

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        result = {
            "id": self.sample_id[idx],
            "age": self.sample_age[idx],
            "sex": self.sample_sex[idx],
            "recording": self.sample_recording[idx],
        }
        if self.train:
            result['labels'] = self.sample_labels[idx]
        return result
    
    
    

base_dir = '../input/project-3/'
with open(base_dir+'train_data_torch.pkl', 'rb') as f:
    training_dataset = pickle.load(f)
with open(base_dir+'test_data_torch.pkl', 'rb') as f:
    test_dataset = pickle.load(f)

print("완료")


완료


In [2]:
#num_classes -> label / num_leads -> recording1,2 / out_channel -> recording shape
class Example_CNN_v1(torch.nn.Module):
    def __init__(self, num_classes=12, num_leads=2):
        super(Example_CNN_v1, self).__init__()
        self.num_classes = num_classes
        self.num_leads = num_leads
        self.conv1 = torch.nn.Conv1d(in_channels=self.num_leads, out_channels=32, kernel_size=15, stride=3, padding=2)
        self.relu1 = torch.nn.ReLU() #1498
        self.conv2 = torch.nn.Conv1d(in_channels=32, out_channels=64, kernel_size=13, stride=3, padding=1)
        self.relu2 = torch.nn.ReLU() #497
        self.conv3 = torch.nn.Conv1d(in_channels=64, out_channels=128, kernel_size=11, stride=3, padding=1)
        self.relu3 = torch.nn.ReLU() #164
        self.conv4 = torch.nn.Conv1d(in_channels=128, out_channels=128, kernel_size=10, stride=2)
        self.relu4 = torch.nn.ReLU() #78
        self.conv5 = torch.nn.Conv1d(in_channels=128, out_channels=64, kernel_size=9, stride=1)
        self.relu5 = torch.nn.ReLU() #70
        self.conv6 = torch.nn.Conv1d(in_channels=64, out_channels=32, kernel_size=7, stride=1)
        self.relu6 = torch.nn.ReLU() #64
        self.fc1 = torch.nn.Linear(32*63, 128)
        self.relu7 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(128, self.num_classes)

    def forward(self, x):
        # 이 모델은 recording만을 input으로 받습니다. feature를 추가적으로 사용하도록 할 수도 있습니다.
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.conv5(x)
        x = self.relu5(x)
        x = self.conv6(x)
        x = self.relu6(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        #print(x.size())
        x = self.fc1(x)
        x = self.relu7(x)
        out = self.fc2(x)
        return out

print("완료")


완료


In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [4]:
EPOCHS = 20
BATCH_SIZE = 32
LEARNING_RATE = 0.001

training_loader = torch.utils.data.DataLoader(training_dataset, pin_memory=True, batch_size=BATCH_SIZE)

num_training = len(training_dataset)


model = Example_CNN_v1(num_classes=12, num_leads=2)

model.to(device)
model.train()

criterion = torch.nn.BCEWithLogitsLoss() # for multi-label classification
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
#optimizer = Nadam(model.parameters(), lr=LEARNING_RATE)

print("완료")

완료


In [5]:
for epoch in range(1, EPOCHS+1):
    print(f'***** Epoch {epoch} *****')
    epoch_training_loss_sum = 0.0
    for i_batch, sample_batched in enumerate(training_loader):
        b_recording = sample_batched["recording"].to(device)
        b_labels = sample_batched["labels"].to(device)
        optimizer.zero_grad()
        b_out = model(b_recording)
        loss = criterion(b_out, b_labels)
        loss.backward()
        optimizer.step()
        epoch_training_loss_sum += loss.item() * b_labels.shape[0]

    epoch_training_loss = epoch_training_loss_sum / num_training
    print(f'training loss of epoch {epoch}: {epoch_training_loss}\n')

print("완료")

***** Epoch 1 *****
training loss of epoch 1: 0.2801238125856782

***** Epoch 2 *****
training loss of epoch 2: 0.2615826489861846

***** Epoch 3 *****
training loss of epoch 3: 0.24961335177709976

***** Epoch 4 *****
training loss of epoch 4: 0.21114555345943214

***** Epoch 5 *****
training loss of epoch 5: 0.1937403153335703

***** Epoch 6 *****
training loss of epoch 6: 0.18606483509410007

***** Epoch 7 *****
training loss of epoch 7: 0.17950705251308027

***** Epoch 8 *****
training loss of epoch 8: 0.17360426626700848

***** Epoch 9 *****
training loss of epoch 9: 0.16761733252611752

***** Epoch 10 *****
training loss of epoch 10: 0.1627390944483676

***** Epoch 11 *****
training loss of epoch 11: 0.1561425787451216

***** Epoch 12 *****
training loss of epoch 12: 0.14930602767902837

***** Epoch 13 *****
training loss of epoch 13: 0.141980919724029

***** Epoch 14 *****
training loss of epoch 14: 0.1338491038732074

***** Epoch 15 *****
training loss of epoch 15: 0.1263374765

In [6]:
model.eval()

test_prediction_df = pd.DataFrame(columns=['labels'])


test_len = len(test_dataset) # 바꿔야
with torch.no_grad():
    for idx in range(test_len):
        recording = test_dataset.sample_recording[idx]
        out = model(recording.unsqueeze(0).to(device)) # unsqueeze는 batch dimension을 추가해주기 위함
        sample_prediction = out.squeeze() > 0.5 # Use 0.5 as a threshold / squeeze는 batch dimension을 제거해주기 위함
        indices_of_1s = np.where(sample_prediction.cpu())[0]
        str_indices_of_1s = ' '.join(map(str, indices_of_1s))
        test_prediction_df.loc[idx] = [str_indices_of_1s]

test_prediction_df.index = test_dataset.sample_id.tolist()
test_prediction_df.index.name = 'id'
test_prediction_df.sort_index(inplace=True)



In [7]:
print(test_prediction_df.head())

print(len(test_prediction_df))


   labels
id       
0       9
1     3 8
2       8
3   2 3 8
4       8
7389


In [8]:
test_prediction_df.to_csv('my_submission.csv')