In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from it and write checkpoints / submissions back to it.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from google.colab import output
# Usage reminder: `!cp file1 file2` copies file1 to file2.
# Copy the dataset archive from Google Drive into the current working directory.
!cp "/content/drive/MyDrive/data_2.zip" "data_2.zip"


In [3]:
# Extract data_2.zip into the current directory.
!unzip "data_2.zip"

Archive:  data_2.zip
  inflating: dirty_mnist_2nd.zip     
  inflating: dirty_mnist_2nd_answer.csv  
  inflating: mnist_data.zip          
  inflating: sample_submission.csv   
  inflating: test_dirty_mnist_2nd.zip  


In [4]:
from google.colab import output
# Create a folder named dirty_mnist in the current directory.
!mkdir "./dirty_mnist"
# Extract the training image archive (dirty_mnist_2nd.zip) into ./dirty_mnist/.
!unzip "dirty_mnist_2nd.zip" -d "./dirty_mnist/"
# Create a folder named test_dirty_mnist in the current directory.
!mkdir "./test_dirty_mnist"
# Extract the test image archive (test_dirty_mnist_2nd.zip) into ./test_dirty_mnist/.
!unzip "test_dirty_mnist_2nd.zip" -d "./test_dirty_mnist/"
# Clear the (very long) unzip output from the cell.
output.clear()

In [6]:
import torch.nn as nn

class SELayer(nn.Module):
    """Squeeze-and-Excitation gate that rescales every channel of a feature map.

    The input is globally average-pooled to one value per channel, pushed
    through a bottleneck MLP (Linear -> ReLU -> Dropout -> Linear -> Sigmoid)
    and the resulting per-channel weights multiply the input.

    Args:
        channel: Number of channels of the incoming feature map.
        reduction: Divisor applied to ``channel`` to size the bottleneck.

    Note:
        The dropout probability is read from the module-level ``drop_prob``
        when the layer is constructed, so that name must exist by then.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout(p=drop_prob),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate."""
        batch, channels, _height, _width = x.size()
        # Squeeze: one scalar per (sample, channel).
        squeezed = self.avg_pool(x).view(batch, channels)
        # Excite: gate values in (0, 1), shaped for broadcasting over H and W.
        gate = self.fc(squeezed).view(batch, channels, 1, 1)
        return x * gate

In [None]:
'''
import torch.hub
hub_model = torch.hub.load(
    'moskomule/senet.pytorch',
    'se_resnet50',
    pretrained=True,)
'''

"\nimport torch.hub\nhub_model = torch.hub.load(\n    'moskomule/senet.pytorch',\n    'se_resnet50',\n    pretrained=True,)\n"

In [7]:

from torch.hub import load_state_dict_from_url
from torchvision.models import ResNet


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class SEBasicBlock(nn.Module):
    """Two-convolution residual block with dropout and a Squeeze-and-Excitation gate.

    Data flow: conv3x3 -> BN -> ReLU -> Dropout -> conv3x3 -> BN -> SE, after
    which the (optionally downsampled) input is added and a final ReLU applied.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input before the residual
            addition; ``None`` means the input is added unchanged.
        groups, base_width, dilation, norm_layer: Accepted but ignored by this
            block (presumably present so the block matches the constructor
            call made by torchvision's ``ResNet`` -- TODO confirm).
        reduction: Keyword-only; forwarded to ``SELayer``.

    Note:
        The dropout probability is read from the module-level ``drop_prob``
        when the block is constructed, so that name must exist by then.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None,
                 *, reduction=16):
        super(SEBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        # Use the `nn` alias (as SELayer does) instead of `torch.nn`: the bare
        # `torch` name is not imported by the cell that defines this class.
        self.dropout1 = nn.Dropout(p=drop_prob)
        self.conv2 = conv3x3(planes, planes, 1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SELayer(planes, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.se(out)

        # Project the shortcut when a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class SEBottleneck(nn.Module):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1) with dropout and an SE gate.

    Data flow: 1x1 conv -> BN -> ReLU -> Dropout -> 3x3 conv (strided) -> BN ->
    ReLU -> Dropout -> 1x1 conv to ``planes * expansion`` channels -> BN -> SE,
    then the (optionally downsampled) input is added and a final ReLU applied.

    Args:
        inplanes: Number of input channels.
        planes: Width of the bottleneck; the block outputs
            ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input before the residual
            addition; ``None`` means the input is added unchanged.
        groups, base_width, dilation, norm_layer: Accepted but ignored by this
            block (presumably present so the block matches the constructor
            call made by torchvision's ``ResNet`` -- TODO confirm).
        reduction: Keyword-only; forwarded to ``SELayer``.

    Note:
        The dropout probability is read from the module-level ``drop_prob``
        when the block is constructed, so that name must exist by then.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None,
                 *, reduction=16):
        super(SEBottleneck, self).__init__()
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Use the `nn` alias instead of `torch.nn`: the bare `torch` name is not
        # imported by the cell that defines this class.
        self.dropout1 = nn.Dropout(p=drop_prob)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout2 = nn.Dropout(p=drop_prob)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.se = SELayer(out_planes, reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.dropout2(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        # Project the shortcut when a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


def se_resnet18(num_classes=1_000):
    """Build an SE-ResNet-18: a ``ResNet`` assembled from ``SEBasicBlock``s.

    Args:
        num_classes: Forwarded as the ``num_classes`` argument of ``ResNet``.

    Returns:
        The model, with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
        No pretrained weights are loaded.
    """
    blocks_per_layer = [2, 2, 2, 2]
    net = ResNet(SEBasicBlock, blocks_per_layer, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net

'''
def se_resnet34(num_classes=1_000):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(SEBasicBlock, [3, 4, 6, 3], num_classes=num_classes)
    model.avgpool = nn.AdaptiveAvgPool2d(1)
    return model
'''
'''
def se_resnet50(num_classes=1_000, pretrained=False):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(SEBottleneck, [3, 4, 6, 3], num_classes=num_classes)
    model.avgpool = nn.AdaptiveAvgPool2d(1)
    if pretrained:
        model.load_state_dict(load_state_dict_from_url(
            "https://github.com/moskomule/senet.pytorch/releases/download/archive/seresnet50-60a8950a85b2b.pkl"))
    return model
'''

def se_resnet101(num_classes=1_000):
    """Build an SE-ResNet-101: a ``ResNet`` assembled from ``SEBottleneck``s.

    Args:
        num_classes: Forwarded as the ``num_classes`` argument of ``ResNet``.

    Returns:
        The model, with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
        No pretrained weights are loaded.
    """
    blocks_per_layer = [3, 4, 23, 3]
    net = ResNet(SEBottleneck, blocks_per_layer, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net


def se_resnet152(num_classes=1_000):
    """Build an SE-ResNet-152: a ``ResNet`` assembled from ``SEBottleneck``s.

    Args:
        num_classes: Forwarded as the ``num_classes`` argument of ``ResNet``.

    Returns:
        The model, with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
        No pretrained weights are loaded.
    """
    blocks_per_layer = [3, 8, 36, 3]
    net = ResNet(SEBottleneck, blocks_per_layer, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net


class CifarSEBasicBlock(nn.Module):
    """Residual block (conv3x3 -> BN -> ReLU -> conv3x3 -> BN -> SE) for the CIFAR nets.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution and of the shortcut projection.
        reduction: Forwarded to ``SELayer``.

    The shortcut is a strided 1x1 convolution followed by batch norm when the
    channel count changes, and an identity mapping otherwise.
    """

    def __init__(self, inplanes, planes, stride=1, reduction=16):
        super(CifarSEBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.se = SELayer(planes, reduction)
        if inplanes != planes:
            self.downsample = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False),
                                            nn.BatchNorm2d(planes))
        else:
            # nn.Identity instead of a lambda: a lambda attribute cannot be
            # pickled, which breaks torch.save(model) on the whole module.
            # Identity has no parameters, so state_dict keys are unchanged.
            self.downsample = nn.Identity()
        self.stride = stride

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        residual = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.se(out)

        out += residual
        out = self.relu(out)

        return out


class CifarSEResNet(nn.Module):
    """CIFAR-style SE-ResNet: a 3x3 stem followed by three groups of residual blocks.

    Args:
        block: Block class; called as ``block(inplanes, planes, stride, reduction)``.
        n_size: Number of blocks in each of the three groups.
        num_classes: Size of the final linear layer's output.
        reduction: Reduction value handed to every block.
    """

    def __init__(self, block, n_size, num_classes=10, reduction=16):
        super().__init__()
        self.inplane = 16
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU(inplace=True)
        # (output width, stride of the first block) for layer1..layer3.
        group_specs = ((16, 1), (32, 2), (64, 2))
        for number, (width, first_stride) in enumerate(group_specs, start=1):
            group = self._make_layer(block, width, blocks=n_size,
                                     stride=first_stride, reduction=reduction)
            setattr(self, f"layer{number}", group)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64, num_classes)
        self.initialize()

    def initialize(self):
        """Kaiming-normal init for conv weights; BN weight set to 1 and bias to 0."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, block, planes, blocks, stride, reduction):
        """Stack blocks into a group; only the first block uses ``stride``."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (blocks - 1))
        stacked = []
        for block_stride in per_block_strides:
            stacked.append(block(self.inplane, planes, block_stride, reduction))
            # Every following block consumes the width just produced.
            self.inplane = planes
        return nn.Sequential(*stacked)

    def forward(self, x):
        """Return the ``fc`` layer's output for input ``x``."""
        stem = self.relu(self.bn1(self.conv1(x)))
        features = self.layer3(self.layer2(self.layer1(stem)))
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)


class CifarSEPreActResNet(CifarSEResNet):
    """Variant of ``CifarSEResNet`` that applies BN + ReLU after the residual groups.

    Args:
        block: Block class handed to ``CifarSEResNet``.
        n_size: Number of blocks in each of the three groups.
        num_classes: Size of the final linear layer's output.
        reduction: Reduction value handed to every block.
    """

    def __init__(self, block, n_size, num_classes=10, reduction=16):
        super(CifarSEPreActResNet, self).__init__(
            block, n_size, num_classes, reduction)
        # Replace the stem BN: here bn1 normalises the output of layer3, so it
        # is sized by the final value of self.inplane.
        self.bn1 = nn.BatchNorm2d(self.inplane)
        # Re-run initialisation so the replaced bn1 is covered as well.
        self.initialize()

    def forward(self, x):
        """Return the ``fc`` layer's output for input ``x``."""
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.bn1(x)
        x = self.relu(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        # The original fell off the end here and therefore returned None.
        return x


def se_resnet20(**kwargs):
    """Build a ``CifarSEResNet`` with 3 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEResNet`` constructor.
    """
    return CifarSEResNet(CifarSEBasicBlock, 3, **kwargs)


def se_resnet32(**kwargs):
    """Build a ``CifarSEResNet`` with 5 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEResNet`` constructor.
    """
    return CifarSEResNet(CifarSEBasicBlock, 5, **kwargs)


def se_resnet56(**kwargs):
    """Build a ``CifarSEResNet`` with 9 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEResNet`` constructor.
    """
    return CifarSEResNet(CifarSEBasicBlock, 9, **kwargs)


def se_preactresnet20(**kwargs):
    """Build a ``CifarSEPreActResNet`` with 3 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEPreActResNet`` constructor.
    """
    return CifarSEPreActResNet(CifarSEBasicBlock, 3, **kwargs)


def se_preactresnet32(**kwargs):
    """Build a ``CifarSEPreActResNet`` with 5 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEPreActResNet`` constructor.
    """
    return CifarSEPreActResNet(CifarSEBasicBlock, 5, **kwargs)


def se_preactresnet56(**kwargs):
    """Build a ``CifarSEPreActResNet`` with 9 ``CifarSEBasicBlock``s per group.

    Keyword arguments are forwarded to the ``CifarSEPreActResNet`` constructor.
    """
    return CifarSEPreActResNet(CifarSEBasicBlock, 9, **kwargs)




In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
import imutils
import zipfile
import os
from PIL import Image
from torchsummary import summary

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from google.colab import output

from torch.hub import load_state_dict_from_url
from torchvision.models import ResNet

# Notebook-wide configuration.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # use the first GPU when available, otherwise the CPU
# NOTE(review): fit() is invoked with its own defaults, so learning_rate and
# training_epochs below do not affect training as written.
learning_rate = 0.001
training_epochs = 15
# Reassigned to 128 by the test-loader cells further down.
batch_size = 100
# Dropout probability read by SELayer / SEBasicBlock / SEBottleneck when they are constructed.
drop_prob = 0.3

In [9]:
# Ground-truth table: first column is the image index, remaining columns are the labels.
dirty_mnist_answer = pd.read_csv("/content/dirty_mnist_2nd_answer.csv")
# Store the names of the files inside the dirty_mnist directory
# in a variable called namelist.
namelist = os.listdir('./dirty_mnist/')

# Transform that converts numpy arrays into torch tensors.
class ToTensor(object):
    """Convert the numpy arrays of a sample dict into float tensors."""

    def __call__(self, sample):
        """Return a new dict with 'image' as a C x H x W tensor and 'label' as a tensor."""
        # numpy images are H x W x C while torch expects C x H x W,
        # so move the channel axis to the front.
        chw_image = sample['image'].transpose(2, 0, 1)
        converted = {}
        converted['image'] = torch.FloatTensor(chw_image)
        converted['label'] = torch.FloatTensor(sample['label'])
        return converted
# Default transform pipeline: only the numpy -> tensor conversion.
to_tensor = T.Compose([ToTensor()])

class DatasetMNIST(torch.utils.data.Dataset):
    """Dataset that pairs grayscale PNG images on disk with multi-label targets.

    Args:
        dir_path: Directory that contains the images.
        meta_df: DataFrame whose first column is the image index and whose
            remaining columns are the labels (1 if present, 0 otherwise).
        transforms: Callable applied to the ``{'image', 'label'}`` sample dict;
            defaults to the module-level ``to_tensor`` pipeline.
        augmentations: Stored on the instance but not used by this class.
    """

    def __init__(self,
                 dir_path,
                 meta_df,
                 transforms=to_tensor,
                 augmentations=None):

        self.dir_path = dir_path  # directory holding the image files
        self.meta_df = meta_df  # image indices and their labels

        self.transforms = transforms
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of rows in ``meta_df``."""
        return len(self.meta_df)

    def __getitem__(self, index):
        """Load sample ``index`` and return it as a dict with 'image' and 'label'.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        # File name = image index zero-padded to 5 characters + ".png",
        # e.g. "12".zfill(5) -> "00012".
        file_name = str(self.meta_df.iloc[index, 0]).zfill(5) + '.png'
        # os.path.join yields the same path as plain concatenation when
        # dir_path ends with a separator, and also works when it does not.
        image_path = os.path.join(self.dir_path, file_name)

        # cv2.IMREAD_GRAYSCALE: read the PNG as a single-channel image.
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the path instead of a TypeError on the division below.
            raise FileNotFoundError(f"could not read image file: {image_path}")

        # Scale 0..255 pixel values into 0..1 floats and append a channel axis:
        # (H, W) -> (H, W, 1).
        image = (image / 255).astype('float')[..., np.newaxis]
        # Label vector as floats (1 if present, 0 otherwise).
        label = self.meta_df.iloc[index, 1:].values.astype('float')
        sample = {'image': image, 'label': label}

        # Apply the transform (numpy -> tensor by default).
        if self.transforms:
            sample = self.transforms(sample)

        return sample

In [10]:
def se_resnet34(num_classes=1_000):
    """Build an SE-ResNet-34: a ``ResNet`` assembled from ``SEBasicBlock``s.

    Args:
        num_classes: Forwarded as the ``num_classes`` argument of ``ResNet``.

    Returns:
        The model, with its ``avgpool`` replaced by ``nn.AdaptiveAvgPool2d(1)``.
        No pretrained weights are loaded.
    """
    blocks_per_layer = [3, 4, 6, 3]
    net = ResNet(SEBasicBlock, blocks_per_layer, num_classes=num_classes)
    net.avgpool = nn.AdaptiveAvgPool2d(1)
    return net

class my_resnet_34(nn.Module):
    """SE-ResNet-34 wrapped for single-channel input with a 26-way multi-label head."""

    def __init__(self):
        super().__init__()
        self.conv2d = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1)
        self.se_resnet34 = se_resnet34()
        self.FC = nn.Linear(in_features=1000, out_features=26)

    def forward(self, x):
        """Return per-label probabilities in [0, 1] for a batch of 1-channel images."""
        # The ResNet backbone expects 3 input channels, so a small conv layer
        # lifts the single-channel image to 3 channels first.
        lifted = F.relu(self.conv2d(x))

        # SE-ResNet-34 backbone (1000 outputs), followed by ReLU.
        backbone_out = F.relu(self.se_resnet34(lifted))

        # Final linear head. This is multi-label prediction, so an independent
        # sigmoid per label is used instead of a softmax.
        return torch.sigmoid(self.FC(backbone_out))

'''
def se_resnet50(num_classes=1_000, pretrained=False):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(SEBottleneck, [3, 4, 6, 3], num_classes=num_classes)
    model.avgpool = nn.AdaptiveAvgPool2d(1)
   
    return model

class my_resnet_50(nn.Module):
    def __init__(self):
        super(my_resnet_50, self).__init__()
        self.conv2d = nn.Conv2d(1, 3, 3, stride=1)
        self.se_resnet50 =se_resnet50()
        self.FC = nn.Linear(1000, 26)

    def forward(self, x):
        # resnet의 입력은 [3, N, N]으로
        # 3개의 채널을 갖기 때문에
        # resnet 입력 전에 conv2d를 한 층 추가
        x = F.relu(self.conv2d(x))

        # resnet18을 추가
        x = F.relu(self.se_resnet50(x))

        # 마지막 출력에 nn.Linear를 추가
        # multilabel을 예측해야 하기 때문에
        # softmax가 아닌 sigmoid를 적용
        x = torch.sigmoid(self.FC(x))
        return x
    '''
# Instantiate the model and move it to the configured device.
# (As the last expression of the cell, .to(device) also echoes the model structure.)
hub_model = my_resnet_34()
hub_model.to(device)

my_resnet_34(
  (conv2d): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (se_resnet34): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): SEBasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (dropout1): Dropout(p=0.3, inplace=False)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (se): SELayer(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
     

In [None]:
# Summarise the network for the input shape it is actually trained on: the
# dataset cell describes the images as single-channel 256x256 (not 28x28).
# torchsummary builds its dummy input on CUDA unless told otherwise, so pass
# the active device type to keep this working on CPU-only runtimes too.
# NOTE(review): the recorded RuntimeError may have had another cause -- re-run to confirm.
summary(hub_model, input_size=(1, 256, 256), device=device.type)

RuntimeError: ignored

In [None]:
# Leftover experiment; NOTE(review): looks like a Keras-style call and is unlikely
# to apply to this PyTorch module -- consider deleting.
#weights,biases = hub_model.layers[0].get_weights()
# Release cached, unused GPU memory held by PyTorch.
torch.cuda.empty_cache()


In [11]:
# cross validation을 적용하기 위해 KFold 생성
from sklearn.model_selection import KFold
def fit(learning_rate=0.001, epochs=10, train_batch_size=128, test_batch_size=32, drop_prob=0.3):
    """Train the global ``hub_model`` with 5-fold cross validation.

    For each fold, ``dirty_mnist_answer`` is split into a training and a
    validation part, the model is trained for ``epochs`` epochs, and whenever an
    epoch's validation accuracy beats the best one seen so far in that fold a
    snapshot of the model is written to Google Drive.

    Args:
        learning_rate: Learning rate of the Adam optimizer created per fold.
        epochs: Number of epochs per fold.
        train_batch_size: Batch size of the training loader.
        test_batch_size: Batch size of the validation loader.
        drop_prob: Unused; kept for backward compatibility. The dropout layers
            take their rate from the module-level ``drop_prob`` when the model
            is constructed.

    Returns:
        list: One entry per fold -- an independent copy of the model at its best
        validation accuracy in that fold, or ``None`` if no epoch of that fold
        scored above 0.

    NOTE(review): the same ``hub_model`` instance keeps training across folds
    (it is never re-created), so a later fold's validation rows were part of an
    earlier fold's training data; validation scores of later folds are therefore
    optimistic. Re-create the model per fold if independent folds are intended.
    """
    import copy  # function-scope import: only needed to snapshot the best model

    kfold = KFold(n_splits=5, shuffle=True, random_state=0)

    MODEL = "SE-resnet34"
    # Google Drive directory that receives the checkpoints.
    path = "/content/drive/MyDrive/Colab Notebooks/dacon/models3/"

    best_models = []  # best snapshot of each fold
    for fold_index, (trn_idx, val_idx) in enumerate(kfold.split(dirty_mnist_answer), 1):
        print(f'[fold: {fold_index}]')
        # Release cached GPU memory before starting the fold.
        torch.cuda.empty_cache()

        # Split the answer table into the training and validation folds.
        train_answer = dirty_mnist_answer.iloc[trn_idx]
        test_answer = dirty_mnist_answer.iloc[val_idx]

        train_dataset = DatasetMNIST("dirty_mnist/", train_answer)
        valid_dataset = DatasetMNIST("dirty_mnist/", test_answer)

        # NOTE(review): the training loader does not shuffle, so every epoch
        # sees the batches in the same order -- confirm this is intended.
        train_data_loader = DataLoader(
            train_dataset,
            batch_size=train_batch_size,
            shuffle=False,
            num_workers=3,
            drop_last=True
        )
        valid_data_loader = DataLoader(
            valid_dataset,
            batch_size=test_batch_size,
            shuffle=False,
            num_workers=3,
            drop_last=True
        )

        # Fresh optimizer / scheduler per fold (the model itself is reused).
        optimizer = torch.optim.Adam(hub_model.parameters(),
                                     lr=learning_rate)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=5,
                                                       gamma=0.75)
        criterion = torch.nn.BCELoss()

        valid_acc_max = 0
        # Reset per fold so a stale snapshot from the previous fold can never
        # be recorded for this one.
        best_model = None
        for epoch in range(epochs):

            # ---- train for one epoch ----
            train_acc_list = []
            # Put dropout / batch-norm layers into training mode.
            hub_model.train()
            with tqdm(train_data_loader,
                      total=len(train_data_loader),
                      unit="batch") as train_bar:

                for sample in train_bar:
                    train_bar.set_description(f"Train Epoch {epoch}")
                    # Clear gradients accumulated by the previous step.
                    optimizer.zero_grad()
                    images = sample['image'].to(device)
                    labels = sample['label'].to(device)

                    with torch.set_grad_enabled(True):
                        probs = hub_model(images)
                        loss = criterion(probs, labels)
                        loss.backward()
                        optimizer.step()

                    # Running training accuracy (element-wise over all labels).
                    preds = probs.detach().cpu().numpy() > 0.5
                    batch_acc = (labels.cpu().numpy() == preds).mean()
                    train_acc_list.append(batch_acc)
                    train_acc = np.mean(train_acc_list)

                    # Show the current mini-batch loss and running accuracy.
                    train_bar.set_postfix(train_loss=loss.item(),
                                          train_acc=train_acc)

            # ---- validate after the epoch ----
            valid_acc_list = []
            # Defined up front so an empty validation loader cannot leave these
            # names unbound below.
            valid_acc = 0.0
            valid_loss = None
            # Put dropout / batch-norm layers into evaluation mode.
            hub_model.eval()
            with tqdm(valid_data_loader,
                      total=len(valid_data_loader),
                      unit="batch") as valid_bar:
                for sample in valid_bar:
                    valid_bar.set_description(f"Valid Epoch {epoch}")
                    images = sample['image'].to(device)
                    labels = sample['label'].to(device)

                    # No gradients are needed for validation.
                    with torch.no_grad():
                        probs = hub_model(images)
                        valid_loss = criterion(probs, labels)

                    # Running validation accuracy.
                    preds = probs.cpu().numpy() > 0.5
                    batch_acc = (labels.cpu().numpy() == preds).mean()
                    valid_acc_list.append(batch_acc)

                    valid_acc = np.mean(valid_acc_list)
                    valid_bar.set_postfix(valid_loss=valid_loss.item(),
                                          valid_acc=valid_acc)

            # Learning-rate schedule step (once per epoch).
            lr_scheduler.step()

            # Snapshot and save the model when validation accuracy improves.
            if valid_acc_max < valid_acc:
                valid_acc_max = valid_acc
                # Deep copy: `hub_model` keeps training afterwards, so storing
                # the reference would make every "best" entry the same object.
                best_model = copy.deepcopy(hub_model)
                # The file name carries the loss of the last validation batch.
                torch.save(best_model, f'{path}{fold_index}_{MODEL}_{valid_loss.item():2.4f}_epoch_{epoch}.pth')

        # Record the best snapshot of this fold.
        best_models.append(best_model)

    return best_models


# Keep the per-fold best models; assigning the result also stops the notebook
# from echoing the returned list as the cell output.
best_models = fit()

  0%|          | 0/312 [00:00<?, ?batch/s]

[fold: 1]


Train Epoch 0: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.541, train_loss=0.687]
Valid Epoch 0: 100%|██████████| 312/312 [00:24<00:00, 12.76batch/s, valid_acc=0.537, valid_loss=0.688]
Train Epoch 1: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.549, train_loss=0.681]
Valid Epoch 1: 100%|██████████| 312/312 [00:23<00:00, 13.17batch/s, valid_acc=0.55, valid_loss=0.685]
Train Epoch 2: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.562, train_loss=0.674]
Valid Epoch 2: 100%|██████████| 312/312 [00:23<00:00, 13.13batch/s, valid_acc=0.542, valid_loss=0.673]
Train Epoch 3: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.57, train_loss=0.668]
Valid Epoch 3: 100%|██████████| 312/312 [00:23<00:00, 13.00batch/s, valid_acc=0.559, valid_loss=0.665]
Train Epoch 4: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.579, train_loss=0.66]
Valid Epoch 4: 100%|██████████| 312/312 [00:23<00:00, 13.01batch/s, valid_ac

[fold: 2]


Train Epoch 0: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.627, train_loss=0.621]
Valid Epoch 0: 100%|██████████| 312/312 [00:24<00:00, 12.95batch/s, valid_acc=0.63, valid_loss=0.625]
Train Epoch 1: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.636, train_loss=0.61]
Valid Epoch 1: 100%|██████████| 312/312 [00:24<00:00, 12.95batch/s, valid_acc=0.631, valid_loss=0.625]
Train Epoch 2: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.641, train_loss=0.607]
Valid Epoch 2: 100%|██████████| 312/312 [00:24<00:00, 12.92batch/s, valid_acc=0.633, valid_loss=0.629]
Train Epoch 3: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.647, train_loss=0.6]
Valid Epoch 3: 100%|██████████| 312/312 [00:24<00:00, 12.91batch/s, valid_acc=0.634, valid_loss=0.629]
Train Epoch 4: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.654, train_loss=0.594]
Valid Epoch 4: 100%|██████████| 312/312 [00:24<00:00, 12.90batch/s, valid_acc

[fold: 3]


Train Epoch 0: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.683, train_loss=0.559]
Valid Epoch 0: 100%|██████████| 312/312 [00:24<00:00, 12.97batch/s, valid_acc=0.679, valid_loss=0.571]
Train Epoch 1: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.69, train_loss=0.547]
Valid Epoch 1: 100%|██████████| 312/312 [00:23<00:00, 13.07batch/s, valid_acc=0.681, valid_loss=0.585]
Train Epoch 2: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.696, train_loss=0.542]
Valid Epoch 2: 100%|██████████| 312/312 [00:23<00:00, 13.07batch/s, valid_acc=0.685, valid_loss=0.569]
Train Epoch 3: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.701, train_loss=0.532]
Valid Epoch 3: 100%|██████████| 312/312 [00:24<00:00, 13.00batch/s, valid_acc=0.662, valid_loss=0.622]
Train Epoch 4: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.706, train_loss=0.525]
Valid Epoch 4: 100%|██████████| 312/312 [00:23<00:00, 13.11batch/s, valid_

[fold: 4]


Train Epoch 0: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.724, train_loss=0.512]
Valid Epoch 0: 100%|██████████| 312/312 [00:24<00:00, 12.94batch/s, valid_acc=0.742, valid_loss=0.467]
Train Epoch 1: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.73, train_loss=0.506]
Valid Epoch 1: 100%|██████████| 312/312 [00:24<00:00, 12.97batch/s, valid_acc=0.739, valid_loss=0.478]
Train Epoch 2: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.735, train_loss=0.496]
Valid Epoch 2: 100%|██████████| 312/312 [00:24<00:00, 12.91batch/s, valid_acc=0.736, valid_loss=0.469]
Train Epoch 3: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.74, train_loss=0.475]
Valid Epoch 3: 100%|██████████| 312/312 [00:24<00:00, 12.91batch/s, valid_acc=0.727, valid_loss=0.491]
Train Epoch 4: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.743, train_loss=0.471]
Valid Epoch 4: 100%|██████████| 312/312 [00:24<00:00, 12.98batch/s, valid_a

[fold: 5]


Train Epoch 0: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.748, train_loss=0.48]
Valid Epoch 0: 100%|██████████| 312/312 [00:23<00:00, 13.08batch/s, valid_acc=0.766, valid_loss=0.418]
Train Epoch 1: 100%|██████████| 312/312 [02:58<00:00,  1.74batch/s, train_acc=0.754, train_loss=0.456]
Valid Epoch 1: 100%|██████████| 312/312 [00:23<00:00, 13.04batch/s, valid_acc=0.764, valid_loss=0.425]
Train Epoch 2: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.757, train_loss=0.453]
Valid Epoch 2: 100%|██████████| 312/312 [00:24<00:00, 12.94batch/s, valid_acc=0.766, valid_loss=0.42]
Train Epoch 3: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.76, train_loss=0.431]
Valid Epoch 3: 100%|██████████| 312/312 [00:24<00:00, 12.99batch/s, valid_acc=0.756, valid_loss=0.44]
Train Epoch 4: 100%|██████████| 312/312 [02:59<00:00,  1.74batch/s, train_acc=0.762, train_loss=0.429]
Valid Epoch 4: 100%|██████████| 312/312 [00:24<00:00, 12.90batch/s, valid_acc

# SaveModel 하나 불러오기

In [None]:
# Load one saved checkpoint (a whole pickled model object, not just a state_dict).
# NOTE(review): torch.load unpickles arbitrary objects -- only load files you trust.
# NOTE(review): `savemodel` is not referenced by the inference cell that follows,
# which runs `hub_model` instead -- confirm which model is intended.
savemodel=torch.load(('/content/drive/MyDrive/Colab Notebooks/dacon/models2/5_SE-resnet34_0.4120_epoch_0.pth'))
savemodel

my_resnet_34(
  (conv2d): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (se_resnet34): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): SEBasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (se): SELayer(
          (avg_pool): AdaptiveAvgPool2d(output_size=1)
          (fc): Sequential(
            (0): Linear(in_

In [None]:
# Define the test Dataset; sample_submission.csv supplies the test image indices
# (its label columns are only placeholders read as the sample's 'label').
sample_submission = pd.read_csv("sample_submission.csv")
test_dataset = DatasetMNIST("test_dirty_mnist/", sample_submission)
# Rebinds the notebook-wide batch_size; the inference loop uses it to compute row offsets.
batch_size = 128
# shuffle=False and drop_last=False keep the predictions aligned with the submission rows.
test_data_loader = DataLoader(
    test_dataset,
    batch_size = batch_size,
    shuffle = False,
    num_workers = 3,
    drop_last = False
)

In [None]:
# Batched inference on the test set with `hub_model`.
prediction_df = pd.read_csv("sample_submission.csv")
predictions_list = []

# Zero-filled result array: one row per test sample, one column per label
# (every column of the submission file except the index).
prediction_array = np.zeros([len(prediction_df), prediction_df.shape[1] - 1])

hub_model.eval()
with torch.no_grad():
    for idx, sample in enumerate(test_data_loader):
        images = sample['image'].to(device)
        probs = hub_model(images).cpu().detach().numpy()
        # Hard decision per label at a 0.5 threshold.
        preds = (probs > 0.5)

        # Write this batch's predictions into its rows of the result array.
        batch_index = batch_size * idx
        prediction_array[batch_index: batch_index + images.shape[0], :] = preds.astype(int)

# Append with an extra trailing axis so results can later be stacked along it.
predictions_list.append(prediction_array[..., np.newaxis])

In [None]:
# Take the single prediction array, cast it to int64 and drop the trailing
# singleton axis, leaving one row per sample and one column per label.
answer = np.squeeze(predictions_list[0].astype('int64'), 2)


array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 1])

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 1])

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 1])

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 1, 1])

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 1, 1])

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 1, 0, 1])

In [None]:
# Fill every label column (all columns after the index) with the predictions
# and write the submission file to Google Drive.
sample_submission = pd.read_csv("sample_submission.csv")
sample_submission.iloc[:,1:] = answer
sample_submission.to_csv("/content/drive/MyDrive/Colab Notebooks/se_resnet34_prediction.csv", index = False)
# Echo the resulting table as the cell output.
sample_submission

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1
1,50001,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1
2,50002,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1
3,50003,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,1
4,50004,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1
4996,54996,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1
4997,54997,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,1,1
4998,54998,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,1


# 모델 다 불러와서 Ensemble

In [15]:
'''
model_path_front='/content/drive/MyDrive/Colab Notebooks/dacon/models2/'
model_names=['1_SE-resnet34_0.6583_epoch_6.pth', '1_SE-resnet34_0.6610_epoch_3.pth', '1_SE-resnet34_0.6799_epoch_1.pth', '1_SE-resnet34_0.6967_epoch_2.pth', \
             '1_SE-resnet34_0.7262_epoch_0.pth', '2_SE-resnet34_0.6222_epoch_1.pth', '2_SE-resnet34_0.6316_epoch_0.pth', '3_SE-resnet34_0.6117_epoch_0.pth', \
             '3_SE-resnet34_0.6850_epoch_6.pth', '4_SE-resnet34_0.5352_epoch_0.pth', '4_SE-resnet34_0.5420_epoch_7.pth', '4_SE-resnet34_0.5508_epoch_6.pth', '5_SE-resnet34_0.4120_epoch_0.pth']
model_1perfold_names=['1_SE-resnet34_0.6583_epoch_6.pth', '2_SE-resnet34_0.6222_epoch_1.pth', '3_SE-resnet34_0.6117_epoch_0.pth', '4_SE-resnet34_0.5352_epoch_0.pth', '5_SE-resnet34_0.4120_epoch_0.pth']
'''
# Directory and file names of the checkpoints that form the ensemble.
model_path_front='/content/drive/MyDrive/Colab Notebooks/dacon/models3/'
model_names=['5_SE-resnet34_0.4181_epoch_0.pth', '4_SE-resnet34_0.4666_epoch_0.pth', '3_SE-resnet34_0.5853_epoch_1.pth', '3_SE-resnet34_0.5842_epoch_5.pth', \
             '3_SE-resnet34_0.5724_epoch_4.pth', '3_SE-resnet34_0.5709_epoch_0.pth', '3_SE-resnet34_0.5689_epoch_2.pth', '2_SE-resnet34_0.6394_epoch_7.pth', \
             '2_SE-resnet34_0.6335_epoch_5.pth', '2_SE-resnet34_0.6292_epoch_2.pth', '2_SE-resnet34_0.6287_epoch_3.pth', '1_SE-resnet34_0.6288_epoch_9.pth']

# Full path of every checkpoint.
model_paths = [model_path_front + model_name for model_name in model_names]

# Load every checkpoint (whole pickled model objects).
# map_location places the weights on the active device, so checkpoints saved on
# a GPU also load on a CPU-only runtime and match the device the inputs are sent to.
# NOTE(review): torch.load unpickles arbitrary objects -- only load files you trust.
best_models = [torch.load(model_path, map_location=device) for model_path in model_paths]


In [16]:
# Test dataset and loader definition.
batch_size = 128

# The sample submission is passed to DatasetMNIST together with the image folder.
sample_submission = pd.read_csv("sample_submission.csv")
test_dataset = DatasetMNIST("test_dirty_mnist/", sample_submission)

# No shuffling and no dropped tail batch: the inference cell writes
# predictions back by running row position, so order and count must match
# the submission file.
loader_options = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=3,
    drop_last=False,
)
test_data_loader = DataLoader(test_dataset, **loader_options)

In [17]:
# Hard-voting inference: every checkpoint in best_models produces a 0/1
# decision per (test image, label). Each model's decisions are stored as an
# array with a trailing singleton axis so they can be concatenated later.
predictions_list = []

# The submission template is read only for its shape.
prediction_df = pd.read_csv("sample_submission.csv")
num_samples = prediction_df.shape[0]
num_labels = prediction_df.shape[1] - 1  # all columns except the leading "index" column

for model in best_models:
    # Keep the weights on the same device as the input batches.
    model.to(device)
    # Inference mode is loop-invariant: set it once per model, not per batch.
    model.eval()

    # Zero-filled buffer that receives this model's votes.
    prediction_array = np.zeros([num_samples, num_labels])

    # Running row position; derived from the actual batch sizes so it does
    # not depend on the global batch_size matching the loader's setting.
    offset = 0
    with torch.no_grad():
        for sample in test_data_loader:
            images = sample['image'].to(device)
            probs = model(images).detach().cpu().numpy()
            # NOTE(review): a 0.5 cut-off assumes the model already outputs
            # probabilities (sigmoid applied in forward) -- confirm.
            preds = (probs > 0.5)

            rows = images.shape[0]
            prediction_array[offset:offset + rows, :] = preds.astype(int)
            offset += rows

    # Add a trailing axis and collect this model's votes.
    predictions_list.append(prediction_array[..., np.newaxis])

In [18]:
# Stack the per-model votes along the last axis and average over models.
predictions_array = np.concatenate(predictions_list, axis=2)
vote_share = predictions_array.mean(axis=2)

# Majority vote: 1 when strictly more than half of the models voted 1, else 0.
predictions_mean = (vote_share > 0.5).astype(int)
predictions_mean

array([[1, 0, 1, ..., 1, 0, 1],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 1],
       ...,
       [0, 0, 1, ..., 0, 0, 1],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0]])

In [21]:
# Fill the submission template with the ensemble votes and save it to Drive.
submission_path = "/content/drive/MyDrive/Colab Notebooks/dacon/ensemble_prediction2_dropout.csv"

sample_submission = pd.read_csv("sample_submission.csv")
# Column 0 is kept as-is; every remaining column is overwritten with the predictions.
sample_submission.iloc[:, 1:] = predictions_mean
sample_submission.to_csv(submission_path, index=False)
sample_submission

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1
1,50001,1,1,0,1,1,0,1,0,1,1,0,1,0,0,1,1,1,0,1,0,0,0,0,0,0,0
2,50002,1,0,1,1,1,1,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,1,1,0,0,1
3,50003,1,1,0,1,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1,0,1
4,50004,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,0,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1
4996,54996,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0
4997,54997,0,0,1,1,0,1,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1
4998,54998,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0


# bestmodel array 활용

NameError: ignored