<a href="https://colab.research.google.com/github/ehddnr301/dacon_cv2/blob/master/ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab only: mount Google Drive so the dataset, checkpoints and submission
# files referenced under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading https://files.pythonhosted.org/packages/4e/83/f9c5f44060f996279e474185ebcbd8dbd91179593bffb9abe3afa55d085b/efficientnet_pytorch-0.7.0.tar.gz
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.0-cp36-none-any.whl size=16032 sha256=306f0b5fadcb1250ce8c70b6af1243319a3f97ee3506743dbe69c94ca760adb9
  Stored in directory: /root/.cache/pip/wheels/e9/c6/e1/7a808b26406239712cfce4b5ceeb67d9513ae32aa4b31445c6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.0


In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading https://files.pythonhosted.org/packages/4f/b1/4b310bd715885636e7174b4b52817202fff0ae3609ca2bfb17f28e33e0a1/torchinfo-0.0.8-py3-none-any.whl
Installing collected packages: torchinfo
Successfully installed torchinfo-0.0.8


In [None]:
from efficientnet_pytorch import EfficientNet
import os
from typing import Tuple, Sequence, Callable
import csv
import cv2
import random
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader, random_split
from torchinfo import summary

import albumentations

from albumentations.pytorch import ToTensor
from torchvision import transforms

from torchvision.models import resnet50, resnet152


In [None]:
# --- Reproducibility ---------------------------------------------------------
# A single seed is shared by every RNG touched in this notebook:
# Python's `random`, NumPy, and PyTorch (CPU and CUDA).
random_seed = 777
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)  # covers the multi-GPU case as well
# Disable cuDNN autotuning and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# --- Image / run constants ---------------------------------------------------
IMAGE_WIDTH = IMAGE_HEIGHT = 256
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNELS = 3
EPOCHS = 30

# --- Data locations on the mounted Drive -------------------------------------
PATH_TRAIN_ANS_CSV = '/content/drive/MyDrive/dacon_computer_vision/data/dirty_mnist_2nd_answer.csv'
PATH_TRAIN_DATASET = '/content/drive/MyDrive/dacon_computer_vision/data/dirty_mnist/'
PATH_TEST_DATASET = '/content/drive/MyDrive/dacon_computer_vision/data/test_dirty_mnist/'

In [None]:
class MnistModel1(nn.Module):
    """EfficientNet-B5 backbone followed by dropout and a 26-output linear head.

    Args:
        pretrained: When True (the default, identical to the previous
            behaviour) the backbone is built with
            ``EfficientNet.from_pretrained`` and its published weights are
            downloaded. Pass False to build the same architecture with
            ``EfficientNet.from_name`` and skip the download, e.g. when a
            checkpoint is loaded into the model right afterwards.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()
        # Attribute names below become state_dict key prefixes, so they must
        # stay as they are for existing checkpoints to load.
        if pretrained:
            self.effnet = EfficientNet.from_pretrained('efficientnet-b5')
        else:
            self.effnet = EfficientNet.from_name('efficientnet-b5')
        self.drop_layer = nn.Dropout(p=0.2)
        # The backbone emits 1000 values per sample; map them to 26 outputs.
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        """Run backbone -> dropout -> linear head and return the raw head outputs.

        No activation is applied after the final linear layer.
        """
        x = self.effnet(x)
        x = self.drop_layer(x)
        x = self.classifier(x)

        return x

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
class MnistModel2(nn.Module):
    """ResNet-152 backbone followed by a 26-output linear head.

    Args:
        pretrained: Forwarded to ``torchvision.models.resnet152``. True (the
            default, identical to the previous behaviour) downloads the
            published weights; pass False to skip the download, e.g. when a
            checkpoint is loaded into the model right afterwards.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()
        # Attribute names below become state_dict key prefixes, so they must
        # stay as they are for existing checkpoints to load.
        self.resnet = resnet152(pretrained=pretrained)
        # The backbone emits 1000 values per sample; map them to 26 outputs.
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        """Run backbone -> linear head and return the raw head outputs.

        No activation is applied after the final linear layer.
        """
        x = self.resnet(x)
        x = self.classifier(x)

        return x


In [None]:
class MnistModel3(nn.Module):
    """ResNet-50 backbone followed by a 26-output linear head.

    Args:
        pretrained: Forwarded to ``torchvision.models.resnet50``. True (the
            default, identical to the previous behaviour) downloads the
            published weights; pass False to skip the download, e.g. when a
            checkpoint is loaded into the model right afterwards.
    """

    def __init__(self, pretrained: bool = True) -> None:
        super().__init__()
        # Attribute names below become state_dict key prefixes, so they must
        # stay as they are for existing checkpoints to load.
        self.resnet = resnet50(pretrained=pretrained)
        # The backbone emits 1000 values per sample; map them to 26 outputs.
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        """Run backbone -> linear head and return the raw head outputs.

        No activation is applied after the final linear layer.
        """
        x = self.resnet(x)
        x = self.classifier(x)

        return x


In [None]:
class MnistDataset(Dataset):
    """Dataset backed by a directory of PNG images and a CSV of per-image labels.

    The first CSV row is treated as a header and skipped. In every other
    non-empty row the first field is the integer image id and the remaining
    fields are parsed as integers and used as that image's target vector.
    Images are read from ``<dir>/<image id zero-padded to 5 digits>.png``.

    Args:
        dir: Directory that contains the PNG files.
        image_ids: Path of the CSV file described above.
        transforms: Callable invoked as ``transforms(image=rgb_array)`` whose
            result is indexed with ``'image'``; pass None to get the RGB array
            back unchanged.
    """

    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        # newline='' is the mode the csv module documents for reading files.
        with open(image_ids, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header; an empty file yields an empty dataset
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing newline).
                    continue
                self.labels[int(row[0])] = [int(value) for value in row[1:]]

        # Dict insertion order == CSV row order, so positions follow the file.
        self.image_ids = list(self.labels)

    def __len__(self) -> int:
        """Number of labelled images listed in the CSV."""
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor, np.ndarray]:
        """Return ``(image, target)`` for the row at position ``index``.

        ``target`` is a float32 array of the row's labels. ``image`` is the
        transform's ``'image'`` output, or the RGB array when no transform is set.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_id = self.image_ids[index]
        image_path = os.path.join(self.dir, f'{str(image_id).zfill(5)}.png')
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising,
            # which would otherwise surface as an opaque cvtColor error.
            raise FileNotFoundError(f'Image missing or unreadable: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        target = np.array(self.labels[image_id], dtype=np.float32)

        if self.transforms is not None:
            augmented = self.transforms(image=image)
            image = augmented['image']

        return image, target

In [None]:
# Test-time preprocessing: tensor conversion only, no augmentation.
# NOTE(review): the (mean, std) tuple is passed as ToTensor's first positional
# argument. In the legacy albumentations.pytorch API that parameter appears to
# be `num_classes`, with normalization expected via `normalize={...}` — TODO
# confirm whether any normalization is actually applied. Whatever the answer,
# this must stay identical to the transform the checkpoints were trained with.
transforms_test = albumentations.Compose([
    ToTensor((
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )),
])

In [None]:

# Checkpoint files, listed in the same order as the models in `model_list`.
path_list = [
    '/content/drive/MyDrive/dacon_computer_vision/checkpoint/model_0217_25.pt',
    '/content/drive/MyDrive/dacon_computer_vision/checkpoint/model_resnet152_0219_35.pt',
    '/content/drive/MyDrive/dacon_computer_vision/checkpoint/model_resnet_0218_35.pt',
]
PATH1, PATH2, PATH3 = path_list

# One instance per ensemble member. The checkpoint weights are not loaded here;
# that happens inside the inference loop, right before each model is used.
model_list = [MnistModel1(), MnistModel2(), MnistModel3()]
model1, model2, model3 = model_list


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b5-b6417697.pth


HBox(children=(FloatProgress(value=0.0, max=122410125.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b5


Downloading: "https://download.pytorch.org/models/resnet152-b121ed2d.pth" to /root/.cache/torch/hub/checkpoints/resnet152-b121ed2d.pth


HBox(children=(FloatProgress(value=0.0, max=241530880.0), HTML(value='')))




Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




In [None]:
# The sample submission supplies the test image ids (and placeholder labels).
testset = MnistDataset(
    dir=PATH_TEST_DATASET,
    image_ids='/content/drive/MyDrive/dacon_computer_vision/data/sample_submission.csv',
    transforms=transforms_test,
)

# Batches must arrive in file order because the inference loop writes
# predictions back by row position, so shuffling stays off (the default).
test_loader = DataLoader(dataset=testset, batch_size=4, shuffle=False)

In [None]:
# Extract the test images into the folder PATH_TEST_DATASET points at.
# -n never overwrites files that already exist, so re-running the cell is safe.
!unzip -n '/content/drive/MyDrive/dacon_computer_vision/data/test_dirty_mnist_2nd.zip' -d '/content/drive/MyDrive/dacon_computer_vision/data/test_dirty_mnist'

Archive:  /content/drive/MyDrive/dacon_computer_vision/data/test_dirty_mnist_2nd.zip


In [None]:
from tqdm import tqdm

In [None]:

# Run every ensemble member over the test set and write one hard-label
# submission per model: submit0221_<position in model_list>.csv.
for model_idx, (current_model, checkpoint_path) in enumerate(zip(model_list, path_list)):
    # Start from a fresh copy of the template so models never see each other's rows.
    submit = pd.read_csv('/content/drive/MyDrive/dacon_computer_vision/data/sample_submission.csv')

    current_model = current_model.to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only runtime too.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    current_model.load_state_dict(checkpoint['model_state_dict'])
    current_model.eval()  # disable dropout / use running batch-norm statistics

    row_start = 0
    # Inference only: no_grad stops autograd from recording the forward pass,
    # which would otherwise hold activations in memory for no benefit.
    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc=f'model {model_idx}'):
            outputs = current_model(images.to(device))
            # NOTE(review): the model heads end in nn.Linear, so `outputs` are raw
            # scores rather than probabilities. The 0.5 cut-off is kept exactly as
            # before — confirm it matches how the models were trained/validated.
            predictions = (outputs > 0.5).long().cpu().numpy()
            # Slice by the real batch length so a short final batch stays aligned.
            row_end = row_start + len(predictions)
            submit.iloc[row_start:row_end, 1:] = predictions
            row_start = row_end

    submit.to_csv(f'/content/drive/MyDrive/dacon_computer_vision/data/submit0221_{model_idx}.csv', index=False)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m

 12%|█▏        | 144/1250 [00:07<00:55, 19.84it/s][A[A[A[A



 12%|█▏        | 146/1250 [00:07<00:56, 19.59it/s][A[A[A[A



 12%|█▏        | 148/1250 [00:07<00:56, 19.66it/s][A[A[A[A



 12%|█▏        | 150/1250 [00:07<00:55, 19.70it/s][A[A[A[A



 12%|█▏        | 152/1250 [00:07<00:55, 19.66it/s][A[A[A[A



 12%|█▏        | 155/1250 [00:07<00:55, 19.75it/s][A[A[A[A



 13%|█▎        | 157/1250 [00:07<00:56, 19.18it/s][A[A[A[A



 13%|█▎        | 159/1250 [00:08<00:57, 18.95it/s][A[A[A[A



 13%|█▎        | 161/1250 [00:08<00:56, 19.11it/s][A[A[A[A



 13%|█▎        | 163/1250 [00:08<00:56, 19.33it/s][A[A[A[A



 13%|█▎        | 165/1250 [00:08<00:56, 19.31it/s][A[A[A[A



 13%|█▎        | 167/1250 [00:08<00:57, 18.70it/s][A[A[A[A



 14%|█▎        | 169/1250 [00:08<00:57, 18.93it/s][A[A[A[A



 14%|█▍        | 172/1250 [00:08<00:55, 19.37it/s][A[A[A[A



 14%|█▍        | 174/1250

In [None]:
# Reload the three per-model submission files written by the inference loop.
s0, s1, s2 = map(pd.read_csv, (
    '/content/drive/MyDrive/dacon_computer_vision/data/submit0221_0.csv',
    '/content/drive/MyDrive/dacon_computer_vision/data/submit0221_1.csv',
    '/content/drive/MyDrive/dacon_computer_vision/data/submit0221_2.csv',
))


In [None]:
# Preview the predictions of model_list[0] (MnistModel1, EfficientNet-B5).
s0

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,1,1,0,1,0,1
1,50001,0,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,0,0,1,1,0,0,0,0
2,50002,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,1,0,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1
4,50004,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0
4996,54996,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1
4997,54997,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,1,0,0,1
4998,54998,0,0,1,0,0,0,1,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,1


In [None]:
# Preview the predictions of model_list[1] (MnistModel2, ResNet-152).
s1

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,1,0,1,0,1,1,1,0,1,0,1,0,1,0,0,0,1,1,1,0,0,0,0
1,50001,0,1,1,0,1,0,1,0,1,1,0,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0
2,50002,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0
4,50004,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,1,0,1,0,1,0,1,1,1,0,0,1,0,0,0,0,1,1,1,1,0,1,0
4996,54996,0,0,1,0,0,0,0,1,1,1,0,1,0,0,1,1,1,0,1,0,1,0,0,0,1,1
4997,54997,1,0,0,1,0,1,0,1,0,1,0,1,1,0,1,1,0,0,0,0,1,1,1,0,0,1
4998,54998,1,0,1,0,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,1


In [None]:
# Preview the predictions of model_list[2] (MnistModel3, ResNet-50).
s2

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1
1,50001,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0
2,50002,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1
4,50004,0,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0
4996,54996,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1
4997,54997,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1
4998,54998,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1


In [None]:
# Per-label vote count across the three models; each model contributes once.
# The earlier `s0 + s2 + s2` dropped s1 and double-counted s2, which made the
# subsequent 2-of-3 threshold reproduce s2's predictions exactly.
# The 'index' column is summed too; it is overwritten with the real ids later.
ss = s0 + s1 + s2

In [None]:
# Keep the original image ids: adding whole frames also sums the 'index'
# column. Every per-model file starts from the same sample_submission.csv and
# only columns 1: are overwritten, so any one of them carries the same ids.
idx = s1['index']
idx

0       50000
1       50001
2       50002
3       50003
4       50004
        ...  
4995    54995
4996    54996
4997    54997
4998    54998
4999    54999
Name: index, Length: 5000, dtype: int64

In [None]:
# Inspect the summed votes (the 'index' column here is a sum, not a real id).
ss

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,150000,3,0,3,0,3,3,0,3,3,0,0,1,1,1,0,3,0,0,0,2,3,3,0,3,0,3
1,150003,0,1,2,0,3,0,1,0,3,3,3,3,0,2,3,3,0,1,0,0,3,3,0,0,0,0
2,150006,0,0,2,3,3,0,3,0,3,0,1,1,1,0,0,3,0,3,3,0,3,0,3,0,0,3
3,150009,3,3,0,1,0,3,3,0,1,1,3,3,3,0,0,3,0,2,0,3,0,2,0,3,0,3
4,150012,0,0,3,0,3,3,0,0,2,0,0,3,3,0,3,0,3,0,3,3,3,2,3,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,164985,0,3,3,0,0,0,1,3,0,0,2,3,3,0,3,0,0,3,0,3,3,0,3,0,3,0
4996,164988,3,1,3,0,3,0,0,0,3,3,0,0,0,0,3,3,3,0,0,0,0,0,0,3,0,3
4997,164991,3,0,0,3,0,3,0,0,0,3,3,0,0,3,3,3,0,0,0,1,3,3,3,0,0,3
4998,164994,2,0,3,0,0,0,1,0,3,3,0,3,2,0,0,0,3,3,0,1,0,0,3,0,0,3


In [None]:
# Turn vote counts into a boolean decision: a label passes when its count
# divided by 3 exceeds 0.6, i.e. when the summed count is at least 2.
sss = ss.div(3).gt(0.6)

In [None]:
# The comparison above was applied to the 'index' column as well, leaving it
# boolean; put the saved image ids back.
sss['index'] = idx

In [None]:
# Convert the boolean vote results to integers (True/False -> 1/0), then
# display the final ensemble frame.
sss = sss.astype(int)
sss

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,1,1,0,1,1,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1
1,50001,0,0,1,0,1,0,0,0,1,1,1,1,0,1,1,1,0,0,0,0,1,1,0,0,0,0
2,50002,0,0,1,1,1,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,0,0,1,1,0,0,0,1,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1
4,50004,0,0,1,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,1,0
4996,54996,1,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,1,0,1
4997,54997,1,0,0,1,0,1,0,0,0,1,1,0,0,1,1,1,0,0,0,0,1,1,1,0,0,1
4998,54998,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,0,1,0,0,1


In [None]:
# Write the ensemble submission; index=False keeps the DataFrame's row index
# out of the file.
sss.to_csv('/content/drive/MyDrive/dacon_computer_vision/data/submit0221.csv', index=False)