In [1]:
from torchvision import models
import torch

import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
import matplotlib.pyplot as plt
from itertools import cycle

resnet18_pretrained = models.resnet18(pretrained=True)

In [2]:
num_classes= 18
num_ftrs = resnet18_pretrained.fc.in_features
resnet18_pretrained.fc = nn.Linear(num_ftrs, num_classes)

In [3]:
import json
with open("train.json", "r") as f:
    json_data = json.load(f)
json_data = json_data['train']

In [4]:
class TrainDataset(Dataset):
    def __init__(self, img_paths, transform, json_data):
        self.img_paths = img_paths
        self.transform = transform
        self.json_data = json_data
        
    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        label = json_data[index//7]['img'][index%7]['class']
        
        return image, label

    def __len__(self):
        return len(self.img_paths)

class TrainDataset_60(Dataset):
    def __init__(self, img_paths, transform, json_data):
        self.img_paths = img_paths
        self.transform = transform
        self.json_data = json_data
        
    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        if 'incorrect_mask.jpg' == self.img_paths[index].split('/')[-1]:
            if 'female' in self.img_paths[index].split('/')[-2]:
                label = 11
            else:
                label = 8
        elif 'mask' in self.img_paths[index].split('/')[-1]:
            if 'female' in self.img_paths[index].split('/')[-2]:
                label = 5
            else:
                label = 2
        else:
            if 'female' in self.img_paths[index].split('/')[-2]:
                label = 17
            else:
                label = 4
        return image, label

    def __len__(self):
        return len(self.img_paths)

In [5]:
import numpy as np
import torch

# 중앙을 중심으로 지킬앤 하이드 처럼 좌우에 컷믹스
def rand_bbox(size, lam):
    H = size[2]
    W = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)


    cx = np.random.randn() + W//2
    cy = np.random.randn() + H//2

    # 패치의 4점
    bbx1 = np.clip(cx - cut_w // 2, 0, W//2)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W//2)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return int(bbx1), int(bby1), int(bbx2), int(bby2)

In [12]:
from math import gcd
gcd(len(image_paths), len(paths_60))


84

In [13]:
# meta 데이터와 이미지 경로를 불러옵니다.
path_dir = 'input/data/train/images'

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = []
for image in json_data:
    for img in image['img']:
        image_paths.append(img['path'])

paths_60 = []
for image in json_data:
    for img in image['img']:
        if img['class'] == 2 or img['class'] == 5 or img['class'] == 8 or img['class'] == 11 or img['class'] == 14 or img['class'] == 17:
            paths_60.append(img['path'])




transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    transforms.RandomHorizontalFlip(0.5),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TrainDataset(image_paths, transform, json_data)
dataset_60 = TrainDataset_60(paths_60, transform, json_data)

loader = DataLoader(
    dataset,
    batch_size=84, # 
    shuffle=True # shuffle 추가
)

loader2 = DataLoader(
    dataset_60,
    batch_size=84,
    shuffle=True
)

from sklearn.metrics import f1_score
device = torch.device('cuda')
model = resnet18_pretrained.to(device)
model.train()

import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

for epoch in range(50):
    running_loss = 0.0
    epoch_f1 = 0
    n_iter = 0
    for (i, data), (j, data_60) in zip(enumerate(loader, 0),cycle(enumerate(loader2, 0))):
        print(i, j)
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        inputs_60, labels_60 = data_60
        inputs_60 = inputs_60.to(device)
        labels_60 = labels_60.to(device)
        
        # t_ = torch.zeros((100,18))
        # t_[range(100),labels]=1
        # print(t_)
        # t_ = t_.to(device)
        # 일정 확률로 컷믹스
        optimizer.zero_grad()

        if np.random.random() > 0.5 :
            rand_index = torch.randperm(inputs_60.size()[0]) # 패치에 사용할 label
            target_a = labels # 원본 이미지 label
            target_b = labels_60[rand_index] # 패치 이미지 label  
            lam = np.random.beta(1.0, 1.0)     
            bbx1, bby1, bbx2, bby2 = rand_bbox(inputs_60.size(), lam)

                # 원본 데이터에 컷믹스 패치
            inputs[:, :, bbx1:bbx2, bby1:bby2] = inputs_60[rand_index, :, bbx1:bbx2, bby1:bby2]

                # 원본 이미지와 패치 이미지의 넓이 비율
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (inputs.size()[-1] * inputs.size()[-2]))

                # 예측은 레이블 1개
            outputs = model(inputs)

                # 원본 이미지의 레이블과 패치 이미지의 레이블에 대해 loss 가중합
            loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            
            # 컷믹스 안하면 그냥 별거 없음
        else:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:
            print('{},{:.5f} loss: {:.3f}'.format(epoch+1, i+1, running_loss/10))
            running_loss=0.0
        n_iter += 1
    print('Finished')
    print('{:.4f}'.format(epoch_f1))

torch.save(model.state_dict(), '/opt/ml/resnet_cutmix_60.pt')

0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
1,10.00000 loss: 0.774
10 10
11 11
12 12
13 13
14 14
15 15
16 0
17 1
18 2
19 3
1,20.00000 loss: 0.534
20 4
21 5
22 6
23 7
24 8
25 9
26 10
27 11
28 12
29 13
1,30.00000 loss: 0.509
30 14
31 15
32 0
33 1
34 2
35 3
36 4
37 5
38 6
39 7
1,40.00000 loss: 0.406
40 8
41 9
42 10
43 11
44 12
45 13
46 14
47 15
48 0
49 1
1,50.00000 loss: 0.402
50 2
51 3
52 4
53 5
54 6
55 7
56 8
57 9
58 10
59 11
1,60.00000 loss: 0.485
60 12
61 13
62 14
63 15
64 0
65 1
66 2
67 3
68 4
69 5
1,70.00000 loss: 0.478
70 6
71 7
72 8
73 9
74 10
75 11
76 12
77 13
78 14
79 15
1,80.00000 loss: 0.392
80 0
81 1
82 2
83 3
84 4
85 5
86 6
87 7
88 8
89 9
1,90.00000 loss: 0.452
90 10
91 11
92 12
93 13
94 14
95 15
96 0
97 1
98 2
99 3
1,100.00000 loss: 0.500
100 4
101 5
102 6
103 7
104 8
105 9
106 10
107 11
108 12
109 13
1,110.00000 loss: 0.437
110 14
111 15
112 0
113 1
114 2
115 3
116 4
117 5
118 6
119 7
1,120.00000 loss: 0.456
120 8
121 9
122 10
123 11
124 12
125 13
126 14
127 15
128 0
129 1
1,1

In [None]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [None]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'batch_84_RANDOM_epoch_100_resnet_18_cutmix_60.csv'), index=False)

print('test inference is done!')

test inference is done!
