In [1]:
!pip install opencv-python
!pip install gdown
!pip install timm



In [2]:
import gdown

google_path = 'https://drive.google.com/uc?id='
file_id = '1AWOO1awvSGHHKbydWJTmeZ0g5f5rV85I'
output_name = 'open.zip'
gdown.download(google_path+file_id,output_name,quiet=False)

Access denied with the following error:



 	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses. 

You may still be able to access the file from the browser:

	 https://drive.google.com/uc?id=1AWOO1awvSGHHKbydWJTmeZ0g5f5rV85I 



In [3]:
!unzip open.zip

Archive:  open.zip
replace open/sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [4]:
cd open

/home/open


In [5]:
!unzip test.zip
!unzip train.zip

Archive:  test.zip
replace test/20000.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C
Archive:  train.zip
replace train/10000.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


### 라이브러리 불러오기

In [6]:
cd ..

/home


In [7]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time


device = torch.device('cuda')

In [8]:
train_png = sorted(glob('open/train/*.png'))
test_png = sorted(glob('open/test/*.png'))

In [9]:
train_y = pd.read_csv("open/train_df.csv")

train_labels = train_y["label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

train_labels = [label_unique[k] for k in train_labels]

In [10]:
def img_load(path):
    img = cv2.imread(path)[:,:,::-1]
    img = cv2.resize(img, (512, 512))
    return img

In [11]:
train_imgs = [img_load(m) for m in tqdm(train_png)]
test_imgs = [img_load(n) for n in tqdm(test_png)]

100%|██████████| 4277/4277 [02:31<00:00, 28.22it/s]
100%|██████████| 2154/2154 [01:16<00:00, 28.31it/s]


In [22]:
class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode=mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode=='train':
            augmentation = random.randint(0,2)
            if augmentation==1:
                img = img[::-1].copy()
            elif augmentation==2:
                img = img[:,::-1].copy()
        img = transforms.ToTensor()(img)
        if self.mode=='test':
            pass
        
        label = self.labels[idx]
        return img, label
    
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.model = timm.create_model('efficientnet_b1', pretrained=True, num_classes=88)
        
    def forward(self, x):
        x = self.model(x)
        return x

In [23]:
batch_size = 8
epochs = 150

# Train
train_dataset = Custom_dataset(np.array(train_imgs), np.array(train_labels), mode='train')
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

### 모델 학습

In [None]:
def score_function(real, pred):
    score = f1_score(real, pred, average="macro")
    return score

model = Network().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() 



best=0
for epoch in range(epochs):
    start=time.time()
    train_loss = 0
    train_pred=[]
    train_y=[]
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
        
    
    train_f1 = score_function(train_y, train_pred)

    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1-533bc792.pth


epoch : 1/150    time : 88s/13040s
TRAIN    loss : 1.11478    f1 : 0.17999
epoch : 2/150    time : 88s/12979s
TRAIN    loss : 0.62227    f1 : 0.31980
epoch : 3/150    time : 88s/12900s
TRAIN    loss : 0.51545    f1 : 0.40152
epoch : 4/150    time : 88s/12814s
TRAIN    loss : 0.42573    f1 : 0.48138
epoch : 5/150    time : 88s/12790s
TRAIN    loss : 0.37334    f1 : 0.53786
epoch : 6/150    time : 87s/12599s
TRAIN    loss : 0.33615    f1 : 0.59699
epoch : 7/150    time : 88s/12565s
TRAIN    loss : 0.31997    f1 : 0.58258
epoch : 8/150    time : 89s/12688s
TRAIN    loss : 0.28047    f1 : 0.63491
epoch : 9/150    time : 88s/12457s
TRAIN    loss : 0.25685    f1 : 0.65594
epoch : 10/150    time : 87s/12136s
TRAIN    loss : 0.21968    f1 : 0.73339
epoch : 11/150    time : 88s/12287s
TRAIN    loss : 0.22244    f1 : 0.73097
epoch : 12/150    time : 87s/12060s
TRAIN    loss : 0.18821    f1 : 0.76888
epoch : 13/150    time : 87s/11972s
TRAIN    loss : 0.17503    f1 : 0.76471
epoch : 14/150    tim

### 추론

In [None]:
model.eval()
f_pred = []

with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

In [None]:
label_decoder = {val:key for key, val in label_unique.items()}
f_result = [label_decoder[result] for result in f_pred]

### 제출물 생성

In [None]:
submission = pd.read_csv("open/sample_submission.csv")
submission["label"] = f_result
submission

In [None]:
submission.to_csv("baseline.csv", index = False)