# 1. Load libraries

In [2]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time

# Use the GPU when one is visible; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing once tensors are moved.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from dask import delayed, compute
from dask.diagnostics import ProgressBar

# 2. Load dataset

In [3]:
# Collect the image files; sorting makes the order deterministic.
train_png = sorted(glob('open/train/*.png'))
test_png  = sorted(glob('open/test/*.png'))

# glob() returns an empty list for a wrong path instead of raising, so fail
# here with a clear message rather than much later inside the training loop.
assert train_png, "no training images found under open/train/"
assert test_png, "no test images found under open/test/"

train_y = pd.read_csv('open/train_df.csv')

# NOTE(review): images and labels are paired purely by position (i-th sorted
# file <-> i-th CSV row). This assumes the CSV rows are in the same order as
# the sorted file names -- confirm against the CSV before trusting the labels.
assert len(train_png) == len(train_y), (
    f"{len(train_png)} training images but {len(train_y)} rows in train_df.csv"
)

# Encode every distinct label string as an integer class id; ids follow the
# sorted order of the label strings. `label_unique` ends up as {label: id}.
train_labels = train_y['label']
label_unique = sorted(np.unique(train_labels))
label_unique = dict(zip(label_unique, range(len(label_unique))))
train_labels = [label_unique[k] for k in train_labels]

In [4]:
def img_load(path, size=(512, 512)):
    """Read an image file and return it resized, with channels in RGB order.

    Args:
        path: Path of the image file to read.
        size: Target size handed to ``cv2.resize`` (OpenCV's ``dsize``,
            i.e. ``(width, height)``). Defaults to ``(512, 512)``.

    Returns:
        The resized image array; ``(512, 512, 3)`` for the default size.

    Raises:
        OSError: If OpenCV could not read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/undecodable file by returning None;
        # without this check the slice below dies with an opaque TypeError.
        raise OSError(f'cv2.imread could not read image: {path!r}')
    # OpenCV decodes to BGR; reversing the last axis yields RGB.
    img = img[:, :, ::-1]
    return cv2.resize(img, size)

In [5]:
def _load_images(paths):
    """Apply ``img_load`` to every path via dask's process scheduler.

    A progress bar is shown while the tasks run; the results come back as a
    tuple in the same order as ``paths``.
    """
    lazy_loads = [delayed(img_load)(p) for p in paths]
    with ProgressBar():
        return compute(*lazy_loads, scheduler='processes')


train_imgs = _load_images(train_png)
test_imgs = _load_images(test_png)

[########################################] | 100% Completed | 18.1s
[########################################] | 100% Completed | 10.1s


In [22]:
class Custom_dataset(Dataset):
    """In-memory image dataset with random flip augmentation in train mode.

    Args:
        img_paths: Indexable collection of images. Despite the name, each item
            is handed directly to ``transforms.ToTensor`` (and sliced when
            augmenting), so it must hold image arrays, not file paths.
        labels: Indexable collection with one label per image.
        mode: ``'train'`` enables the random flips; any other value
            (e.g. ``'test'``) returns the image unaugmented.

    Raises:
        ValueError: If ``img_paths`` and ``labels`` differ in length.
    """
    def __init__(self, img_paths, labels, mode='train'):
        # Catch a mismatch up front instead of as an IndexError mid-epoch.
        if len(img_paths) != len(labels):
            raise ValueError(
                f'got {len(img_paths)} images but {len(labels)} labels'
            )
        self.img_paths = img_paths
        self.labels    = labels
        self.mode      = mode

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for sample ``idx``."""
        img = self.img_paths[idx]
        if self.mode == 'train':
            # One of [0, 1, 2]: 0 leaves the image as is, 1 / 2 flip it.
            augmentation = random.randint(0, 2)
            if augmentation == 1:
                # Reverse the first axis (rows -> vertical flip for H x W x C
                # input). copy() materialises the reversed view so the
                # tensor conversion does not see negative strides.
                img = img[::-1].copy()
            elif augmentation == 2:
                # Reverse the second axis (columns -> horizontal flip).
                img = img[:, ::-1].copy()
        img = transforms.ToTensor()(img)

        label = self.labels[idx]
        return img, label

class Network(nn.Module):
    """Thin wrapper around a timm image-classification model.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        num_classes: Number of output classes of the classification head.
        pretrained: Whether ``timm.create_model`` should load pretrained weights.

    The defaults reproduce the original hard-coded configuration
    (``efficientnet_b0``, 88 classes, pretrained), so ``Network()`` is unchanged.
    """
    def __init__(self, model_name='efficientnet_b0', num_classes=88, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes
        )

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

# 3. Training & Evaluation

## 3.1 Training

In [23]:
batch_size = 32
epochs     = 25
seed       = 42

# Seed the RNGs so a Restart & Run All repeats the same shuffling and
# augmentation choices: `random` drives the flip augmentation, torch drives
# DataLoader shuffling; NumPy is seeded too in case a library draws from it.
# GPU kernels may still introduce small run-to-run differences.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

In [27]:
# Training split: images and integer labels as arrays, reshuffled every epoch.
train_dataset = Custom_dataset(
    img_paths=np.array(train_imgs),
    labels=np.array(train_labels),
    mode='train',
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
)

# Test split: there is no ground truth, so every image gets the placeholder
# label 'tmp'; order is kept fixed so predictions line up with the test files.
test_dataset = Custom_dataset(
    img_paths=np.array(test_imgs),
    labels=np.array(['tmp'] * len(test_imgs)),
    mode='test',
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
)

In [34]:
def score_function(real, pred):
    """Return the macro-averaged F1 score of predictions ``pred`` against ``real``."""
    macro_f1 = f1_score(real, pred, average='macro')
    return macro_f1

model     = Network().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
scaler    = torch.cuda.amp.GradScaler()  # loss scaling for mixed-precision training

# NOTE(review): `best` is never updated below and no checkpoint is saved;
# kept only because other cells may reference it.
best = 0
for epoch in range(epochs):
    start = time.time()

    train_loss = 0
    train_pred = []
    # Named `train_true` (not `train_y`) so the `train_y` DataFrame from the
    # data-loading cell is not overwritten by a list of targets.
    train_true = []
    model.train()

    for images, targets in train_loader:
        optimizer.zero_grad()
        # The DataLoader already yields tensors: move/cast them with .to()
        # rather than copy-constructing through torch.tensor().
        x = images.to(device, dtype=torch.float32, non_blocking=True)
        y = targets.to(device, dtype=torch.long, non_blocking=True)
        with torch.cuda.amp.autocast():
            pred = model(x)
            # Compute the loss inside the autocast region as well, as the
            # PyTorch AMP recipe does, so autocast picks its precision too.
            loss = criterion(pred, y)

        # Backward on the scaled loss; step() unscales before the optimizer
        # update and update() adjusts the scale factor for the next iteration.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running mean of the per-batch losses over the epoch.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).tolist()
        train_true += y.tolist()
    train_f1 = score_function(train_true, train_pred)
    TIME = time.time() - start
    # Second time figure: this epoch's duration times the epochs still to run.
    print(f'epoch     : {epoch+1}/{epochs} \t\t time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN loss: {train_loss:.5f}   \t\t f1 : {train_f1:.5f}')

epoch     : 1/25 		 time : 50s/1188s
TRAIN loss: 1.03063 		   f1 : 0.21377
epoch     : 2/25 		 time : 50s/1145s
TRAIN loss: 0.58518 		   f1 : 0.34015
epoch     : 3/25 		 time : 50s/1090s
TRAIN loss: 0.40728 		   f1 : 0.50303
epoch     : 4/25 		 time : 49s/1019s
TRAIN loss: 0.31436 		   f1 : 0.58622
epoch     : 5/25 		 time : 49s/979s
TRAIN loss: 0.26344 		   f1 : 0.62194
epoch     : 6/25 		 time : 50s/945s
TRAIN loss: 0.20804 		   f1 : 0.72114
epoch     : 7/25 		 time : 49s/879s
TRAIN loss: 0.20026 		   f1 : 0.75674
epoch     : 8/25 		 time : 50s/844s
TRAIN loss: 0.19013 		   f1 : 0.75369
epoch     : 9/25 		 time : 48s/772s
TRAIN loss: 0.14704 		   f1 : 0.81496
epoch     : 10/25 		 time : 49s/733s
TRAIN loss: 0.11201 		   f1 : 0.84493
epoch     : 11/25 		 time : 49s/689s
TRAIN loss: 0.11730 		   f1 : 0.86175
epoch     : 12/25 		 time : 49s/637s
TRAIN loss: 0.11042 		   f1 : 0.86318
epoch     : 13/25 		 time : 48s/580s
TRAIN loss: 0.09942 		   f1 : 0.86859
epoch     : 14/25 		 time : 49

## 3.2 Evaluation

In [35]:
# Put the model in evaluation mode and collect one predicted class id per
# test image, in test_loader order.
model.eval()
f_pred = []

with torch.no_grad():  # no gradients needed for inference
    for batch in test_loader:
        # batch[0] is already a tensor: move/cast it with .to() rather than
        # copy-constructing through torch.tensor(). batch[1] holds the
        # placeholder labels and is ignored.
        x = batch[0].to(device, dtype=torch.float32, non_blocking=True)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).tolist())

In [40]:
# Invert the {label: id} mapping, then translate each predicted id back into
# its label string.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))
f_result      = list(map(label_decoder.__getitem__, f_pred))

# 4. Submission

In [41]:
# Start from the provided template and fill its `label` column with the
# decoded predictions (row i receives prediction i).
submission = pd.read_csv("open/sample_submission.csv").assign(label=f_result)
submission

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-bent_lead
3,3,cable-missing_cable
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-combined


In [42]:
# to_csv does not create missing directories, so make sure `output/` exists.
os.makedirs('output', exist_ok=True)
submission.to_csv('output/baseline.csv', index=False)

In [50]:
from dacon_submit_api.dacon_submit_api import post_submission_file

# The API token is a credential and must not live in the notebook: read it
# from the environment instead. A token that was ever committed in a notebook
# should be revoked and re-issued.
dacon_token = os.environ.get('DACON_API_TOKEN')
if not dacon_token:
    raise RuntimeError(
        'Set the DACON_API_TOKEN environment variable before submitting.'
    )

# NOTE(review): the remaining positional arguments are presumably the
# competition id, the team name and a memo for the submission -- confirm
# against the dacon_submit_api documentation.
result = post_submission_file(
    'output/baseline.csv',
    dacon_token,
    '235894',
    '어스름한 금요일 밤에',
    'baseline'
)

{'isSubmitted': True, 'detail': 'Success'}
