In [1]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time

from sklearn.model_selection import StratifiedKFold
device = torch.device('cuda')

In [2]:
import timm
from pprint import pprint
model_names = timm.list_models(pretrained=True)
pprint(model_names)

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_xlarge_in22k',
 'crossvit_9_240',
 'crossv

In [2]:
path = '/home/'

In [3]:
train_png = sorted(glob(path + 'open/train/*.png'))
test_png = sorted(glob(path + 'open/test/*.png'))

In [4]:
len(train_png), len(test_png)

(4277, 2154)

In [5]:
train_y = pd.read_csv(path +"open/train_df.csv")

train_labels = train_y["label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

train_labels = [label_unique[k] for k in train_labels]

In [6]:
def img_load(path):
    img = cv2.imread(path)[:,:,::-1]
    img = cv2.resize(img, (384, 384),interpolation = cv2.INTER_AREA)
    return img

In [7]:
train_imgs = [img_load(m) for m in tqdm(train_png)]
test_imgs = [img_load(n) for n in tqdm(test_png)]

100%|██████████| 4277/4277 [03:18<00:00, 21.60it/s]
100%|██████████| 2154/2154 [01:38<00:00, 21.92it/s]


In [8]:
np.save(path + 'train_imgs_384', np.array(train_imgs))
np.save(path + 'test_imgs_384', np.array(test_imgs))

In [9]:
train_imgs = np.load(path + 'train_imgs_384.npy')
test_imgs = np.load(path + 'test_imgs_384.npy')

In [10]:
meanRGB = [np.mean(x, axis=(0,1)) for x in train_imgs]
stdRGB = [np.std(x, axis=(0,1)) for x in train_imgs]

meanR = np.mean([m[0] for m in meanRGB])/255
meanG = np.mean([m[1] for m in meanRGB])/255
meanB = np.mean([m[2] for m in meanRGB])/255

stdR = np.mean([s[0] for s in stdRGB])/255
stdG = np.mean([s[1] for s in stdRGB])/255
stdB = np.mean([s[2] for s in stdRGB])/255

print("train 평균",meanR, meanG, meanB)
print("train 표준편차",stdR, stdG, stdB)

train 평균 0.4330380901867049 0.4034575319032911 0.39415050509784405
train 표준편차 0.1815717110252788 0.17403455556798705 0.16323395055036488


In [11]:
meanRGB = [np.mean(x, axis=(0,1)) for x in test_imgs]
stdRGB = [np.std(x, axis=(0,1)) for x in test_imgs]

meanR = np.mean([m[0] for m in meanRGB])/255
meanG = np.mean([m[1] for m in meanRGB])/255
meanB = np.mean([m[2] for m in meanRGB])/255

stdR = np.mean([s[0] for s in stdRGB])/255
stdG = np.mean([s[1] for s in stdRGB])/255
stdB = np.mean([s[2] for s in stdRGB])/255

print("test 평균",meanR, meanG, meanB)
print("test 표준편차",stdR, stdG, stdB)

test 평균 0.41825619520929724 0.3931011906330291 0.386631764639131
test 표준편차 0.19505524270747931 0.19005280951759498 0.18053225852732663


In [12]:
class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode=mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode == 'train':
          train_transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean = [0.433038, 0.403458, 0.394151],
                                     std = [0.181572, 0.174035, 0.163234]),
                transforms.RandomAffine((-45, 45)),
                
            ])
          img = train_transform(img)
        if self.mode == 'test':
          test_transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean = [0.418256, 0.393101, 0.386632],
                                     std = [0.195055, 0.190053, 0.185323])
            ])
          img = test_transform(img)

        
        label = self.labels[idx]
        return img, label
    
class Network(nn.Module):
    def __init__(self,mode = 'train'):
        super(Network, self).__init__()
        self.mode = mode
        if self.mode == 'train':
          self.model = timm.create_model('densenet201', pretrained=True, num_classes=88)
        if self.mode == 'test':
          self.model = timm.create_model('densenet201', pretrained=True, num_classes=88)
        
    def forward(self, x):
        x = self.model(x)
        return x

In [13]:
def score_function(real, pred):
    score = f1_score(real, pred, average="macro")
    return score

In [14]:
def main(seed = 2022):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True
    
main(2022)

In [None]:
import gc

cv = StratifiedKFold(n_splits = 5, random_state = 2022,shuffle=True)
batch_size = 16
epochs = 100
pred_ensemble = []


for idx, (train_idx, val_idx) in enumerate(cv.split(train_imgs, np.array(train_labels))):
  print("----------fold_{} start!----------".format(idx))
  t_imgs, val_imgs = train_imgs[train_idx],  train_imgs[val_idx]
  t_labels, val_labels = np.array(train_labels)[train_idx], np.array(train_labels)[val_idx]

  # Train
  train_dataset = Custom_dataset(np.array(t_imgs), np.array(t_labels), mode='train')
  train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

  # Val
  val_dataset = Custom_dataset(np.array(val_imgs), np.array(val_labels), mode='test')
  val_loader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

  gc.collect()
  torch.cuda.empty_cache()
  best=0

  model = Network().to(device)

  optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay = 1e-3)
  criterion = nn.CrossEntropyLoss()
  scaler = torch.cuda.amp.GradScaler()  

  best_f1 = 0
  early_stopping = 0
  for epoch in range(epochs):
    start=time.time()
    train_loss = 0
    train_pred=[]
    train_y=[]
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
    train_f1 = score_function(train_y, train_pred)
    state_dict= model.state_dict()
    model.eval()
    with torch.no_grad():
      val_loss = 0 
      val_pred = []
      val_y = []
      

      for batch in (val_loader):
        x_val = torch.tensor(batch[0], dtype = torch.float32, device = device)
        y_val = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred_val = model(x_val)
        loss_val = criterion(pred_val, y_val)

        val_loss += loss_val.item()/len(val_loader)
        val_pred += pred_val.argmax(1).detach().cpu().numpy().tolist()
        val_y += y_val.detach().cpu().numpy().tolist()
      val_f1 = score_function(val_y, val_pred)

      if val_f1 > best_f1:
        best_epoch = epoch
        best_loss = val_loss
        best_f1 = val_f1
        early_stopping = 0

        torch.save({'epoch':epoch,
                    'state_dict':state_dict,
                    'optimizer': optimizer.state_dict(),
                    'scaler': scaler.state_dict(),
             }, path +'best_model_{}.pth'.format(idx))
        print('-----------------SAVE:{} epoch----------------'.format(best_epoch+1))
      else:
          early_stopping += 1

            # Early Stopping
      if early_stopping == 20:
        TIME = time.time() - start
        print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
        print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
        print(f'Val    loss : {val_loss:.5f}    f1 : {val_f1:.5f}')
        break

    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
    print(f'Val    loss : {val_loss:.5f}    f1 : {val_f1:.5f}')

----------fold_0 start!----------


Downloading: "https://download.pytorch.org/models/densenet201-c1103571.pth" to /root/.cache/torch/hub/checkpoints/densenet201-c1103571.pth


-----------------SAVE:1 epoch----------------
epoch : 1/100    time : 86s/8537s
TRAIN    loss : 1.18249    f1 : 0.15039
Val    loss : 0.68536    f1 : 0.18115
-----------------SAVE:2 epoch----------------
epoch : 2/100    time : 81s/7973s
TRAIN    loss : 0.65202    f1 : 0.19735
Val    loss : 0.54040    f1 : 0.30026
-----------------SAVE:3 epoch----------------
epoch : 3/100    time : 83s/8070s
TRAIN    loss : 0.48170    f1 : 0.34018
Val    loss : 0.52257    f1 : 0.35155
-----------------SAVE:4 epoch----------------
epoch : 4/100    time : 83s/7926s
TRAIN    loss : 0.38577    f1 : 0.44144
Val    loss : 0.38071    f1 : 0.49238
-----------------SAVE:5 epoch----------------
epoch : 5/100    time : 81s/7655s
TRAIN    loss : 0.33787    f1 : 0.51555
Val    loss : 0.38526    f1 : 0.49780
-----------------SAVE:6 epoch----------------
epoch : 6/100    time : 81s/7631s
TRAIN    loss : 0.26477    f1 : 0.60912
Val    loss : 0.29453    f1 : 0.55905
-----------------SAVE:7 epoch----------------
epoch 

epoch : 22/100    time : 84s/6531s
TRAIN    loss : 0.06506    f1 : 0.94655
Val    loss : 0.27854    f1 : 0.73190
-----------------SAVE:23 epoch----------------
epoch : 23/100    time : 84s/6472s
TRAIN    loss : 0.04638    f1 : 0.95056
Val    loss : 0.14616    f1 : 0.83123
epoch : 24/100    time : 83s/6308s
TRAIN    loss : 0.03866    f1 : 0.95094
Val    loss : 0.21470    f1 : 0.70688
epoch : 25/100    time : 84s/6322s
TRAIN    loss : 0.04284    f1 : 0.95340
Val    loss : 0.21134    f1 : 0.74819
epoch : 26/100    time : 81s/5997s
TRAIN    loss : 0.03063    f1 : 0.97021
Val    loss : 0.19356    f1 : 0.77036
epoch : 27/100    time : 83s/6042s
TRAIN    loss : 0.06733    f1 : 0.92847
Val    loss : 0.25325    f1 : 0.72929
epoch : 28/100    time : 79s/5681s
TRAIN    loss : 0.04373    f1 : 0.96668
Val    loss : 0.22546    f1 : 0.70782
epoch : 29/100    time : 82s/5847s
TRAIN    loss : 0.04323    f1 : 0.94911
Val    loss : 0.18535    f1 : 0.78369
epoch : 30/100    time : 80s/5601s
TRAIN    loss 

epoch : 45/100    time : 82s/4517s
TRAIN    loss : 0.02153    f1 : 0.97669
Val    loss : 0.18324    f1 : 0.77036
epoch : 46/100    time : 82s/4403s
TRAIN    loss : 0.03459    f1 : 0.96543
Val    loss : 0.17500    f1 : 0.78847
epoch : 47/100    time : 82s/4323s
TRAIN    loss : 0.02182    f1 : 0.98526
Val    loss : 0.13335    f1 : 0.85430
epoch : 48/100    time : 82s/4288s
TRAIN    loss : 0.01840    f1 : 0.98940
Val    loss : 0.13099    f1 : 0.81958
epoch : 49/100    time : 81s/4147s
TRAIN    loss : 0.02254    f1 : 0.98220
Val    loss : 0.15070    f1 : 0.84253
epoch : 50/100    time : 82s/4102s
TRAIN    loss : 0.03264    f1 : 0.97161
Val    loss : 0.14378    f1 : 0.81692
epoch : 51/100    time : 81s/3972s
TRAIN    loss : 0.01553    f1 : 0.97621
Val    loss : 0.10745    f1 : 0.88486
epoch : 52/100    time : 81s/3912s
TRAIN    loss : 0.00583    f1 : 0.99637
Val    loss : 0.10668    f1 : 0.86783
epoch : 53/100    time : 81s/3823s
TRAIN    loss : 0.01878    f1 : 0.97613
Val    loss : 0.13595

In [16]:
pred_ensemble = []
batch_size = 16
# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# for i in range(5):
model_test = Network(mode = 'test').to(device)
model_test.load_state_dict(torch.load((path+'best_model_2.pth'))['state_dict'])
model_test.eval()
pred_prob = []
with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model_test(x)
            pred_prob.extend(pred.detach().cpu().numpy())
    pred_ensemble.append(pred_prob)

In [31]:
pred_ensemble = []
batch_size = 16
# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

model_test = Network(mode = 'test').to(device)
model_test.load_state_dict(torch.load((path+'best_model_2.pth'))['state_dict'])
model_test.eval()
f_pred = []

with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model_test(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

In [28]:
pred = np.array(pred_ensemble)
f_pred = np.array(pred).argmax(1).tolist()

In [29]:
pred

array([[[ -7.137, -14.8  , -12.61 , ..., -10.11 ,  -5.582, -15.36 ],
        [ -7.59 ,  -7.52 ,  -7.71 , ...,  -8.266,  -8.06 ,  -7.875],
        [-10.586, -12.516, -10.02 , ..., -13.58 , -12.875, -11.016],
        ...,
        [ -6.566,  -6.1  ,  -6.66 , ...,  -6.934,  -6.81 ,  -7.348],
        [-10.3  , -10.78 ,  -8.3  , ..., -12.32 , -13.586, -11.13 ],
        [ -7.65 ,  -9.12 ,  -6.65 , ...,  -5.38 ,  -1.095,  -1.917]]],
      dtype=float16)

In [32]:
len(f_pred)

2154

In [33]:
label_decoder = {val:key for key, val in label_unique.items()}

In [35]:
f_result = [label_decoder[result] for result in f_pred]

In [36]:
submission = pd.read_csv(path + "open/sample_submission.csv")

submission["label"] = f_result

submission

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-poke_insulation


In [37]:
submission.to_csv(path + "densenet201_2_epoch100.csv", index = False)

In [None]:
# d2b19ece8c7374053ee1fd80cfe419ddfc640c01f9ebe4cbd5caeb9f1906974a

from dacon_submit_api import dacon_submit_api 

result = dacon_submit_api.post_submission_file(
'/home/densenet201_2_epoch100.csv', 
'e134e26bd4db1327ed9caad5c47c3d2fc9f161527e3829b0c593ebc832597e1a', 
'235894', 
'ideal9', 
'densenet201_BS16_2' )

# https://www.dacon.io/competitions/official/235894/overview/rules

### 모델 학습

사전 학습 모델의 성능 파악을 할 때 Fold 학습은 실행 시간이 오래걸려서 fold를 나누지 않은 데이터에 대해서 학습을 진행하고 성능을 비교하였습니다.

In [18]:
batch_size = 16
epochs = 30

# Train
train_dataset = Custom_dataset(np.array(train_imgs), np.array(train_labels), mode='train')
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [19]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
def score_function(real, pred):
    score = f1_score(real, pred, average="macro")
    return score

model = Network().to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay = 1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() 

batch_size = 16
epochs = 30

best=0
for epoch in range(epochs):
    start=time.time()
    train_loss = 0
    train_pred=[]
    train_y=[]
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
        
    
    train_f1 = score_function(train_y, train_pred)

    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

### 추론

In [80]:
model.eval()
f_pred = []
pred_prob = []

with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model(x)
            pred_prob.extend(pred.detach().cpu().numpy())
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

In [None]:
label_decoder = {val:key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

### 제출물 생성

In [None]:
submission = pd.read_csv(path + "open/sample_submission.csv")

submission["label"] = f_result

submission

In [None]:
submission.to_csv(path + "VGG16_norm.csv", index = False)