In [None]:
# Mount Google Drive at /content/drive (Colab only); later cells read and write under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/workspace/lotte/

/content/drive/MyDrive/workspace/lotte


In [None]:
!unzip ./dataset/LPD_competition.zip -d /content

In [None]:
!pip install efficientnet_pytorch

## Library imports

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm
import os
from tqdm.auto import tqdm
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import models
from glob import glob
import cv2
from PIL import Image
import torch.nn.functional as F
from sklearn.model_selection import KFold, StratifiedKFold
from torch.optim.lr_scheduler import ReduceLROnPlateau 

## Config Setting

In [None]:
# CONFIG
SEED = 777
# Seed every RNG the notebook draws from: torch (model init, CutMix boxes, shuffling)
# and numpy (cutmix_data decides whether to mix with np.random.random()).
torch.manual_seed(SEED)
np.random.seed(SEED)
BATCH_SIZE = 50
EPOCHS = 30
LEARNING_RATE = 5e-4

# DEVICE
print(f'PyTorch Version : [{torch.__version__}]')
# Use the first GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Device : [{device}]')

## Custom Datasets

In [None]:
class LotteDataset(Dataset):
  """Image dataset read from a directory of ``.jpg`` files.

  Training mode (``train_mode`` truthy): files are collected from
  ``<data_root>/<label>/*.jpg``; the name of the directory that directly
  contains an image is parsed as its integer label.

  Test mode (``train_mode`` falsy): files are collected from
  ``<data_root>/*.jpg``; the file stem must be an integer and the images are
  served in ascending numeric order.

  Attributes:
    img_list: image paths in the order items are served.
    train_y: integer labels aligned with ``img_list`` (training mode only).
    len: number of images.
  """

  def __init__(self, data_root, train_mode):
    super(LotteDataset, self).__init__()
    self.train_mode=train_mode

    if self.train_mode:
      # Sort so the sample order (and anything derived from it, such as
      # cross-validation splits) does not depend on filesystem listing order.
      self.img_list = sorted(glob(os.path.join(data_root, '*/*.jpg')))
      self.train_y = [self._label_of(img_path) for img_path in self.img_list]
    else:
      self.img_list = glob(os.path.join(data_root, '*.jpg'))
      self.img_list.sort(key=self._file_number)

    self.len = len(self.img_list)

  @staticmethod
  def _label_of(img_path):
    """Return the integer label encoded in the image's parent directory name."""
    # Parsed relative to the file itself, so it works for any data_root depth.
    return int(os.path.basename(os.path.dirname(img_path)))

  @staticmethod
  def _file_number(img_path):
    """Return the integer encoded in the file name without its extension."""
    return int(os.path.splitext(os.path.basename(img_path))[0])

  def __getitem__(self, index):
    """Return ``(image, label)`` in training mode, otherwise just the image."""
    img_path = self.img_list[index]
    # Image Loading: decode inside a context manager so the file handle is
    # released, and force 3 channels so the downstream 3-channel Normalize
    # also works for grayscale/RGBA files.
    with Image.open(img_path) as raw:
      img = raw.convert('RGB')

    if self.train_mode:
      return img, self.train_y[index]
    return img

  def __len__(self):
    return self.len

In [None]:
class MapTransform(Dataset):
    """Wrap a dataset and apply ``transform`` to the image of every sample.

    Args:
        dataset: indexable source; yields ``(image, label)`` pairs when
            ``train_mode`` is truthy, bare images otherwise.
        transform: callable applied to the image.
        train_mode: whether samples carry a label.
    """

    def __init__(self, dataset, transform, train_mode):
        self.dataset = dataset
        self.transform=transform
        self.train_mode=train_mode

    def __getitem__(self, index):
        """Return ``(transform(image), label)`` or ``transform(image)``."""
        # Fetch the sample exactly once: indexing the wrapped dataset may load
        # and decode an image, so doing it per field would double that work.
        sample = self.dataset[index]
        if self.train_mode:
            return self.transform(sample[0]), sample[1]
        return self.transform(sample)

    def __len__(self):
        return len(self.dataset)

In [None]:
# Training-time augmentation: one random colour perturbation, then one random
# geometric perturbation, then tensor conversion, resize to 224x224 and normalisation.
train_transforms=transforms.Compose([
    transforms.RandomChoice([
        transforms.ColorJitter(brightness=(1,1.1)),
        transforms.ColorJitter(contrast=0.1),
        transforms.ColorJitter(saturation=0.1),
    ]),
    transforms.RandomChoice([
        # `interpolation=` replaces the deprecated `resample=` keyword, which
        # newer torchvision releases no longer accept.
        transforms.RandomAffine(degrees=15, translate=(0.2, 0.2), scale=(0.8, 1.2), shear=10,
                                interpolation=transforms.InterpolationMode.BILINEAR, fill=255),
        transforms.RandomCrop((224,224)),
    ]),
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])
# Evaluation-time preprocessing: no augmentation, same resize and normalisation.
test_transforms=transforms.Compose([transforms.ToTensor(),
                                    transforms.Resize((224,224)),
                                    transforms.Normalize([0.485, 0.456, 0.406],
                                                          [0.229, 0.224, 0.225])])

# Raw (untransformed) datasets; the training set is split into folds later.
all_data=LotteDataset('/content/train',train_mode=True)
test_data=LotteDataset('/content/test',train_mode=False)

# The test loader keeps file order (shuffle=False) so predictions line up with the submission rows.
trans_test_data=MapTransform(test_data,test_transforms,train_mode=False)
test_iter=DataLoader(trans_test_data,batch_size=BATCH_SIZE,shuffle=False,num_workers=2)

### CutMix

In [None]:
def rand_bbox(W, H, lam):
    """Sample a random box whose area is about ``1 - lam`` of a ``W`` x ``H`` grid.

    Args:
        W: extent of the first box axis.
        H: extent of the second box axis.
        lam: mixing ratio; a tensor or a plain number.

    Returns:
        ``(x1, y1, x2, y2)`` as integer tensors of shape ``(1,)`` on ``lam``'s
        device, clamped to ``[0, W]`` / ``[0, H]``. The box may be smaller than
        requested when its centre falls near a border.
    """
    # Accept plain floats too, and take the device from the input instead of
    # relying on a module-level `device` variable.
    lam = torch.as_tensor(lam)
    cut_rat = torch.sqrt(1.0 - lam)
    cut_w = (W * cut_rat).type(torch.long)
    cut_h = (H * cut_rat).type(torch.long)
    # uniform
    cx = torch.randint(W, (1,)).to(lam.device)
    cy = torch.randint(H, (1,)).to(lam.device)
    x1 = torch.clamp(cx - cut_w // 2, 0, W)
    y1 = torch.clamp(cy - cut_h // 2, 0, H)
    x2 = torch.clamp(cx + cut_w // 2, 0, W)
    y2 = torch.clamp(cy + cut_h // 2, 0, H)
    return x1, y1, x2, y2


def cutmix_data(x, y, alpha=1.0, p=0.5):
    """Apply CutMix to a batch with probability ``p``.

    A box is sampled with ``rand_bbox`` and, inside it, every sample receives
    the pixels of another sample of the same batch (chosen by a random
    permutation).

    Args:
        x: 4-D batch tensor; the box is cut along dimensions 2 and 3.
        y: labels, indexable along dimension 0 like ``x``.
        alpha: both parameters of the Beta distribution the mixing ratio is drawn from.
        p: probability of mixing; otherwise the batch is returned untouched.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``lam`` is a Python float giving the
        fraction of each image that still belongs to ``y_a``. Without mixing
        the result is ``(x, y, zeros_like(y), 1.0)``.
    """
    if np.random.random() > p:
        return x, y, torch.zeros_like(y), 1.0
    W, H = x.size(2), x.size(3)
    # Keep helper tensors on the batch's own device rather than a global one.
    shuffle = torch.randperm(x.size(0)).to(x.device)

    lam = torch.distributions.beta.Beta(alpha, alpha).sample().to(x.device)

    x1, y1, x2, y2 = (int(v) for v in rand_bbox(W, H, lam))
    # Work on a copy so the caller's batch is not modified in place.
    cutmix_x = x.clone()
    cutmix_x[:, :, x1:x2, y1:y2] = x[shuffle, :, x1:x2, y1:y2]
    # Adjust lambda to match pixel ratio (the box may have been clipped at a border)
    lam = 1 - (x2 - x1) * (y2 - y1) / float(W * H)
    y_a, y_b = y, y[shuffle]
    return cutmix_x, y_a, y_b, lam

### Label Smoothing

In [None]:
def loss_fn(outputs, targets):
    """Cross-entropy between logits and either hard or soft targets.

    ``targets`` with exactly one dimension are treated as class indices and
    passed to ``F.cross_entropy``. Any other shape is treated as a per-class
    weight matrix matching ``outputs``: the loss is then the batch mean of
    ``sum(-targets * log_softmax(outputs))`` over dimension 1.
    """
    if targets.dim() != 1:
        log_probs = F.log_softmax(outputs, dim=1)
        per_sample = torch.sum(-targets * log_probs, dim=1)
        return torch.mean(per_sample)
    return F.cross_entropy(outputs, targets)

def label_smooth_loss_fn(outputs, targets, epsilon=0.1, num_classes=None):
    """Cross-entropy against label-smoothed targets.

    Each one-hot target is replaced by
    ``(1 - epsilon) * onehot + epsilon / num_classes`` and the result is passed
    to ``loss_fn`` as soft targets.

    Args:
        outputs: logits with the class scores along dimension 1.
        targets: integer class indices.
        epsilon: total probability mass spread uniformly over all classes.
        num_classes: number of classes; defaults to ``outputs.size(1)`` so the
            function is not tied to one particular model head.
    """
    if num_classes is None:
        num_classes = outputs.size(1)
    # Build the targets on the logits' device instead of a module-level one.
    onehot = F.one_hot(targets, num_classes).float().to(outputs.device)
    targets = (1 - epsilon) * onehot + epsilon / num_classes
    return loss_fn(outputs, targets)

## Model Training

In [None]:
def func_eval(Model,data_iter,loss):
    """Evaluate ``Model`` on every batch of ``data_iter`` without gradients.

    Args:
        Model: module called as ``Model(imgs)``; class scores along dimension 1.
        data_iter: sized iterable of ``(imgs, labels)`` batches.
        loss: callable ``loss(predictions, labels)`` returning a scalar tensor.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats. ``accuracy`` is the
        fraction of correctly classified samples; ``mean_loss`` is the average
        of the per-batch loss values.
    """
    # Remember the current mode so evaluation does not silently switch a
    # model that was in eval mode back to training mode.
    was_training = Model.training
    Model.eval()
    n_total, n_correct = 0, 0
    loss_val_sum = 0.0
    print("(Train or Validation) Data Testing....\n")
    try:
        with torch.no_grad():
            # Pass the loader itself (not iter(...)) so tqdm can show the total.
            for imgs, labels in tqdm(data_iter):
                imgs, labels = imgs.to(device), labels.to(device)

                model_pred = Model(imgs)

                # .item() keeps the running sum a plain float instead of a device tensor.
                loss_val_sum += loss(model_pred, labels).item()

                _, y_pred = torch.max(model_pred, 1)
                n_correct += (y_pred == labels).sum().item()
                n_total += imgs.size(0)
    finally:
        Model.train(was_training)
    val_accr = n_correct / n_total
    loss_val_avg = loss_val_sum / len(data_iter)
    print("Testing Done.\n")
    return val_accr, loss_val_avg

def get_submission(Model,data_iter,epoch,fold):
  """Predict a class for every batch of ``data_iter`` and write a submission CSV.

  Args:
    Model: module called as ``Model(imgs)``; class scores along dimension 1.
    data_iter: iterable of image batches (no labels), in submission row order.
    epoch: epoch number, used only in the output file name.
    fold: fold number, used only in the output file name.

  Reads ``./dataset/sample.csv``, replaces its ``prediction`` column with the
  predicted class indices and writes
  ``./submission_Efficient<fold>_<epoch>.csv``.
  """
  # Restore whatever mode the model was in instead of always forcing train().
  was_training = Model.training
  Model.eval()
  pred_label = []
  print("Final Testing....\n")
  try:
    with torch.no_grad():
      # Iterate the loader that was passed in, not a module-level one.
      for imgs in tqdm(data_iter):
        model_pred = Model(imgs.to(device))

        _, y_pred = torch.max(model_pred, 1)
        pred_label.extend(y_pred.tolist())
  finally:
    Model.train(was_training)

  submission = pd.read_csv('./dataset/sample.csv', encoding = 'utf-8')
  submission['prediction'] = pred_label
  submission.to_csv('./submission_Efficient'+str(fold)+'_'+str(epoch)+'.csv', index = False)

In [None]:
# 4-fold stratified cross-validation. random_state pins the split so folds are
# reproducible (777 is the same value the config cell uses to seed torch).
folds=StratifiedKFold(n_splits=4,shuffle=True,random_state=777)

CHECKPOINT_DIR='./checkpoint_efficient04'
# torch.save does not create missing directories; create it up front so the
# first checkpoint (written only after epoch 19) cannot fail on a missing path.
os.makedirs(CHECKPOINT_DIR,exist_ok=True)

# `with` guarantees the log file is closed even if training raises.
with open("./efficient04_trainlog.txt", 'w') as f:
  for current_fold,(train_idx, vali_idx) in enumerate(folds.split(all_data,all_data.train_y)):
    Model = EfficientNet.from_pretrained('efficientnet-b4')
    # Model freeze: for the first epochs only the last two child modules are trainable.
    for m in list(Model.children())[:-2]:
      for param in m.parameters():
        param.requires_grad=False

    train_data=torch.utils.data.Subset(all_data,train_idx)
    vali_data=torch.utils.data.Subset(all_data,vali_idx)

    # Augmentation for the training split, plain preprocessing for validation.
    trans_train_data=MapTransform(train_data,train_transforms,train_mode=True)
    trans_vali_data=MapTransform(vali_data,test_transforms,train_mode=True)

    train_iter=DataLoader(trans_train_data,batch_size=BATCH_SIZE,shuffle=True,num_workers=2)
    # Validation metrics do not depend on sample order, so no shuffling is needed.
    vali_iter=DataLoader(trans_vali_data,batch_size=BATCH_SIZE,shuffle=False,num_workers=2)

    # Move the model before building the optimizer so the optimizer sees the final parameters.
    Model.to(device)
    Model.train()

    # NOTE(review): the forward pass below is not wrapped in autocast, so the
    # scaler only rescales a full-precision loss - confirm whether mixed
    # precision was intended.
    scaler = torch.cuda.amp.GradScaler()
    optimizer = optim.Adam(Model.parameters(), lr=LEARNING_RATE)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1,threshold_mode='abs',min_lr=1e-8, verbose=True)
    loss=label_smooth_loss_fn
    print(f'::::::::: Fold : {current_fold} :::::::::::\n')
    f.write(f'::::::::: Fold : {current_fold} :::::::::::\n')

    prev_vali_loss=100
    best_vali_loss=100
    flag=0  # number of epochs whose validation loss got worse since the last new best
    over_check=0
    for epoch in range(EPOCHS) :
      if epoch==3:
        # From epoch 3 on: unfreeze everything ...
        for m in Model.children():
          for param in m.parameters():
            param.requires_grad=True
        # ... then re-freeze the early feature-extraction children (all but the last seven).
        for m in list(Model.children())[:-7]:
          for param in m.parameters():
            param.requires_grad=False
      loss_val_sum=0.0
      for imgs, labels in tqdm(train_iter):
        # Cut mix P=0.5
        imgs, labels = imgs.to(device), labels.to(device)
        imgs, labels_a, labels_b, lam = cutmix_data(imgs, labels)

        # Equivalent of optimizer.zero_grad(): drop the gradients instead of zeroing them.
        for param in Model.parameters():
          param.grad = None
        model_pred=Model(imgs)
        loss_out = lam * loss(model_pred, labels_a) + (1 - lam) * loss(model_pred, labels_b)

        scaler.scale(loss_out).backward()
        scaler.step(optimizer)
        scaler.update()

        # .item() detaches the value; summing the tensor itself would keep
        # every iteration's autograd history referenced for the whole epoch.
        loss_val_sum+=loss_out.item()

      loss_val_avg=loss_val_sum/len(train_iter)
      vali_accr,vali_loss=func_eval(Model,vali_iter,loss_fn)
      vali_loss=float(vali_loss)  # plain number for logging, scheduling and comparisons

      log_line="epoch:[%d] train loss:[%.5f] vali loss:[%.5f] vali_accr:[%.5f]\n"%(epoch,loss_val_avg,vali_loss,vali_accr)
      print(log_line)
      f.write(log_line)
      f.flush()  # keep the log on disk current during long runs

      if epoch>19:
        # Late epochs: also write a submission file and a checkpoint.
        get_submission(Model,test_iter,epoch,current_fold)
        print("Model Save....\n")
        torch.save({'model_state_dict': Model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()},
                   os.path.join(CHECKPOINT_DIR, str(current_fold)+'_efficient04_epoch_'+str(epoch)+'.tar'))
      scheduler.step(vali_loss) # LR Scheduler

      if prev_vali_loss<vali_loss: #Stop Train
        flag+=1
        if flag==10 :
          print("Stop Training...\n")
          break
      if best_vali_loss>vali_loss:
        flag=0
        best_vali_loss=vali_loss
      prev_vali_loss=vali_loss