# Initial Settings

In [None]:
# Mount Google Drive in the Colab runtime so the project folder is reachable.
from google.colab import drive 
drive.mount('/content/gdrive/')
# Project root on Drive; relative paths used after the chdir below resolve against it.
path='/content/gdrive/My Drive/Colab Notebooks/06_Anomaly/'

import os
# Switch the working directory to the project root.
os.chdir(path)

# Third-party dependencies that are installed at runtime (versions are not pinned).
!pip install timm
!pip install https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
Collecting https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip
  Using cached https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip


# Library

In [None]:
import torch.optim as optim
import albumentations as A
import torch.nn as nn
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np 
import random
import torch
import timm
import cv2
import os
import time
import sys

from torchsampler.imbalanced import ImbalancedDatasetSampler
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensor
from glob import glob
from tqdm import tqdm
from sklearn.metrics import f1_score

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs (slowly) on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 학습을 위한 Hyperparameter 셋팅

In [None]:
# Seed every random number generator in play so runs are repeatable.
seed = 51
random.seed(seed)
np.random.seed(seed)      # NumPy's global RNG was previously left unseeded
torch.manual_seed(seed)

# Optimisation settings.
lr = 1e-3
folds = 5
batch_size = 16
epochs = 70

# Images are resized to a square of `resized_image` pixels when loaded;
# training augmentation then takes a random `crop_image`-pixel square crop.
resized_image = 512
crop_image = 498

# image augmentation

In [None]:
# Training-time augmentation: random square crop, random flips and rotation,
# grid distortion and cutout, followed by conversion to a tensor.
albumentations_transform = A.Compose([
    A.RandomCrop(crop_image, crop_image),
    A.HorizontalFlip(p=0.5),  # Same with transforms.RandomHorizontalFlip()
    A.VerticalFlip(p=0.5),
    A.Rotate(p=0.5),
    A.GridDistortion(
        num_steps=10,
        distort_limit=(-0.2, 0.2),
        interpolation=cv2.INTER_CUBIC,     # numeric value 2
        border_mode=cv2.BORDER_CONSTANT,   # numeric value 0
        always_apply=False,
        p=0.5,
    ),
    A.Cutout(
        num_holes=40,
        max_h_size=10,
        max_w_size=10,
        always_apply=False,
        p=0.5,
    ),
    ToTensor(),
])

# Prediction/validation pipeline: no augmentation, only conversion to a tensor.
albumentations_transform_pred = A.Compose([ToTensor()])

# Custom Dataset & Model

In [None]:
class Custom_dataset(Dataset):
    """Dataset that pairs already-indexable samples with their labels.

    Args:
        img_paths: Indexable collection of samples. Each item is passed to
            ``transform`` unchanged as the ``image`` keyword argument.
        labels: Indexable collection of labels, aligned with ``img_paths``.
        mode: Stored on the instance; no method of this class reads it.
        transform: Optional callable invoked as ``transform(image=sample)``
            that must return a mapping with an ``'image'`` entry. Skipped
            when falsy.
    """

    def __init__(self, img_paths, labels, mode='train',transform = None):
        self.img_paths, self.labels = img_paths, labels
        self.mode, self.transform = mode, transform

    def __len__(self):
        """Number of samples, taken from ``img_paths``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` for ``idx``, transforming the sample if configured."""
        raw = self.img_paths[idx]
        sample = self.transform(image=raw)['image'] if self.transform else raw
        return sample, self.labels[idx]

    def get_labels(self):
        """Return the label collection exactly as it was given to the constructor."""
        return self.labels

class Network(nn.Module):
    """Image classifier that wraps a backbone created by ``timm.create_model``.

    Args:
        model_name: Architecture name handed to ``timm.create_model``.
            Defaults to ``'efficientnet_b4'``.
        num_classes: Size of the classification head. Defaults to 88.
        pretrained: Whether to ask timm for pretrained weights. Defaults to True.

    Calling ``Network()`` with no arguments builds the same model as before
    these parameters were introduced.
    """

    def __init__(self, model_name='efficientnet_b4', num_classes=88, pretrained=True):
        super().__init__()
        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=num_classes
        )

    def forward(self, x):
        """Run ``x`` through the backbone and return its output unchanged."""
        return self.model(x)

In [None]:
def img_load(path):
    """Read an image file and return it as a resized RGB array.

    Args:
        path: Location of the image file on disk.

    Returns:
        Array of shape ``(resized_image, resized_image, 3)`` in RGB channel order.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (``cv2.imread``
            returns ``None`` instead of raising).
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    img = cv2.resize(img, (resized_image, resized_image))
    # OpenCV decodes to BGR. Convert to RGB exactly once: the previous code
    # reversed the channels with a slice AND called cvtColor, which swapped
    # them back to BGR. NOTE: checkpoints trained with the old loader saw BGR.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

In [None]:
# Training image files, in sorted filename order.
train_png = sorted(glob('open/train/*.png'))

# Labels come from the CSV in row order.
# NOTE(review): images and labels are paired purely by position, so this
# assumes the CSV rows follow the same order as the sorted filenames - confirm.
train_y = pd.read_csv("open/train_df.csv")
train_labels = train_y["label"]

# Map every distinct label to a consecutive integer id (sorted label order).
label_unique = sorted(np.unique(train_labels))
label_unique = {name: idx for idx, name in enumerate(label_unique)}

# Encode the labels and load every image into memory up front.
train_labels = list(map(label_unique.__getitem__, train_labels))
train_imgs = list(map(img_load, tqdm(train_png)))

train_dataset = Custom_dataset(
    train_imgs,
    train_labels,
    mode='train',
    transform=albumentations_transform,
)

100%|██████████| 4277/4277 [03:05<00:00, 23.10it/s]


# f1 Score function


In [None]:
def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``real``.

    Both arguments are forwarded to ``f1_score`` with ``average="macro"``.
    """
    return f1_score(real, pred, average="macro")

# Train (5 fold)

In [None]:
# Stratified k-fold training: one freshly initialised model per fold; the
# weights with the best validation macro-F1 are written to ./model/<fold>.pt.
kfold = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

# Make sure the checkpoint directory exists so the first save cannot fail
# after a whole epoch of training.
os.makedirs("./model", exist_ok=True)

k_loss_plot, k_val_loss_plot = [], []
# The splitter only needs the sample count and the labels, so a placeholder
# array stands in for X instead of the Dataset object.
for fold, (train_idx, valid_idx) in enumerate(
        kfold.split(np.zeros(len(train_labels)), train_labels)):
    sub_train_imgs = [train_imgs[i] for i in train_idx]
    sub_train_labs = [train_labels[i] for i in train_idx]
    sub_train_dataset = Custom_dataset(
        sub_train_imgs, sub_train_labs, mode='train',
        transform=albumentations_transform)

    sub_valid_imgs = [train_imgs[i] for i in valid_idx]
    sub_valid_labs = [train_labels[i] for i in valid_idx]
    valid_dataset = Custom_dataset(
        sub_valid_imgs, sub_valid_labs, mode='test',
        transform=albumentations_transform_pred)

    # Sampler built from the training subset's labels (via get_labels()).
    train_subsampler = ImbalancedDatasetSampler(sub_train_dataset)

    train_loader = DataLoader(sub_train_dataset, batch_size=batch_size,
                              sampler=train_subsampler)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size)

    model = Network().to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    scaler = torch.cuda.amp.GradScaler()

    val_loss_plot, val_score_plot = [], []
    best = 0
    for epoch in range(epochs):
        start = time.time()

        # ---- train ----
        train_loss = 0
        # Named *_true so the label DataFrame `train_y` is not overwritten.
        train_pred, train_true = [], []
        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            # The loader already yields tensors; move/cast them instead of
            # re-wrapping them with torch.tensor().
            x = batch[0].to(device, dtype=torch.float32)
            y = batch[1].to(device, dtype=torch.long)
            # Forward pass and loss both run under autocast.
            with torch.cuda.amp.autocast():
                pred = model(x)
                loss = criterion(pred, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item() / len(train_loader)
            train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
            train_true += y.detach().cpu().numpy().tolist()

        train_f1 = score_function(train_true, train_pred)

        TIME = time.time() - start
        print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
        print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

        # ---- validate ----
        valid_loss = 0
        valid_pred, valid_true = [], []
        model.eval()
        with torch.no_grad():
            for batch in valid_loader:
                x = batch[0].to(device, dtype=torch.float32)
                y = batch[1].to(device, dtype=torch.long)
                pred = model(x)
                loss = criterion(pred, y)

                # Average over the validation batches (this used to divide by
                # len(train_loader), which under-reported the validation loss).
                valid_loss += loss.item() / len(valid_loader)
                valid_pred += pred.argmax(1).cpu().numpy().tolist()
                valid_true += y.cpu().numpy().tolist()

        valid_f1 = score_function(valid_true, valid_pred)

        print(f'Valid    loss : {valid_loss:.5f}    f1 : {valid_f1:.5f}')
        val_score_plot.append(valid_f1)
        val_loss_plot.append(valid_loss)

        # Save whenever this epoch matches or beats the best validation F1 so far.
        if valid_f1 >= best:
            best = valid_f1
            torch.save(model.state_dict(), "./model/"+str(fold)+".pt")

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b4_ra2_320-7eb33cd5.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_ra2_320-7eb33cd5.pth


epoch : 1/70    time : 143s/9878s
TRAIN    loss : 2.19655    f1 : 0.48315
Valid    loss : 0.33271    f1 : 0.60425
