## Import Packages

In [1]:
import albumentations
import albumentations.pytorch
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from tqdm.notebook import tqdm

import timm
import ttach as tta
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
from sklearn.metrics import f1_score
from efficientnet_pytorch import EfficientNet

tqdm.pandas()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"{device} is using!")

cuda:0 is using!


  from pandas import Panel


## Config Setting

In [2]:
config = {'NUM_EPOCHS' : 15,
          'BATCH_SIZE' : 8,
          'NUM_CLASSES' : 18,
          'LEARNING_RATE' : 1e-4,
          'MODEL' : 'nf_resnet50',  # 불러올 모델 이름
          'MODEL_NAME' : 'nf_resnet50',  # 저장할 떄 이름
          'NUM_WORKERS' : 2,
          'LOG_STEPS' : 450,
          'SAVE_PATH' : './epoch/',
          'LOAD_MODEL' : False,  # 학습을 이어서 할 때 True
          'LOAD_MODEL_PATH' : './epoch/nf_resnet50_epoch_4_0.7522671719026435.pth'  # 이어서 학습할 파일의 경로
         }

In [3]:
# wandb.init(config={'batch_size': config['BATCH_SIZE'],
#                    'lr': config['LEARNING_RATE'],
#                    'epochs': config['NUM_EPOCHS'],
#                    'backbone': config['MODEL_NAME']})

## Augmentation Setting

In [4]:
train_transform = albumentations.Compose(
  [
      albumentations.Resize(256,256),
#       albumentations.RandomRotation(15),
#       albumentations.HorizontalFlip(p=0.3),
      albumentations.OneOf([albumentations.ShiftScaleRotate(rotate_limit=15, p=0.5),
                            albumentations.RandomBrightnessContrast(p=0.5),
                            albumentations.MotionBlur(p=0.5),
                            albumentations.OpticalDistortion(p=0.5),
                            albumentations.GaussNoise(p=0.5)], p=1),
      albumentations.Normalize((0.548, 0.504, 0.479), (0.237, 0.247, 0.246)),
      albumentations.pytorch.transforms.ToTensorV2(),
      #       이미지 원본 사이즈는 384, 512   
  ]
)

test_transform = albumentations.Compose(
  [
      albumentations.Resize(288,288),
      albumentations.Normalize((0.548, 0.504, 0.479), (0.237, 0.247, 0.246)),
      albumentations.pytorch.transforms.ToTensorV2()
      #       이미지 원본 사이즈는 384, 512   
  ]
)

## Read DataFrame

In [5]:
def make_test_full_path(s):
    path = 'input/data/eval/images/'
    return path + s

train_df = pd.read_csv('./stratified_df/train_df.csv')
valid_df = pd.read_csv('./stratified_df/valid_df.csv')
test_df = pd.read_csv('./input/data/eval/info.csv')
test_df['full_path'] = test_df['ImageID'].progress_apply(make_test_full_path)
submission_df = pd.read_csv('./input/data/eval/info.csv')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12600.0), HTML(value='')))




## Dataset & DataLoader

In [6]:
class TrainDataset(Dataset):
    def __init__(self, path, label, transform):
        img_list = []
        for p in tqdm(path):
            img = cv2.imread(p)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_list.append(img)
        
        self.X = img_list
        self.y = label
        self.transform = transform

    def __len__(self):
        len_dataset = len(self.X)
        return len_dataset

    def __getitem__(self, idx):
        X,y = self.X[idx], self.y[idx]
        X = self.transform(image=X)['image']
        return X, y

In [7]:
class TestDataset(Dataset):
    def __init__(self, path, label, transform):
        img_list = []
        for p in tqdm(path):
            img = cv2.imread(p)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_list.append(img)
        
        self.X = img_list
        self.y = label
        self.transform = transform

    def __len__(self):
        len_dataset = len(self.X)
        return len_dataset

    def __getitem__(self, idx):
        X,y = self.X[idx], self.y[idx]
        X = self.transform(image=X)['image']
        return X

In [8]:
def get_dataset(df, transform, train=True):
    if train:
        dataset = TrainDataset(path=df['full_path'].values,
                               label=df['label'].values,
                               transform=transform)
    else:
        dataset = TestDataset(path=df['full_path'].values,
                              label=df['ans'].values,
                              transform=transform)
    return dataset

def get_loader(dataset, config, shuffle=True):
    loader = DataLoader(dataset, batch_size=config['BATCH_SIZE'], shuffle=shuffle, 
                        num_workers=config['NUM_WORKERS'], pin_memory=True)
    return loader

In [9]:
dataset_train = get_dataset(train_df, train_transform, train=True)
dataset_valid = get_dataset(valid_df, test_transform, train=True)

train_dataloader = get_loader(dataset_train, config, shuffle=True)
valid_dataloader = get_loader(dataset_valid, config, shuffle=False)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15120.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3780.0), HTML(value='')))




## Modeling

In [10]:
class Net(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.net = timm.create_model(config['MODEL'], num_classes=config['NUM_CLASSES'], pretrained=True).to(device)
        if config['LOAD_MODEL']:
            self.net.load_state_dict(torch.load(config['LOAD_MODEL_PATH']))
        
    def forward(self, x):
        x = self.net(x)
        return x

In [11]:
def train(model, config, train_dataloader, device):
    running_loss = 0.0
    model.train()
    
    for step, (inputs, labels) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)
        logits = model(inputs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if step % config['LOG_STEPS'] == config['LOG_STEPS'] - 1:
            step_loss = running_loss / config['LOG_STEPS']
            print(f'Traning Steps : {step + 1} Traning Loss : {step_loss}')
            running_loss = 0.0
            
    scheduler.step()

In [12]:
def valid(model, valid_dataloader, device):
    gt_list = []
    pred_list = []
    
    model.eval()
    correct = 0
    
    with torch.no_grad():
        for x, y in tqdm(valid_dataloader):
            x = x.to(device)
            y = y.to(device)
            
            logits = model(x)
            _, pred = torch.max(logits, 1)
            correct += torch.sum(pred == y.data)
            
            for i in y.cpu().numpy():
                gt_list.append(i)
            for j in pred.cpu().numpy():
                pred_list.append(j)
    
    f1 = f1_score(gt_list, pred_list, average='macro')
    acc = correct / dataset_valid.__len__()
    del gt_list, pred_list
    
    print(f'Validation f1_score : {f1}')
    print(f'Validation accuracy : {acc}')
    return f1, acc

In [13]:
def run(model, config, train_dataloader, valid_dataloader, device):
    best_f1 = 0.0
    for epoch in range(config['NUM_EPOCHS']):
        print(f'Epoch : {epoch + 1}')
        train(model, config, train_dataloader, device)
        f1, acc = valid(model, valid_dataloader, device)
        
        wandb.log({'Valid F1': f1,
                   'Valid acc': acc})
        
        if f1 > best_f1:
            best_f1 = f1
            save_path = config['SAVE_PATH']
            model_name = config['MODEL_NAME']
            torch.save(model.state_dict(), f'{save_path}/{model_name}_epoch_{epoch+1}_{best_f1}.pth')
            
        print('-'*50)

In [14]:
model = Net(config)

classes = train_df['label'].value_counts().sort_index().values
class_weight = torch.tensor(np.max(classes) / classes).to(device, dtype=torch.float)

criterion = nn.CrossEntropyLoss(weight=class_weight)
optimizer = AdamW(model.parameters(), lr=config['LEARNING_RATE'])
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

In [15]:
run(model, config, train_dataloader, valid_dataloader, device)

In [None]:
run(model, config, train_dataloader, valid_dataloader, device)

Epoch : 1


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1890.0), HTML(value='')))

Traning Steps : 450 Traning Loss : 2.8097855959998235
Traning Steps : 900 Traning Loss : 2.6953102424409656
Traning Steps : 1350 Traning Loss : 2.6359584641456606
Traning Steps : 1800 Traning Loss : 2.1951496097776624



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


Validation f1_score : 0.2292238527527793
Validation accuracy : 0.3619047701358795
--------------------------------------------------
Epoch : 2


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1890.0), HTML(value='')))

Traning Steps : 450 Traning Loss : 1.8447581464714473
Traning Steps : 900 Traning Loss : 1.4843462986416287
Traning Steps : 1350 Traning Loss : 1.3187636015150281
Traning Steps : 1800 Traning Loss : 1.1588879405789905



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


Validation f1_score : 0.5201821990244266
Validation accuracy : 0.5997354388237
--------------------------------------------------
Epoch : 3


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1890.0), HTML(value='')))

Traning Steps : 450 Traning Loss : 0.9811540197001564
Traning Steps : 900 Traning Loss : 0.9182523472441567
Traning Steps : 1350 Traning Loss : 0.8935298770169418
Traning Steps : 1800 Traning Loss : 0.8431402543187141



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


Validation f1_score : 0.5839287590679072
Validation accuracy : 0.6505290865898132
--------------------------------------------------
Epoch : 4


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1890.0), HTML(value='')))

Traning Steps : 450 Traning Loss : 0.6781455506881078
Traning Steps : 900 Traning Loss : 0.7026414045194784
Traning Steps : 1350 Traning Loss : 0.659082089273466
Traning Steps : 1800 Traning Loss : 0.7534832505799002



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


Validation f1_score : 0.5699627893241783
Validation accuracy : 0.6164021492004395
--------------------------------------------------
Epoch : 5


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1890.0), HTML(value='')))

Traning Steps : 450 Traning Loss : 0.5698209126707581
Traning Steps : 900 Traning Loss : 0.5649262813561492
Traning Steps : 1350 Traning Loss : 0.5747102175839245
Traning Steps : 1800 Traning Loss : 0.516323318005436


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Traning Steps : 450 Traning Loss : 0.3492051477254265
Traning Steps : 900 Traning Loss : 0.2938006750307977
Traning Steps : 1350 Traning Loss : 0.32458504610694944


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Traning Steps : 450 Traning Loss : 0.16245164609151996
Traning Steps : 900 Traning Loss : 0.14749505703932503


## Inferences

In [16]:
def inferences(test_dataloader):
    pred_list = []
    for images in tqdm(test_dataloader):
        model.eval()
        with torch.no_grad():
            images = images.to(device)
            pred = model(images)
            pred = pred.argmax(dim=-1)
            pred_list.extend(pred.cpu().numpy())
    
    return pred_list

In [None]:
dataset_test = get_dataset(test_df, test_transform, train=False)
test_dataloader = get_loader(dataset_test, config, shuffle=False)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12600.0), HTML(value='')))

In [30]:
submission_df['ans'] = inferences(test_dataloader)
submission_df.head()
submission_df.to_csv('./submission.csv', index=False)

## Inferences with TTA

In [None]:
tta_transforms = tta.Compose(
    [
#         tta.HorizontalFlip(),
#         tta.Rotate90(angles=[0, 90]),
#         tta.Scale(scales=[1, 2]),
#         tta.FiveCrops(384, 384),
#         tta.Add([0, 0.25]),
        tta.Scale(scales=[1, 2]),
        tta.Multiply(factors=[0.9, 1, 1.1]),
    ])