<a href="https://colab.research.google.com/github/choki0715/UnLiteFlowNet-PIV/blob/master/baseline3_crossvit_18_dagger_240_BN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only): the dataset archive is read from, and model
# checkpoints are written to, folders under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!mkdir data
!unzip -q /content/drive/MyDrive/beef/data.zip -d ./data

In [None]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 5.0 MB/s 
Installing collected packages: timm
Successfully installed timm-0.5.4


In [None]:
import pandas as pd
import numpy as np
import os
from os import path as osp
import cv2
from tqdm.notebook import tqdm

import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations
import timm

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

from warnings import filterwarnings
filterwarnings("ignore")

# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a runtime without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# timm.list_models(pretrained=True)

In [None]:
# Record the installed timm version next to the results.
print(timm.__version__)

0.5.4


In [None]:
# Paths: each split directory holds a CSV manifest; load_data() resolves image
# files relative to the CSV's directory, under an `images/` sub-folder.
train_path = './data/train'
test_path = './data/test'
train_csv_path = osp.join(train_path, 'grade_labels.csv')
test_csv_path = osp.join(test_path, 'test_images.csv')

# Data
# Side length (pixels) every image is resized to by the albumentations pipelines.
# NOTE(review): the backbone name below ends in `_240` while images are resized
# to 224 — confirm this mismatch is intentional.
image_size = 224
# Grade string -> class index, and the inverse mapping used to decode predictions.
label_dict = {'1++': 0, '1+': 1, '1': 2, '2': 3, '3': 4}
reversed_label_dict = {v:k for k,v in label_dict.items()}

# Cross-validation: number of stratified folds and the fold held out for validation.
kfold = 10
fold_id = 0
# Dark-image filter threshold; the filter only runs when this is > 0.
mean_pixel_min_value = 0

# Model
num_classes = len(label_dict)
backbone_name = 'crossvit_18_dagger_240'


# Data loading
batch_size = 32
n_worker = 4

# Optimisation: Adam learning rate and number of training epochs.
init_lr = 5e-5
n_epochs = 200

# Seed for the StratifiedKFold shuffle (no other RNG is seeded in the visible code).
random_state=139


In [None]:
def load_data(csv_path, is_train=True):
  """Read an image manifest CSV and attach the path of every listed image.

  Args:
    csv_path: Path to a CSV with an `imname` column (and a `grade` column when
      `is_train` is true). Images are expected in an `images/` directory that
      sits next to the CSV.
    is_train: When true, also add an integer `label` column by looking each
      `grade` up in the module-level `label_dict`.

  Returns:
    The loaded DataFrame with a `file_path` column (plus `label` for training data).

  Raises:
    KeyError: If a grade is not a key of `label_dict` (training data only).
  """
  frame = pd.read_csv(csv_path)
  image_dir = osp.join(osp.dirname(csv_path), 'images')
  frame['file_path'] = frame['imname'].apply(lambda name: osp.join(image_dir, name))
  if not is_train:
    return frame
  frame['label'] = frame['grade'].apply(lambda grade: label_dict[grade])
  return frame

def set_fold_column(df, n_splits=kfold):
  """Assign every row to a stratified cross-validation fold.

  Adds (or overwrites) an integer `fold` column on `df` in place; rows with
  `fold == k` form the validation set of fold `k`. Stratification uses the
  `label` column and the shuffle is seeded with the module-level `random_state`.

  Args:
    df: DataFrame with a `label` column.
    n_splits: Number of folds. (Previously this argument was ignored and the
      global `kfold` was always used.)

  Returns:
    The same DataFrame object, with the `fold` column filled in.
  """
  skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
  df['fold'] = -1
  fold_col = df.columns.get_loc('fold')
  for fold, (_, valid_idx) in enumerate(skf.split(df, df.label)):
      # split() yields positional indices, so write with iloc: label-based .loc
      # would hit the wrong rows whenever df does not have a default RangeIndex.
      df.iloc[valid_idx, fold_col] = fold
  return df

def get_mean_pixel_value(file_path):
  """Return the mean grayscale intensity of the image stored at `file_path`.

  Args:
    file_path: Path of an image file readable by OpenCV.

  Returns:
    The mean of all pixels after converting the image to grayscale.

  Raises:
    FileNotFoundError: If OpenCV cannot read the file.
  """
  img = cv2.imread(file_path)
  if img is None:
    # cv2.imread reports a missing or undecodable file by returning None; fail
    # with the offending path instead of an opaque error inside cvtColor.
    raise FileNotFoundError(f'Could not read image: {file_path}')
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  return np.mean(img)

def set_mean_pixel_column(df, min_value=10):
  """Measure each image's mean brightness and drop the dark ones.

  Stores the mean grayscale value of every image in a `mean_pixel` column
  (written onto `df` itself) and keeps only rows whose value is strictly
  greater than `min_value`. Used to remove dark images.

  Args:
    df: DataFrame with a `file_path` column.
    min_value: Brightness threshold; rows at or below it are removed.

  Returns:
    A filtered DataFrame with a fresh 0..n-1 index.
  """
  df['mean_pixel'] = df['file_path'].apply(get_mean_pixel_value)
  bright_enough = df['mean_pixel'] > min_value
  return df[bright_enough].reset_index(drop=True)

def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)); works on scalars and NumPy arrays."""
  exp_neg = np.exp(-x)
  return 1 / (1 + exp_neg)

In [None]:
# Build the unlabelled test frame and the labelled training frame (with fold ids).
test_df = load_data(test_csv_path, is_train=False)
train_all_df = set_fold_column(load_data(train_csv_path, is_train=True), n_splits=kfold)

# Optional dark-image filter: skipped entirely while the threshold is 0.
if mean_pixel_min_value > 0:
  train_all_df = set_mean_pixel_column(train_all_df, mean_pixel_min_value)

print(f'train shape : {train_all_df.shape}, test shape : {test_df.shape}')
display(test_df.head())
train_all_df.head()

train shape : (10000, 5), test shape : (8658, 2)


Unnamed: 0,imname,file_path
0,WuSUZJHN6t.jpg,./data/test/images/WuSUZJHN6t.jpg
1,hrua4NW4Cj.jpg,./data/test/images/hrua4NW4Cj.jpg
2,GDOHhHZJug.jpg,./data/test/images/GDOHhHZJug.jpg
3,Xewfe9T1kN.jpg,./data/test/images/Xewfe9T1kN.jpg
4,y3vLHbbHFs.jpg,./data/test/images/y3vLHbbHFs.jpg


Unnamed: 0,imname,grade,file_path,label,fold
0,cow_1++_4567.jpg,1++,./data/train/images/cow_1++_4567.jpg,0,5
1,cow_2_1390.jpg,2,./data/train/images/cow_2_1390.jpg,3,0
2,cow_1++_2581.jpg,1++,./data/train/images/cow_1++_2581.jpg,0,6
3,cow_2_1689.jpg,2,./data/train/images/cow_2_1689.jpg,3,6
4,cow_3_3287.jpg,3,./data/train/images/cow_3_3287.jpg,4,0


In [None]:
# Sanity check: list the distinct grade strings present in the training labels.
train_all_df.grade.unique()

array(['1++', '2', '3', '1+', '1'], dtype=object)

In [None]:
# Class balance: number of training images per encoded label.
train_all_df.label.value_counts()

2    2201
0    2134
1    2134
3    2090
4    1441
Name: label, dtype: int64

In [None]:
class BeefDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame.

    Args:
        df: DataFrame with a `file_path` column and, unless `mode == 'test'`,
            a `label` column. Its index is reset so items are addressed 0..n-1.
        mode: `'test'` makes `__getitem__` return only the image; any other
            value returns an `(image, label)` pair.
        transform: Optional callable invoked as `transform(image=img)` that
            returns a mapping whose `'image'` entry is the processed HWC array
            (the call convention of the albumentations pipelines in this notebook).
    """

    def __init__(self, df, mode, transform=None):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Load item `index` as a float CHW tensor (plus its label outside test mode).

        Raises:
            FileNotFoundError: If the image file cannot be read by OpenCV.
        """
        row = self.df.loc[index]
        img = cv2.imread(row.file_path)
        if img is None:
            # cv2.imread returns None for a missing/corrupt file; surface the path
            # here rather than failing inside cvtColor in a DataLoader worker.
            raise FileNotFoundError(f'Could not read image: {row.file_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        # HWC -> CHW is applied whether or not a transform is configured, so the
        # returned tensor always has the channel-first layout the model expects.
        # (Previously the transpose was skipped when transform was None.)
        img = torch.tensor(img.transpose(2, 0, 1)).float()

        if self.mode == 'test':
            return img
        return img, torch.tensor(row.label)

class BeefModel(nn.Module):
    """timm backbone (`backbone_name`) whose two classification heads are replaced by small MLPs.

    The stock model carries one linear head per branch:
        (0): Linear(in_features=224, out_features=num_classes)
        (1): Linear(in_features=448, out_features=num_classes)
    Each is swapped for Linear -> LayerNorm -> ReLU -> Linear, with a hidden
    width of half the branch's feature size.
    """

    def __init__(self):
        super().__init__()
        self.model = timm.create_model(backbone_name, pretrained=True, num_classes=num_classes)

        def make_head(in_features):
            # Two-layer MLP head: in_features -> in_features // 2 -> num_classes.
            hidden = in_features // 2
            return nn.Sequential(
                nn.Linear(in_features=in_features, out_features=hidden),
                nn.LayerNorm(hidden),
                nn.ReLU(),
                nn.Linear(in_features=hidden, out_features=num_classes),
            )

        # Heads are built in branch order (224-wide first, then 448-wide).
        self.model.head = nn.ModuleList([make_head(224), make_head(448)])

    def forward(self, x):
        """Run the wrapped backbone on a batch of images and return its output."""
        return self.model(x)
        

In [None]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability `1 - smoothing`; the remaining
    `smoothing` mass is split evenly over the other `classes - 1` classes.

    Args:
        classes: Number of classes.
        smoothing: Probability mass moved away from the true class.
        dim: Class dimension used for the log-softmax and the per-sample sum.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of logits `pred` against class indices `target`."""
        log_probs = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Target distribution: uniform off-class mass, `confidence` at the true class.
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        per_sample = torch.sum(-true_dist * log_probs, dim=self.dim)
        return per_sample.mean()
        

In [None]:
# Model, optimizer and loss. train_epoch()/evaluate() read these as notebook
# globals, so this cell must run before the training cell.
model = BeefModel().to(device)
optimizer = optim.Adam(model.parameters(), lr = init_lr)
# Label-smoothing factor handed to CrossEntropyLoss.
lsr = 0.15
criterion = nn.CrossEntropyLoss(label_smoothing=lsr).to(device)
# Alternative: the hand-written smoothing loss defined above.
# criterion = LabelSmoothingLoss(classes=5, smoothing=0.1).to(device)

Downloading: "https://github.com/IBM/CrossViT/releases/download/weights-0.1/crossvit_18_dagger_224.pth" to /root/.cache/torch/hub/checkpoints/crossvit_18_dagger_224.pth


In [None]:
# Display the module tree to inspect the backbone and the replaced heads.
model

BeefModel(
  (model): CrossViT(
    (patch_embed): ModuleList(
      (0): PatchEmbed(
        (proj): Sequential(
          (0): Conv2d(3, 56, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (1): ReLU(inplace=True)
          (2): Conv2d(56, 112, kernel_size=(3, 3), stride=(3, 3))
          (3): ReLU(inplace=True)
          (4): Conv2d(112, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        )
      )
      (1): PatchEmbed(
        (proj): Sequential(
          (0): Conv2d(3, 112, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (1): ReLU(inplace=True)
          (2): Conv2d(112, 224, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (3): ReLU(inplace=True)
          (4): Conv2d(224, 448, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        )
      )
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): MultiScaleBlock(
        (blocks): ModuleList(
          (0): Sequential(
            (0): Block(


In [None]:
# Training augmentation: random flips, brightness/contrast jitter and an
# always-applied (p=1) shift/scale/rotate, followed by resizing to
# image_size x image_size and normalisation.
# NOTE(review): border_mode=0 is presumably cv2.BORDER_CONSTANT, and Normalize()
# relies on the library's default mean/std — confirm both against the installed
# albumentations version.
transforms_train = albumentations.Compose([
    albumentations.VerticalFlip(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.RandomBrightness(limit=0.2, p=0.75),
    albumentations.RandomContrast(limit=0.2, p=0.75),
    albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=90, border_mode=0, p=1),
    albumentations.Resize(image_size, image_size),
    albumentations.Normalize()
])

# Validation/test preprocessing: the same resize + normalise, with no random augmentation.
transforms_valid = albumentations.Compose([                      
    albumentations.Resize(image_size, image_size),
    albumentations.Normalize()
])


In [None]:
# Hold out fold `fold_id` for validation and train on every other fold.
is_valid_fold = train_all_df['fold'] == fold_id
train_df = train_all_df[~is_valid_fold]
valid_df = train_all_df[is_valid_fold]

# Validation and test share the deterministic preprocessing pipeline.
train_dataset = BeefDataset(train_df, mode='train', transform=transforms_train)
valid_dataset = BeefDataset(valid_df, mode='valid', transform=transforms_valid)
test_dataset = BeefDataset(test_df, mode='test', transform=transforms_valid)

# Only the training loader shuffles; validation/test keep row order so that
# predictions line up with their DataFrames.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=n_worker)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=n_worker)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=n_worker)

In [None]:
def train_epoch(train_loader):
    """Run one optimisation pass over `train_loader`.

    Uses the notebook-global `model`, `optimizer`, `criterion` and `device`.
    The progress bar description shows the loss of the latest batch.

    Args:
        train_loader: Iterable yielding `(images, targets)` batches.

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    progress = tqdm(train_loader)
    batch_losses = []
    for images, targets in progress:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)
        progress.set_description(f'loss: {loss_value:.5f}')

    return np.mean(batch_losses)

def evaluate(valid_loader):
    """Evaluate the notebook-global `model` on `valid_loader`.

    Uses the global `criterion` and `device`, and prints a one-line summary.

    Changes from the earlier version: the loss is accumulated as a Python
    float via `.item()` instead of a device tensor; the returned loss is the
    same per-batch mean that is printed (it used to be the un-normalised sum);
    the never-read list of raw outputs is gone; and an empty loader reports
    0.0 instead of dividing by zero.

    Args:
        valid_loader: Sized iterable of `(images, targets)` batches that
            exposes a `.dataset` attribute with a length.

    Returns:
        `(mean_loss, acc)`: the mean per-batch loss as a float, and the
        accuracy in percent.
    """
    total_loss = 0.0
    correct = 0
    model.eval()
    with torch.no_grad():
        for images, targets in tqdm(iter(valid_loader)):
            images, targets = images.to(device), targets.to(device)

            output = model(images)
            total_loss += criterion(output, targets).item()
            correct += (output.argmax(dim=1) == targets).sum().item()

    n_batches = len(valid_loader)
    n_samples = len(valid_loader.dataset)
    mean_loss = total_loss / n_batches if n_batches else 0.0
    acc = 100 * correct / n_samples if n_samples else 0.0
    print('Valid set: Loss: {:.4f}, Accuracy: {}/{} {:.4f}%'.format(mean_loss, correct, n_samples, acc))
    return mean_loss, acc

In [None]:
# Train for n_epochs; whenever validation accuracy beats the best seen so far,
# export the model as TorchScript to Drive (one file per improving epoch).

best_acc = 0
for epoch in range(1, n_epochs + 1):
    print(f'{epoch} Epoch')
    train_epoch(train_loader)
    val_loss, val_acc = evaluate(valid_loader)

    if val_acc > best_acc:
        best_acc = val_acc
        checkpoint_path = f'./drive/MyDrive/beef/{backbone_name}_LN_LSR_15_epoch{epoch}.pt'
        jit_model = torch.jit.script(model)
        torch.jit.save(jit_model, checkpoint_path)
        print(f'Model saved')
    print()

1 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Valid set: Loss: 1.3163, Accuracy: 478/1000 47.8000%
Model saved

2 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Valid set: Loss: 1.2884, Accuracy: 474/1000 47.4000%

3 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Valid set: Loss: 1.2623, Accuracy: 529/1000 52.9000%
Model saved

4 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Valid set: Loss: 1.2122, Accuracy: 557/1000 55.7000%
Model saved

5 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Valid set: Loss: 1.1560, Accuracy: 600/1000 60.0000%
Model saved

6 Epoch


  0%|          | 0/282 [00:00<?, ?it/s]

In [None]:
# Predict a grade for every test image and write the submission file.
# NOTE(review): this uses the in-memory `model` as the training loop left it,
# not one of the saved best-accuracy checkpoints — confirm that is intended.

model.eval()
preds = []
with torch.no_grad():
    for images in tqdm(iter(test_loader)):
        logit = model(images.to(device))
        pred = logit.argmax(dim=1)
        preds.extend(pred.tolist())

# Decode class indices back to grade strings.
grade_preds = [reversed_label_dict[class_idx] for class_idx in preds]

# Fill a copy of the sample submission with (image name, grade), then left-join
# it onto the sample's `id` column so the output keeps the sample's row order.
submission = pd.read_csv('./data/sample_submission.csv')
temp = submission.copy().assign(id=test_df.imname, grade=grade_preds)

submission = pd.merge(submission['id'], temp, on='id', how='left')
submission.to_csv('submit.csv', index=False)

  0%|          | 0/542 [00:00<?, ?it/s]