In [1]:
import math
import os
import random
import time

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from PIL import Image
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
from torch.utils.data import Dataset,DataLoader
from torchvision import models
from torchvision import transforms
from tqdm import tqdm

In [2]:
# Ask PyTorch's CUDA caching allocator to release cached blocks it is not
# currently using, so a re-run of the notebook starts with as much free GPU
# memory as possible.
torch.cuda.empty_cache()


In [3]:
# Seed every random number generator the notebook touches so that a
# "Restart Kernel -> Run All" reproduces the same run.
seed=1
random.seed(seed)
# NOTE: this only affects interpreters started after this point (e.g. worker
# subprocesses); hash randomisation of the running kernel is fixed at start-up.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # cover every visible GPU, not only the current one
torch.backends.cudnn.deterministic = True
# benchmark=True lets cuDNN auto-tune and pick possibly different convolution
# algorithms from run to run, which defeats the determinism requested on the
# line above. It must be off for reproducible results.
torch.backends.cudnn.benchmark = False

In [4]:
# Location of the evaluation data (presumably the held-out test images --
# TODO confirm). Not referenced by any other cell in this part of the notebook.
test_dir = '/opt/ml/input/data/eval'

In [5]:
# Sanity check: display whether a CUDA device is usable. The model is moved to
# 'cuda' unconditionally further down, so this is expected to show True.
torch.cuda.is_available()

True

In [6]:
class MyModel(nn.Module):
    """VGG19 convolutional backbone followed by a small MLP classifier.

    The torchvision VGG19 ``features`` stack is reused as-is; its original
    pooling and fully-connected head are replaced by dropout, global average
    pooling and a two-layer classifier.

    Args:
        num_classes: Number of output logits.
        pretrained: Forwarded to ``models.vgg19``; ``True`` (the default, and
            the original behaviour) loads the pretrained backbone weights.
    """

    def __init__(self, num_classes: int = 1000, pretrained: bool = True):
        super(MyModel, self).__init__()
        model = models.vgg19(pretrained=pretrained)
        self.features = model.features
        self.dropout=nn.Dropout(0.2)
        # Global average pooling collapses each feature map to a single value,
        # so the head sees one number per channel (512, per the Linear below).
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # The head is built exactly once. A throw-away nn.Linear used to be
        # assigned to self.classifier first and was immediately overwritten;
        # dropping it does not change the architecture or the state_dict keys,
        # it only stops a dead layer from being allocated and initialised.
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, num_classes),
        )


    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.dropout(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension before the classifier.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [7]:
# Build the 18-class network and place it on the GPU in one step.
device = torch.device('cuda')
model = MyModel(18).to(device)

In [8]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs loaded from disk.

    The base class is spelled out in full (``torch.utils.data.Dataset``)
    because this class deliberately keeps the name ``Dataset``: inheriting from
    the bare name would, on a second execution of this cell, make the class
    subclass its own previous definition.

    Args:
        img_paths: Iterable of image file paths.
        transform: Callable applied to the loaded PIL image, or a falsy value
            to return the PIL image unchanged.
        label: Iterable of labels, aligned one-to-one with ``img_paths``.
    """

    def __init__(self, img_paths, transform,label):
        # Materialise both inputs as plain lists so that __getitem__ indexes by
        # POSITION. A pandas Series indexed with an integer looks the value up
        # by index *label*, which raises KeyError (or returns the wrong row) as
        # soon as the Series does not carry a default 0..n-1 index.
        self.img_paths = list(img_paths)
        self.transform = transform
        self.label = list(label)

    def __getitem__(self, index):
        """Load the image at position ``index`` as RGB and return it with its label."""
        image = Image.open(self.img_paths[index]).convert('RGB')
        label = self.label[index]
        if self.transform:
            image = self.transform(image)
        return image,label

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.img_paths)

In [9]:
# Target (height, width) handed to transforms.Resize; both sides are 384 px.
size = size2 = 384

In [10]:
# One preprocessing pipeline shared by the training and validation data:
# resize to (size, size2), convert to a tensor, then normalise each channel
# (the constants match the commonly used ImageNet mean / std).
trans = transforms.Compose([
    transforms.Resize((size, size2), interpolation=Image.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Each CSV supplies a 'path' column (image file) and a 'class_label' column.
df = pd.read_csv('new_train.csv')
df2 = pd.read_csv('new_valid.csv')

data = Dataset(img_paths=df['path'], transform=trans, label=df['class_label'])
data2 = Dataset(img_paths=df2['path'], transform=trans, label=df2['class_label'])



In [11]:
# Batch size for training; validation uses twice as many samples per batch.
bs = 64
# Only the training loader shuffles its samples.
train_dataloader = DataLoader(dataset=data, shuffle=True, batch_size=bs)
val_dataloader = DataLoader(dataset=data2, batch_size=2 * bs)

In [12]:
# Central hyper-parameter table read by the training / validation cells below.
CFG = dict(
    fold_num=5,
    seed=19,
    epochs=30,            # passes over the training loader
    T_0=10,               # first restart period of the cosine LR schedule
    lr=1e-4,
    min_lr=1e-5,          # eta_min of the cosine LR schedule
    weight_decay=1e-6,
    num_workers=2,
    accum_iter=2,         # batches accumulated per optimizer step
    verbose_step=1,       # refresh the progress-bar text every N batches
    device='cuda:0',
)

In [13]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scheduler=None, schd_batch_update=False):
    """Run one mixed-precision training pass over ``train_loader``.

    Gradients are accumulated over ``CFG['accum_iter']`` batches before each
    optimizer step. Besides its arguments, the function reads the module-level
    ``CFG`` dict and the module-level ``scaler`` (a ``GradScaler``).

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to train; switched to train mode here.
        loss_fn: Callable ``(predictions, labels) -> loss``.
        optimizer: Optimizer stepped through ``scaler``.
        train_loader: Iterable of ``(images, labels)`` batches with a length.
        device: Device the batches are moved to.
        scheduler: Optional LR scheduler.
        schd_batch_update: If true the scheduler is stepped after every
            optimizer step, otherwise once at the end of the epoch.

    Returns:
        None.
    """
    model.train()

    accum_iter = CFG['accum_iter']
    num_steps = len(train_loader)
    running_loss = None

    pbar = tqdm(enumerate(train_loader), total=num_steps)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        # Only the forward pass and the loss belong under autocast; backward
        # and the optimizer step are run outside of it.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # Divide by the accumulation length so the accumulated gradient is the
        # average over the accumulated batches rather than their sum. (The
        # final group of an epoch may be shorter and is scaled the same way.)
        scaler.scale(loss / accum_iter).backward()

        # Exponential moving average of the un-normalised loss, for display.
        loss_value = loss.item()
        if running_loss is None:
            running_loss = loss_value
        else:
            running_loss = running_loss * .99 + loss_value * .01

        is_last_step = (step + 1) == num_steps

        # Step at the end of every accumulation window, and on the last batch
        # so a trailing partial window is not thrown away.
        if ((step + 1) % accum_iter == 0) or is_last_step:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % CFG['verbose_step'] == 0) or is_last_step:
            description = f'epoch {epoch} loss: {running_loss:.4f}'
            pbar.set_description(description)

    if scheduler is not None and not schd_batch_update:
        scheduler.step()

In [14]:
def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False):
    """Evaluate ``model`` on ``val_loader`` and return the macro F1 score.

    The model is put in eval mode and the whole pass runs under
    ``torch.no_grad()``, so the function is safe to call whether or not the
    caller has already disabled gradient tracking. Reads the module-level
    ``CFG`` dict for the progress-bar refresh interval.

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to evaluate.
        loss_fn: Callable ``(predictions, labels) -> loss``.
        val_loader: Iterable of ``(images, labels)`` batches with a length.
        device: Device the batches are moved to.
        scheduler: Optional LR scheduler stepped once after evaluation.
        schd_loss_update: If true the scheduler is stepped with the mean
            validation loss, otherwise without arguments.

    Returns:
        Macro-averaged F1 score over all validation samples.
    """
    model.eval()

    loss_sum = 0
    sample_num = 0
    image_preds_all = []
    image_targets_all = []

    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
    with torch.no_grad():
        for step, (imgs, image_labels) in pbar:
            imgs = imgs.to(device).float()
            image_labels = image_labels.to(device).long()

            image_preds = model(imgs)
            image_preds_all.append(torch.argmax(image_preds, 1).detach().cpu().numpy())
            image_targets_all.append(image_labels.detach().cpu().numpy())

            loss = loss_fn(image_preds, image_labels)

            # Weight each batch loss by its size so the running value is a
            # per-sample mean even when the last batch is smaller.
            loss_sum += loss.item()*image_labels.shape[0]
            sample_num += image_labels.shape[0]

            if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == len(val_loader)):
                description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
                pbar.set_description(description)

    image_preds_all = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)

    # Computed once, with arguments in scikit-learn's (y_true, y_pred) order.
    valid_f1 = f1_score(image_targets_all, image_preds_all, average='macro')
    print('validation multi-class f1_score = {:.4f}'.format(valid_f1))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(loss_sum/sample_num)
        else:
            scheduler.step()

    return valid_f1

In [15]:
# Loss scaler for mixed-precision training (read as a global by train_one_epoch).
scaler = GradScaler()

# Adam with the learning rate and weight decay taken from CFG.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=CFG['lr'],
    weight_decay=CFG['weight_decay'],
)

# Cosine annealing with warm restarts: the first cycle lasts CFG['T_0']
# scheduler steps, every following cycle is twice as long (T_mult=2), and the
# learning rate never drops below CFG['min_lr'].
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=CFG['T_0'],
    T_mult=2,
    eta_min=CFG['min_lr'],
    last_epoch=-1,
)

# Separate (but identically configured) criteria for training and validation.
loss_tr, loss_fn = nn.CrossEntropyLoss(), nn.CrossEntropyLoss()

In [16]:
# Train for CFG['epochs'] epochs, validating and checkpointing after each one.
folder_name = 'saved_model'
# Create the output directory up front so the first torch.save cannot fail
# after a full epoch of training has already been spent.
os.makedirs(folder_name, exist_ok=True)

for epoch in range(CFG['epochs']):
    train_one_epoch(epoch, model, loss_tr, optimizer, train_dataloader, device, scheduler=scheduler, schd_batch_update=False)

    with torch.no_grad():
        valid_f1 = valid_one_epoch(epoch, model, loss_fn, val_dataloader, device, scheduler=None, schd_loss_update=False)

    # The file name keeps the historical 'vgg19_fold_<epoch>' prefix and now
    # appends the validation F1. The previous format string had only two
    # placeholders for three arguments, so the score was silently dropped.
    checkpoint_name = '{}_fold_{}_{:.3f}.pt'.format('vgg19', epoch, valid_f1)
    torch.save(model.state_dict(), os.path.join(folder_name, checkpoint_name))

torch.cuda.empty_cache()

epoch 0 loss: 0.8087: 100%|██████████| 248/248 [03:57<00:00,  1.04it/s]
epoch 0 loss: 0.3543: 100%|██████████| 31/31 [00:55<00:00,  1.80s/it]


validation multi-class f1_score = 0.6829


epoch 1 loss: 0.2827: 100%|██████████| 248/248 [03:43<00:00,  1.11it/s]
epoch 1 loss: 0.3900: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7259


epoch 2 loss: 0.1522: 100%|██████████| 248/248 [03:44<00:00,  1.11it/s]
epoch 2 loss: 0.3585: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7482


epoch 3 loss: 0.0935: 100%|██████████| 248/248 [03:44<00:00,  1.11it/s]
epoch 3 loss: 0.4023: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7785


epoch 4 loss: 0.0406: 100%|██████████| 248/248 [03:43<00:00,  1.11it/s]
epoch 4 loss: 0.5292: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7817


epoch 5 loss: 0.0203: 100%|██████████| 248/248 [03:44<00:00,  1.11it/s]
epoch 5 loss: 0.5769: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7933


epoch 6 loss: 0.0122: 100%|██████████| 248/248 [03:40<00:00,  1.12it/s]
epoch 6 loss: 0.5450: 100%|██████████| 31/31 [00:45<00:00,  1.46s/it]


validation multi-class f1_score = 0.8020


epoch 7 loss: 0.0045: 100%|██████████| 248/248 [03:42<00:00,  1.11it/s]
epoch 7 loss: 0.7490: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8039


epoch 8 loss: 0.0039: 100%|██████████| 248/248 [03:41<00:00,  1.12it/s]
epoch 8 loss: 0.6631: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8059


epoch 9 loss: 0.0027: 100%|██████████| 248/248 [03:39<00:00,  1.13it/s]
epoch 9 loss: 0.6996: 100%|██████████| 31/31 [00:45<00:00,  1.47s/it]


validation multi-class f1_score = 0.7982


epoch 10 loss: 0.1253: 100%|██████████| 248/248 [03:37<00:00,  1.14it/s]
epoch 10 loss: 0.3968: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.7739


epoch 11 loss: 0.0446: 100%|██████████| 248/248 [03:38<00:00,  1.13it/s]
epoch 11 loss: 0.5445: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.7787


epoch 12 loss: 0.0214: 100%|██████████| 248/248 [03:37<00:00,  1.14it/s]
epoch 12 loss: 0.5644: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.7803


epoch 13 loss: 0.0297: 100%|██████████| 248/248 [03:37<00:00,  1.14it/s]
epoch 13 loss: 0.5952: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8031


epoch 14 loss: 0.0125: 100%|██████████| 248/248 [03:36<00:00,  1.14it/s]
epoch 14 loss: 0.5162: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.8145


epoch 15 loss: 0.0066: 100%|██████████| 248/248 [03:35<00:00,  1.15it/s]
epoch 15 loss: 0.5714: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8039


epoch 16 loss: 0.0023: 100%|██████████| 248/248 [03:36<00:00,  1.15it/s]
epoch 16 loss: 0.6698: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7959


epoch 17 loss: 0.0011: 100%|██████████| 248/248 [03:36<00:00,  1.14it/s]
epoch 17 loss: 0.6812: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8070


epoch 18 loss: 0.0029: 100%|██████████| 248/248 [03:36<00:00,  1.15it/s]
epoch 18 loss: 0.6738: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.8152


epoch 19 loss: 0.0007: 100%|██████████| 248/248 [03:36<00:00,  1.15it/s]
epoch 19 loss: 0.7278: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8033


epoch 20 loss: 0.0005: 100%|██████████| 248/248 [03:34<00:00,  1.15it/s]
epoch 20 loss: 0.8125: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.7858


epoch 21 loss: 0.0001: 100%|██████████| 248/248 [03:40<00:00,  1.12it/s]
epoch 21 loss: 0.8074: 100%|██████████| 31/31 [00:44<00:00,  1.42s/it]


validation multi-class f1_score = 0.7926


epoch 22 loss: 0.0002: 100%|██████████| 248/248 [03:40<00:00,  1.13it/s]
epoch 22 loss: 0.8368: 100%|██████████| 31/31 [00:44<00:00,  1.45s/it]


validation multi-class f1_score = 0.7939


epoch 23 loss: 0.0000: 100%|██████████| 248/248 [03:38<00:00,  1.13it/s]
epoch 23 loss: 0.8518: 100%|██████████| 31/31 [00:44<00:00,  1.42s/it]


validation multi-class f1_score = 0.7959


epoch 24 loss: 0.0004: 100%|██████████| 248/248 [03:40<00:00,  1.13it/s]
epoch 24 loss: 0.8352: 100%|██████████| 31/31 [00:43<00:00,  1.42s/it]


validation multi-class f1_score = 0.7994


epoch 25 loss: 0.0001: 100%|██████████| 248/248 [03:41<00:00,  1.12it/s]
epoch 25 loss: 0.9633: 100%|██████████| 31/31 [00:44<00:00,  1.45s/it]


validation multi-class f1_score = 0.8034


epoch 26 loss: 0.0001: 100%|██████████| 248/248 [03:38<00:00,  1.13it/s]
epoch 26 loss: 0.9321: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.7982


epoch 27 loss: 0.0000: 100%|██████████| 248/248 [03:40<00:00,  1.12it/s]
epoch 27 loss: 0.9159: 100%|██████████| 31/31 [00:44<00:00,  1.45s/it]


validation multi-class f1_score = 0.7975


epoch 28 loss: 0.0000: 100%|██████████| 248/248 [03:38<00:00,  1.13it/s]
epoch 28 loss: 0.9223: 100%|██████████| 31/31 [00:44<00:00,  1.44s/it]


validation multi-class f1_score = 0.8012


epoch 29 loss: 0.0000: 100%|██████████| 248/248 [03:41<00:00,  1.12it/s]
epoch 29 loss: 0.9314: 100%|██████████| 31/31 [00:44<00:00,  1.43s/it]


validation multi-class f1_score = 0.7987
