In [1]:
from torch.utils.data import DataLoader, Subset
from torchvision import models,transforms
import pandas as pd
import numpy as np
import os
from PIL import Image
from typing import Tuple, List
import torch
from torch import optim,nn
from torch.autograd import Variable
from tqdm import tqdm
import matplotlib.pyplot as plt

from dataset import MaskDataset, AddGaussianNoise
from models import get_pre_trained

data_dir = '../../input/data/train'

In [2]:
os.getcwd()

'/opt/ml/level1-image-classification-level1-recsys-08/code'

In [3]:
model_name = 'densenet161'
# model_name = 'resnet50'
num_classes = 18
# Initialize the model for this run
model = get_pre_trained(model_name, feature_extract = False, pretrained = True)
# Define the device:
device = torch.device('cuda:0')
# Put the model on the device:
model = model.to(device)

In [4]:
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

train_transform = transforms.Compose([transforms.RandomRotation(degrees = 15),
                                      transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
                                      transforms.RandomVerticalFlip(p=0.5),
                                      transforms.ToTensor(),
                                      transforms.Normalize(norm_mean, norm_std),
#                                       AddGaussianNoise()
                                     ])
val_transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(norm_mean, norm_std)])

dataset = MaskDataset(data_dir=data_dir,transforms=train_transform, adj_csv = True, val_ratio=0.1, up_sampling = 3)
batch_size = 16
train_set, val_set = dataset.split_dataset()
use_cuda = torch.cuda.is_available()

train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=use_cuda,
    drop_last=True,
)

val_loader = DataLoader(
    val_set,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=use_cuda,
    drop_last=True,
)

upsamling starts ...
Data split completed: val_ratio=0.1
n_train=26064, n_val=2895


In [5]:
# this function is used during training process, to calculation the loss and accuracy
class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [6]:
def train(model, train_loader, criterion, optimizer, epoch):
    save_dir = os.path.join(os.getcwd(), 'save')
    model.train()
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    curr_iter = (epoch - 1) * len(train_loader)
    
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    for i, data in enumerate(train_loader):
        images, labels = data
        N = images.size(0)
        # print('image shape:',images.size(0), 'label shape',labels.size(0))
        images = Variable(images).to(device)
        labels = Variable(labels).to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        prediction = outputs.max(1, keepdim=True)[1]
        train_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
        train_loss.update(loss.item())
        curr_iter += 1
        if (i + 1) % 200 == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
                epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
            total_loss_train.append(train_loss.avg)
            total_acc_train.append(train_acc.avg)
    return train_loss.avg, train_acc.avg

In [7]:
def validate(model, val_loader, criterion, optimizer, epoch):
    model.eval()
    val_loss = AverageMeter()
    val_acc = AverageMeter()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            images, labels = data
            N = images.size(0)
            images = Variable(images).to(device)
            labels = Variable(labels).to(device)

            outputs = model(images)
            prediction = outputs.max(1, keepdim=True)[1]

            val_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)

            val_loss.update(criterion(outputs, labels).item())

    print('------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
    print('------------------------------------------------------------')
    return val_loss.avg, val_acc.avg

In [None]:
MODEL_PATH = '../model' + '/' + model_name
os.makedirs(MODEL_PATH, exist_ok=True)
epoch_num = 12
best_val_acc = 0
total_loss_train, total_acc_train = [],[]
total_loss_val, total_acc_val = [],[]
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss().to(device)
for epoch in tqdm(range(1, epoch_num+1)):
    loss_train, acc_train = train(model, train_loader, criterion, optimizer, epoch)
    loss_val, acc_val = validate(model, val_loader, criterion, optimizer, epoch)
    total_loss_val.append(loss_val)
    total_acc_val.append(acc_val)
    if acc_val > best_val_acc:
        best_val_acc = acc_val
        torch.save(model.state_dict(), os.path.join(MODEL_PATH, f"{model_name}_best.pt"))
        print('*****************************************************')
        print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, loss_val, acc_val))
        print('*****************************************************')

  0%|          | 0/12 [00:00<?, ?it/s]

[epoch 1], [iter 200 / 1629], [train loss 2.12937], [train acc 0.30406]
[epoch 1], [iter 400 / 1629], [train loss 1.81498], [train acc 0.39563]
[epoch 1], [iter 600 / 1629], [train loss 1.58529], [train acc 0.46479]
[epoch 1], [iter 800 / 1629], [train loss 1.43412], [train acc 0.50758]
[epoch 1], [iter 1000 / 1629], [train loss 1.32533], [train acc 0.54131]
[epoch 1], [iter 1200 / 1629], [train loss 1.23563], [train acc 0.56776]
[epoch 1], [iter 1400 / 1629], [train loss 1.15487], [train acc 0.59487]
[epoch 1], [iter 1600 / 1629], [train loss 1.09273], [train acc 0.61535]
------------------------------------------------------------
[epoch 1], [val loss 0.57674], [val acc 0.78333]
------------------------------------------------------------


  8%|▊         | 1/12 [28:18<5:11:28, 1698.93s/it]

*****************************************************
best record: [epoch 1], [val loss 0.57674], [val acc 0.78333]
*****************************************************
[epoch 2], [iter 200 / 1629], [train loss 0.58204], [train acc 0.78063]
[epoch 2], [iter 400 / 1629], [train loss 0.57304], [train acc 0.78781]
[epoch 2], [iter 600 / 1629], [train loss 0.56716], [train acc 0.78969]
[epoch 2], [iter 800 / 1629], [train loss 0.55185], [train acc 0.79578]
[epoch 2], [iter 1000 / 1629], [train loss 0.54184], [train acc 0.79981]
[epoch 2], [iter 1200 / 1629], [train loss 0.53668], [train acc 0.80156]
[epoch 2], [iter 1400 / 1629], [train loss 0.53131], [train acc 0.80286]
[epoch 2], [iter 1600 / 1629], [train loss 0.52529], [train acc 0.80625]
------------------------------------------------------------
[epoch 2], [val loss 0.41792], [val acc 0.84583]
------------------------------------------------------------


 17%|█▋        | 2/12 [56:25<4:41:58, 1691.88s/it]

*****************************************************
best record: [epoch 2], [val loss 0.41792], [val acc 0.84583]
*****************************************************
[epoch 3], [iter 200 / 1629], [train loss 0.43187], [train acc 0.83625]
[epoch 3], [iter 400 / 1629], [train loss 0.42654], [train acc 0.83859]
[epoch 3], [iter 600 / 1629], [train loss 0.41594], [train acc 0.84240]
[epoch 3], [iter 800 / 1629], [train loss 0.41041], [train acc 0.84695]
[epoch 3], [iter 1000 / 1629], [train loss 0.40247], [train acc 0.84925]
[epoch 3], [iter 1200 / 1629], [train loss 0.39923], [train acc 0.85250]
[epoch 3], [iter 1400 / 1629], [train loss 0.39424], [train acc 0.85460]
[epoch 3], [iter 1600 / 1629], [train loss 0.38863], [train acc 0.85672]
------------------------------------------------------------
[epoch 3], [val loss 0.38322], [val acc 0.86667]
------------------------------------------------------------


 25%|██▌       | 3/12 [1:24:28<4:13:07, 1687.55s/it]

*****************************************************
best record: [epoch 3], [val loss 0.38322], [val acc 0.86667]
*****************************************************
[epoch 4], [iter 200 / 1629], [train loss 0.28535], [train acc 0.89125]
[epoch 4], [iter 400 / 1629], [train loss 0.30734], [train acc 0.88484]
[epoch 4], [iter 600 / 1629], [train loss 0.31653], [train acc 0.88104]
[epoch 4], [iter 800 / 1629], [train loss 0.32161], [train acc 0.88156]
[epoch 4], [iter 1000 / 1629], [train loss 0.31136], [train acc 0.88681]
[epoch 4], [iter 1200 / 1629], [train loss 0.30777], [train acc 0.88854]
[epoch 4], [iter 1400 / 1629], [train loss 0.30370], [train acc 0.89107]
[epoch 4], [iter 1600 / 1629], [train loss 0.30376], [train acc 0.89086]
------------------------------------------------------------
[epoch 4], [val loss 0.26056], [val acc 0.90833]
------------------------------------------------------------


 33%|███▎      | 4/12 [1:52:27<3:44:35, 1684.43s/it]

*****************************************************
best record: [epoch 4], [val loss 0.26056], [val acc 0.90833]
*****************************************************
[epoch 5], [iter 200 / 1629], [train loss 0.23718], [train acc 0.91344]
[epoch 5], [iter 400 / 1629], [train loss 0.24814], [train acc 0.90953]
[epoch 5], [iter 600 / 1629], [train loss 0.25016], [train acc 0.90719]
[epoch 5], [iter 800 / 1629], [train loss 0.24509], [train acc 0.91031]
[epoch 5], [iter 1000 / 1629], [train loss 0.24591], [train acc 0.91075]
[epoch 5], [iter 1200 / 1629], [train loss 0.24698], [train acc 0.91057]
[epoch 5], [iter 1400 / 1629], [train loss 0.24394], [train acc 0.91219]
[epoch 5], [iter 1600 / 1629], [train loss 0.24277], [train acc 0.91258]


 42%|████▏     | 5/12 [2:20:14<3:15:46, 1678.01s/it]

------------------------------------------------------------
[epoch 5], [val loss 0.31443], [val acc 0.88854]
------------------------------------------------------------
[epoch 6], [iter 200 / 1629], [train loss 0.20185], [train acc 0.92812]
[epoch 6], [iter 400 / 1629], [train loss 0.20293], [train acc 0.92906]
[epoch 6], [iter 600 / 1629], [train loss 0.21326], [train acc 0.92531]
[epoch 6], [iter 800 / 1629], [train loss 0.21240], [train acc 0.92555]
[epoch 6], [iter 1000 / 1629], [train loss 0.21230], [train acc 0.92563]
[epoch 6], [iter 1200 / 1629], [train loss 0.20969], [train acc 0.92651]
[epoch 6], [iter 1400 / 1629], [train loss 0.21021], [train acc 0.92612]
[epoch 6], [iter 1600 / 1629], [train loss 0.21058], [train acc 0.92668]
------------------------------------------------------------
[epoch 6], [val loss 0.14830], [val acc 0.94583]
------------------------------------------------------------


 50%|█████     | 6/12 [2:48:23<2:48:09, 1681.62s/it]

*****************************************************
best record: [epoch 6], [val loss 0.14830], [val acc 0.94583]
*****************************************************
[epoch 7], [iter 200 / 1629], [train loss 0.15859], [train acc 0.94906]
[epoch 7], [iter 400 / 1629], [train loss 0.16268], [train acc 0.94563]
[epoch 7], [iter 600 / 1629], [train loss 0.16378], [train acc 0.94615]
[epoch 7], [iter 800 / 1629], [train loss 0.16651], [train acc 0.94352]
[epoch 7], [iter 1000 / 1629], [train loss 0.17140], [train acc 0.94144]
[epoch 7], [iter 1200 / 1629], [train loss 0.16865], [train acc 0.94255]
[epoch 7], [iter 1400 / 1629], [train loss 0.17082], [train acc 0.94219]
[epoch 7], [iter 1600 / 1629], [train loss 0.17066], [train acc 0.94215]
------------------------------------------------------------
[epoch 7], [val loss 0.12730], [val acc 0.95903]
------------------------------------------------------------


 58%|█████▊    | 7/12 [3:16:21<2:20:03, 1680.63s/it]

*****************************************************
best record: [epoch 7], [val loss 0.12730], [val acc 0.95903]
*****************************************************
[epoch 8], [iter 200 / 1629], [train loss 0.13264], [train acc 0.95562]
[epoch 8], [iter 400 / 1629], [train loss 0.14534], [train acc 0.95172]
[epoch 8], [iter 600 / 1629], [train loss 0.14590], [train acc 0.95010]
[epoch 8], [iter 800 / 1629], [train loss 0.14502], [train acc 0.94977]
[epoch 8], [iter 1000 / 1629], [train loss 0.15065], [train acc 0.94781]
[epoch 8], [iter 1200 / 1629], [train loss 0.15058], [train acc 0.94729]
[epoch 8], [iter 1400 / 1629], [train loss 0.14530], [train acc 0.94937]
[epoch 8], [iter 1600 / 1629], [train loss 0.14487], [train acc 0.95000]
------------------------------------------------------------
[epoch 8], [val loss 0.11751], [val acc 0.96076]
------------------------------------------------------------


 67%|██████▋   | 8/12 [3:44:20<1:51:59, 1679.99s/it]

*****************************************************
best record: [epoch 8], [val loss 0.11751], [val acc 0.96076]
*****************************************************
[epoch 9], [iter 200 / 1629], [train loss 0.14386], [train acc 0.94688]
[epoch 9], [iter 400 / 1629], [train loss 0.12316], [train acc 0.95562]
[epoch 9], [iter 600 / 1629], [train loss 0.13710], [train acc 0.95188]
[epoch 9], [iter 800 / 1629], [train loss 0.13441], [train acc 0.95258]
[epoch 9], [iter 1000 / 1629], [train loss 0.13868], [train acc 0.95156]
[epoch 9], [iter 1200 / 1629], [train loss 0.13273], [train acc 0.95464]
[epoch 9], [iter 1400 / 1629], [train loss 0.13084], [train acc 0.95549]
[epoch 9], [iter 1600 / 1629], [train loss 0.13093], [train acc 0.95559]


 75%|███████▌  | 9/12 [4:12:13<1:23:53, 1677.81s/it]

------------------------------------------------------------
[epoch 9], [val loss 0.16034], [val acc 0.94444]
------------------------------------------------------------
[epoch 10], [iter 200 / 1629], [train loss 0.10640], [train acc 0.96031]
[epoch 10], [iter 400 / 1629], [train loss 0.13383], [train acc 0.95359]
[epoch 10], [iter 600 / 1629], [train loss 0.11882], [train acc 0.95917]
[epoch 10], [iter 800 / 1629], [train loss 0.11265], [train acc 0.96164]
[epoch 10], [iter 1000 / 1629], [train loss 0.11292], [train acc 0.96113]
[epoch 10], [iter 1200 / 1629], [train loss 0.11925], [train acc 0.95917]
[epoch 10], [iter 1400 / 1629], [train loss 0.11855], [train acc 0.96000]
[epoch 10], [iter 1600 / 1629], [train loss 0.11663], [train acc 0.96070]


In [None]:
fig = plt.figure(num = 2, figsize = (12,6))
fig1 = fig.add_subplot(2,1,1)
fig2 = fig.add_subplot(2,1,2)
fig1.plot(total_loss_train, label = 'training loss')
fig1.plot(total_acc_train, label = 'training accuracy')
fig1.legend()
fig2.plot(total_loss_val, label = 'validation loss')
fig2.plot(total_acc_val, label = 'validation accuracy')
fig2.legend()
plt.show()