In [1]:
# # Run just once
# !pip install torchmetrics

In [2]:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import glob
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split
import warnings

import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler, ConcatDataset
from torchvision import transforms, utils
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import F1

from Dataset import AgeDataset
from myutil import mapAttributes, save_aug, mapAge
from model import MyResNet18

In [3]:
warnings.filterwarnings(action='ignore') 

In [4]:
# # run just once! (takes about 30 sec.)
# base_path = "../input/data/train"
# df = pd.read_csv(os.path.join(base_path, "train.csv"))
# old_labels = df.loc[df.age>=60]
# myaug = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ColorJitter(contrast=0.1),
#     transforms.RandomPosterize(bits=2),
#     transforms.ToTensor()
# ])
# save_aug(myaug, old_labels)

In [5]:
# set random seed (이렇게 하면 되는건가.. 흠)
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
     
seed_everything(42)

## split train, val folders

In [6]:
base_path = "/opt/ml/input/data/train"

# combine two dataframes (original + augmented)
df1 = pd.read_csv(os.path.join(base_path, "train.csv"))
df2 = pd.read_csv(os.path.join(base_path, "old_path_augmented.csv"))
df = df1.append(df2)  # 2892 rows (= 2700 + 192)

y_data = df.apply(lambda x: mapAge(x['path']), axis=1)

x_train, x_val, y_train, y_val = train_test_split(df.index, y_data, test_size=0.2, random_state=42, stratify=y_data)
len(x_train), len(x_val)  # 2313, 579 확인!

(2313, 579)

In [7]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
LEARNING_RATE = 1e-5
EPOCHS = 30

## Dataset & Dataloader

In [9]:
train_dataset = AgeDataset(
    data_path = base_path,
    data = df.iloc[x_train],
    transform = transforms.Compose([
         transforms.Resize((128, 128)),
         transforms.ToTensor()
    ])
)

val_dataset = AgeDataset(
    data_path = base_path,
    data = df.iloc[x_val],
    transform = transforms.Compose([
         transforms.Resize((128, 128)),
         transforms.ToTensor()
    ])
)

100%|██████████| 2313/2313 [00:00<00:00, 19709.21it/s]
100%|██████████| 579/579 [00:00<00:00, 19721.63it/s]

from dataset: 2313
from dataset: 579





In [10]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    drop_last = True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    drop_last = True
)

# x, y = next(iter(train_loader))
# x.shape, y.shape  # (torch.Size([32, 3, 128, 128]), torch.Size([32]))

In [11]:
# model, loss, optm
resnet18 = MyResNet18(num_classes=3).to(DEVICE)
multi_criterion = nn.MultiLabelSoftMarginLoss(reduction='mean')
optm = torch.optim.Adam(resnet18.parameters(), lr=LEARNING_RATE)

In [12]:
# calculates accuracy
def get_acc(y_pred, y_test):  # torch.Size([32, 3]) torch.Size([32])
    output = torch.argmax(y_pred, dim=1)
    correct = sum(output == y_test)/len(output)
    return torch.sum(correct)

# labels with proper dimension
def labeling(x, num_labels):
    output = torch.zeros((x.shape[0], num_labels))
    for i in range(x.shape[0]):
        output[i,x[i]] = 1
    return output

# F1 score
f1 = F1(num_classes=3)

## Train

In [13]:
# train

tr_writer = SummaryWriter("logs/exp_15/tr")
val_writer = SummaryWriter("logs/exp_15/val")

for ep in tqdm(range(EPOCHS)):

    # == training phase =================================
    train_loss_ep = 0
    train_accuracy_ep = 0
    train_f1_ep = 0
    for X, y in iter(train_loader):
        
        resnet18.train()
        
        # change dim for loss func
        _y = labeling(y.unsqueeze(1), 3)  # torch.Size([32, 3])
        X, _y = X.to(DEVICE), _y.to(DEVICE)
        
        predict = resnet18(X)  # torch.Size([32, 3])
        
        train_loss = multi_criterion(predict, _y)
        train_accuracy = get_acc(predict, y.squeeze().to(DEVICE))
        train_f1 = f1(torch.argmax(predict, 1).to("cpu"), y)
        
        optm.zero_grad()
    
        train_loss.backward()
        optm.step()
        
        train_loss_ep += train_loss
        train_accuracy_ep += train_accuracy
        train_f1_ep += train_f1
        # end of one dataloader
    
    train_loss_avg = train_loss_ep / len(train_loader)
    train_accuracy_avg = train_accuracy_ep / len(train_loader)
    train_f1_avg = train_f1_ep / len(train_loader)
    
    # tensorboard
    tr_writer.add_scalar(
        'loss',
        train_loss_avg,
        ep
    )
    tr_writer.add_scalar(
        'acc',
        train_accuracy_avg,
        ep
    )
    tr_writer.add_scalar(
        'f1',
        train_f1_avg,
        ep
    )
    
    # === testing phase =================================
    with torch.no_grad():
        val_loss_ep = 0
        val_accuracy_ep = 0
        val_f1_ep = 0
        for X, y in iter(val_loader):
            ## testing phrase
            resnet18.eval()

            # change dim for loss func
            _y = labeling(y.unsqueeze(1), 3)

            X, _y = X.to(DEVICE), _y.to(DEVICE)
            predict = resnet18(X)

            val_loss = multi_criterion(predict, _y)
            val_accuracy = get_acc(predict, y.squeeze().to(DEVICE))
            val_f1 = f1(torch.argmax(predict, 1).to("cpu"), y)

            val_loss_ep += val_loss
            val_accuracy_ep += val_accuracy
            val_f1_ep += val_f1


        val_loss_avg = val_loss_ep / len(val_loader)
        val_accuracy_avg = val_accuracy_ep / len(val_loader)
        val_f1_avg = val_f1_ep / len(val_loader)
    
    # tensorboard
    val_writer.add_scalar(
        'loss',
        val_loss_avg,
        ep
    )
    val_writer.add_scalar(
        'acc',
        val_accuracy_avg,
        ep
    )
    val_writer.add_scalar(
        'f1',
        val_f1_avg,
        ep
    )
    
    print("train_loss_avg:", train_loss_avg.item(), "train_accuracy_avg:", train_accuracy_avg.item(), "train_f1_avg:", train_f1_avg)
    print("val_loss_avg:", val_loss_avg.item(), "val_accuracy_avg:", val_accuracy_avg.item(), "val_f1_avg:", val_f1_avg)

  3%|▎         | 1/30 [00:24<11:40, 24.16s/it]

train_loss_avg: 0.43779584765434265 train_accuracy_avg: 0.7366336584091187 train_f1_avg: tensor(0.7366)
val_loss_avg: 0.25326958298683167 val_accuracy_avg: 0.8536706566810608 val_f1_avg: tensor(0.8537)


  7%|▋         | 2/30 [00:48<11:16, 24.15s/it]

train_loss_avg: 0.2713545858860016 train_accuracy_avg: 0.8444926142692566 train_f1_avg: tensor(0.8445)
val_loss_avg: 0.20579001307487488 val_accuracy_avg: 0.87202388048172 val_f1_avg: tensor(0.8720)


 10%|█         | 3/30 [01:12<10:50, 24.09s/it]

train_loss_avg: 0.20599499344825745 train_accuracy_avg: 0.888737678527832 train_f1_avg: tensor(0.8887)
val_loss_avg: 0.18783751130104065 val_accuracy_avg: 0.8829365372657776 val_f1_avg: tensor(0.8829)


 13%|█▎        | 4/30 [01:36<10:26, 24.10s/it]

train_loss_avg: 0.15631328523159027 train_accuracy_avg: 0.9252475500106812 train_f1_avg: tensor(0.9252)
val_loss_avg: 0.1812954992055893 val_accuracy_avg: 0.888888955116272 val_f1_avg: tensor(0.8889)


 17%|█▋        | 5/30 [02:00<10:04, 24.17s/it]

train_loss_avg: 0.1163567304611206 train_accuracy_avg: 0.9549505114555359 train_f1_avg: tensor(0.9550)
val_loss_avg: 0.181706503033638 val_accuracy_avg: 0.8928571939468384 val_f1_avg: tensor(0.8929)


 20%|██        | 6/30 [02:24<09:39, 24.16s/it]

train_loss_avg: 0.08627931028604507 train_accuracy_avg: 0.9732673764228821 train_f1_avg: tensor(0.9733)
val_loss_avg: 0.1862054020166397 val_accuracy_avg: 0.8948413133621216 val_f1_avg: tensor(0.8948)


 23%|██▎       | 7/30 [02:49<09:17, 24.22s/it]

train_loss_avg: 0.06564103066921234 train_accuracy_avg: 0.9813737869262695 train_f1_avg: tensor(0.9814)
val_loss_avg: 0.19475562870502472 val_accuracy_avg: 0.8945932984352112 val_f1_avg: tensor(0.8946)


 27%|██▋       | 8/30 [03:13<08:53, 24.25s/it]

train_loss_avg: 0.054399918764829636 train_accuracy_avg: 0.9823638796806335 train_f1_avg: tensor(0.9824)
val_loss_avg: 0.2018149048089981 val_accuracy_avg: 0.8928571939468384 val_f1_avg: tensor(0.8929)


 30%|███       | 9/30 [03:37<08:29, 24.28s/it]

train_loss_avg: 0.061474867165088654 train_accuracy_avg: 0.9748144149780273 train_f1_avg: tensor(0.9748)
val_loss_avg: 0.20702315866947174 val_accuracy_avg: 0.8955853581428528 val_f1_avg: tensor(0.8956)


 33%|███▎      | 10/30 [04:02<08:05, 24.29s/it]

train_loss_avg: 0.0627971738576889 train_accuracy_avg: 0.9766708016395569 train_f1_avg: tensor(0.9767)
val_loss_avg: 0.2011970579624176 val_accuracy_avg: 0.8980655074119568 val_f1_avg: tensor(0.8981)


 37%|███▋      | 11/30 [04:26<07:41, 24.26s/it]

train_loss_avg: 0.04555661603808403 train_accuracy_avg: 0.9829826951026917 train_f1_avg: tensor(0.9830)
val_loss_avg: 0.21223242580890656 val_accuracy_avg: 0.8928571939468384 val_f1_avg: tensor(0.8929)


 40%|████      | 12/30 [04:50<07:17, 24.31s/it]

train_loss_avg: 0.04471603035926819 train_accuracy_avg: 0.9830445647239685 train_f1_avg: tensor(0.9830)
val_loss_avg: 0.21212993562221527 val_accuracy_avg: 0.8983135223388672 val_f1_avg: tensor(0.8983)


 43%|████▎     | 13/30 [05:15<06:53, 24.31s/it]

train_loss_avg: 0.044635217636823654 train_accuracy_avg: 0.9821163415908813 train_f1_avg: tensor(0.9821)
val_loss_avg: 0.21838022768497467 val_accuracy_avg: 0.8911210894584656 val_f1_avg: tensor(0.8911)


 47%|████▋     | 14/30 [05:39<06:28, 24.31s/it]

train_loss_avg: 0.041064318269491196 train_accuracy_avg: 0.9833539724349976 train_f1_avg: tensor(0.9834)
val_loss_avg: 0.2131245881319046 val_accuracy_avg: 0.897569477558136 val_f1_avg: tensor(0.8976)


 50%|█████     | 15/30 [06:03<06:04, 24.30s/it]

train_loss_avg: 0.03801656514406204 train_accuracy_avg: 0.984158456325531 train_f1_avg: tensor(0.9842)
val_loss_avg: 0.22589392960071564 val_accuracy_avg: 0.8933532238006592 val_f1_avg: tensor(0.8934)


 53%|█████▎    | 16/30 [06:28<05:40, 24.32s/it]

train_loss_avg: 0.036728017032146454 train_accuracy_avg: 0.985148549079895 train_f1_avg: tensor(0.9851)
val_loss_avg: 0.22264882922172546 val_accuracy_avg: 0.895089328289032 val_f1_avg: tensor(0.8951)


 57%|█████▋    | 17/30 [06:52<05:15, 24.28s/it]

train_loss_avg: 0.03629192337393761 train_accuracy_avg: 0.9849010109901428 train_f1_avg: tensor(0.9849)
val_loss_avg: 0.21960784494876862 val_accuracy_avg: 0.897569477558136 val_f1_avg: tensor(0.8976)


 60%|██████    | 18/30 [07:16<04:50, 24.25s/it]

train_loss_avg: 0.03390559181571007 train_accuracy_avg: 0.984839141368866 train_f1_avg: tensor(0.9848)
val_loss_avg: 0.23732726275920868 val_accuracy_avg: 0.8931052088737488 val_f1_avg: tensor(0.8931)


 63%|██████▎   | 19/30 [07:40<04:26, 24.26s/it]

train_loss_avg: 0.032863255590200424 train_accuracy_avg: 0.9878712892532349 train_f1_avg: tensor(0.9879)
val_loss_avg: 0.21668946743011475 val_accuracy_avg: 0.902033805847168 val_f1_avg: tensor(0.9020)


 67%|██████▋   | 20/30 [08:04<04:02, 24.26s/it]

train_loss_avg: 0.030375299975275993 train_accuracy_avg: 0.9883663654327393 train_f1_avg: tensor(0.9884)
val_loss_avg: 0.2350822240114212 val_accuracy_avg: 0.89384925365448 val_f1_avg: tensor(0.8938)


 70%|███████   | 21/30 [08:29<03:38, 24.24s/it]

train_loss_avg: 0.029693249613046646 train_accuracy_avg: 0.9887376427650452 train_f1_avg: tensor(0.9887)
val_loss_avg: 0.23294806480407715 val_accuracy_avg: 0.8965774178504944 val_f1_avg: tensor(0.8966)


 73%|███████▎  | 22/30 [08:53<03:14, 24.27s/it]

train_loss_avg: 0.028608987107872963 train_accuracy_avg: 0.9887376427650452 train_f1_avg: tensor(0.9887)
val_loss_avg: 0.23408085107803345 val_accuracy_avg: 0.8968254327774048 val_f1_avg: tensor(0.8968)


 77%|███████▋  | 23/30 [09:17<02:50, 24.29s/it]

train_loss_avg: 0.027610881254076958 train_accuracy_avg: 0.9897277355194092 train_f1_avg: tensor(0.9897)
val_loss_avg: 0.24797217547893524 val_accuracy_avg: 0.8940972685813904 val_f1_avg: tensor(0.8941)


 80%|████████  | 24/30 [09:42<02:26, 24.34s/it]

train_loss_avg: 0.026461830362677574 train_accuracy_avg: 0.9896039962768555 train_f1_avg: tensor(0.9896)
val_loss_avg: 0.2356463223695755 val_accuracy_avg: 0.8955853581428528 val_f1_avg: tensor(0.8956)


 80%|████████  | 24/30 [10:04<02:31, 25.18s/it]


KeyboardInterrupt: 

In [None]:
# # How to Save

# best_val_loss = 0
# best_val_f1 = 0

# for ep in tqdm(range(EPOCHS)):
    
#     # train, valid ...
    
#     if best_val_f1 < val_f1_avg or best_val_loss >= val_loss_avg:
#         best_val_loss = val_loss_avg
#         best_val_f1 = val_f1_avg
#         torch.save(resnet18.state_dict(), f"{resnet18.__class__.__name__}_{ep}_loss{train_loss_avg:.2f}_acc{train_accuracy_avg:2f}.pt")
        
# #         torch.save({
# #             'epoch': ep,
# #             'model_state_dict': resnet18.state_dict(),
# #             'optimizer_state_dict': optm.state_dict(),
# #             'loss': train_loss_avg,
# #             }, f"saved/{resnet18.__class__.__name__}_{ep}_loss{train_loss_avg:.2f}_acc{train_accuracy_avg:2f}.pt")

