In [1]:
# # Run just once
# !pip install torchmetrics

In [2]:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import glob
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split
import warnings

import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler, ConcatDataset
from torchvision import transforms, utils
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import F1

from Dataset import AgeDataset
from myutil import mapAttributes, save_aug, mapAge
from model import MyResNet18

In [3]:
warnings.filterwarnings(action='ignore') 

In [4]:
# # run just once! (takes about 30 sec.)
# base_path = "../input/data/train"
# df = pd.read_csv(os.path.join(base_path, "train.csv"))
# old_labels = df.loc[df.age>=60]
# myaug = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.ColorJitter(contrast=0.1),
#     transforms.RandomPosterize(bits=2),
#     transforms.ToTensor()
# ])
# save_aug(myaug, old_labels)

100%|██████████| 192/192 [00:24<00:00,  7.82it/s]


In [5]:
# set random seed (이렇게 하면 되는건가.. 흠)
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
     
seed_everything(42)

## split train, val folders

In [6]:
base_path = "/opt/ml/input/data/train"

# combine two dataframes (original + augmented)
df1 = pd.read_csv(os.path.join(base_path, "train.csv"))
df2 = pd.read_csv(os.path.join(base_path, "old_path_augmented.csv"))
df = df1.append(df2)  # 2892 rows (= 2700 + 192)

y_data = df.apply(lambda x: mapAge(x['path']), axis=1)

x_train, x_val, y_train, y_val = train_test_split(df.index, y_data, test_size=0.2, random_state=42, stratify=y_data)
len(x_train), len(x_val)  # 2313, 579 확인!

(2313, 579)

In [7]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 30

## Dataset & Dataloader

In [8]:
df = pd.read_csv(os.path.join(base_path, "train.csv"))

train_dataset = AgeDataset(
    data_path = base_path,
    data = df.loc[x_train],
    transform = transforms.Compose([
         transforms.Resize((128, 128)),
         transforms.ToTensor()
    ])
)

val_dataset = AgeDataset(
    data_path = base_path,
    data = df.loc[x_val],
    transform = transforms.Compose([
         transforms.Resize((128, 128)),
         transforms.ToTensor()
    ])
)

100%|██████████| 2313/2313 [00:00<00:00, 19241.15it/s]
100%|██████████| 579/579 [00:00<00:00, 20161.74it/s]


In [9]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    drop_last = True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
    drop_last = True
)

# x, y = next(iter(train_loader))
# x.shape, y.shape  # (torch.Size([32, 3, 128, 128]), torch.Size([32]))

In [10]:
# model, loss, optm
resnet18 = MyResNet18(num_classes=3).to(DEVICE)
multi_criterion = nn.MultiLabelSoftMarginLoss(reduction='mean')
optm = torch.optim.Adam(resnet18.parameters(), lr=LEARNING_RATE)

In [11]:
# calculates accuracy
def get_acc(y_pred, y_test):  # torch.Size([32, 3]) torch.Size([32])
    output = torch.argmax(y_pred, dim=1)
    correct = sum(output == y_test)/len(output)
    return torch.sum(correct)

# labels with proper dimension
def labeling(x, num_labels):
    output = torch.zeros((x.shape[0], num_labels))
    for i in range(x.shape[0]):
        output[i,x[i]] = 1
    return output

# F1 score
f1 = F1(num_classes=3)

## Train

In [12]:
# train

tr_writer = SummaryWriter("logs/exp_12/tr")
val_writer = SummaryWriter("logs/exp_12/val")

for ep in tqdm(range(EPOCHS)):

    # == training phase =================================
    train_loss_ep = 0
    train_accuracy_ep = 0
    train_f1_ep = 0
    for X, y in iter(train_loader):
        
        resnet18.train()
        
        # change dim for loss func
        _y = labeling(y.unsqueeze(1), 3)  # torch.Size([32, 3])
        X, _y = X.to(DEVICE), _y.to(DEVICE)
        
        predict = resnet18(X)  # torch.Size([32, 3])
        
        train_loss = multi_criterion(predict, _y)
        train_accuracy = get_acc(predict, y.squeeze().to(DEVICE))
        train_f1 = f1(torch.argmax(predict, 1).to("cpu"), y)
        
        optm.zero_grad()
    
        train_loss.backward()
        optm.step()
        
        train_loss_ep += train_loss
        train_accuracy_ep += train_accuracy
        train_f1_ep += train_f1
        # end of one dataloader
    
    train_loss_avg = train_loss_ep / len(train_loader)
    train_accuracy_avg = train_accuracy_ep / len(train_loader)
    train_f1_avg = train_f1_ep / len(train_loader)
    
    # tensorboard
    tr_writer.add_scalar(
        'train_loss_avg',
        train_loss_avg,
        ep
    )
    tr_writer.add_scalar(
        'train_accuracy_avg',
        train_accuracy_avg,
        ep
    )
    tr_writer.add_scalar(
        'train_f1_avg',
        train_f1_avg,
        ep
    )
    
    # === testing phase =================================
    with torch.no_grad():
        val_loss_ep = 0
        val_accuracy_ep = 0
        val_f1_ep = 0
        for X, y in iter(val_loader):
            ## testing phrase
            resnet18.eval()

            # change dim for loss func
            _y = labeling(y.unsqueeze(1), 3)

            X, _y = X.to(DEVICE), _y.to(DEVICE)
            predict = resnet18(X)

            val_loss = multi_criterion(predict, _y)
            val_accuracy = get_acc(predict, y.squeeze().to(DEVICE))
            val_f1 = f1(torch.argmax(predict, 1).to("cpu"), y)

            val_loss_ep += val_loss
            val_accuracy_ep += val_accuracy
            val_f1_ep += val_f1


        val_loss_avg = val_loss_ep / len(val_loader)
        val_accuracy_avg = val_accuracy_ep / len(val_loader)
        val_f1_avg = val_f1_ep / len(val_loader)
    
    # tensorboard
    val_writer.add_scalar(
        'val_loss_avg',
        val_loss_avg,
        ep
    )
    val_writer.add_scalar(
        'val_accuracy_avg',
        val_accuracy_avg,
        ep
    )
    val_writer.add_scalar(
        'val_f1_avg',
        val_f1_avg,
        ep
    )
    
    print("train_loss_avg:", train_loss_avg.item(), "train_accuracy_avg:", train_accuracy_avg.item(), "train_f1_avg:", train_f1_avg)
    print("val_loss_avg:", val_loss_avg.item(), "val_accuracy_avg:", val_accuracy_avg.item(), "val_f1_avg:", val_f1_avg)

  3%|▎         | 1/30 [00:24<11:42, 24.24s/it]

train_loss_avg: 0.3161337375640869 train_accuracy_avg: 0.810519814491272 train_f1_avg: tensor(0.8105)
val_loss_avg: 0.24205219745635986 val_accuracy_avg: 0.85317462682724 val_f1_avg: tensor(0.8532)


  7%|▋         | 2/30 [00:48<11:17, 24.19s/it]

train_loss_avg: 0.17072679102420807 train_accuracy_avg: 0.915099024772644 train_f1_avg: tensor(0.9151)
val_loss_avg: 0.22901587188243866 val_accuracy_avg: 0.8764881491661072 val_f1_avg: tensor(0.8765)


 10%|█         | 3/30 [01:12<10:52, 24.17s/it]

train_loss_avg: 0.10740253329277039 train_accuracy_avg: 0.9539604187011719 train_f1_avg: tensor(0.9540)
val_loss_avg: 0.23859208822250366 val_accuracy_avg: 0.8841766119003296 val_f1_avg: tensor(0.8842)


 13%|█▎        | 4/30 [01:36<10:27, 24.14s/it]

train_loss_avg: 0.06800457090139389 train_accuracy_avg: 0.9760519862174988 train_f1_avg: tensor(0.9761)
val_loss_avg: 0.20037534832954407 val_accuracy_avg: 0.905754029750824 val_f1_avg: tensor(0.9058)


 17%|█▋        | 5/30 [02:00<10:05, 24.21s/it]

train_loss_avg: 0.052631959319114685 train_accuracy_avg: 0.9801980257034302 train_f1_avg: tensor(0.9802)
val_loss_avg: 0.20469212532043457 val_accuracy_avg: 0.906994104385376 val_f1_avg: tensor(0.9070)


 20%|██        | 6/30 [02:25<09:40, 24.18s/it]

train_loss_avg: 0.03425491601228714 train_accuracy_avg: 0.9879332184791565 train_f1_avg: tensor(0.9879)
val_loss_avg: 0.19832223653793335 val_accuracy_avg: 0.9126984477043152 val_f1_avg: tensor(0.9127)


 23%|██▎       | 7/30 [02:49<09:17, 24.22s/it]

train_loss_avg: 0.020756512880325317 train_accuracy_avg: 0.9931930899620056 train_f1_avg: tensor(0.9932)
val_loss_avg: 0.22970439493656158 val_accuracy_avg: 0.9060020446777344 val_f1_avg: tensor(0.9060)


 27%|██▋       | 8/30 [03:14<08:56, 24.37s/it]

train_loss_avg: 0.015586357563734055 train_accuracy_avg: 0.9954826831817627 train_f1_avg: tensor(0.9955)
val_loss_avg: 0.24454696476459503 val_accuracy_avg: 0.9084821939468384 val_f1_avg: tensor(0.9085)


 30%|███       | 9/30 [03:39<08:37, 24.65s/it]

train_loss_avg: 0.012813189998269081 train_accuracy_avg: 0.997029721736908 train_f1_avg: tensor(0.9970)
val_loss_avg: 0.2691914737224579 val_accuracy_avg: 0.90327388048172 val_f1_avg: tensor(0.9033)


 33%|███▎      | 10/30 [04:03<08:12, 24.61s/it]

train_loss_avg: 0.02396000176668167 train_accuracy_avg: 0.9909653663635254 train_f1_avg: tensor(0.9910)
val_loss_avg: 0.31640681624412537 val_accuracy_avg: 0.8829365372657776 val_f1_avg: tensor(0.8829)


 37%|███▋      | 11/30 [04:28<07:48, 24.64s/it]

train_loss_avg: 0.024752095341682434 train_accuracy_avg: 0.9878712892532349 train_f1_avg: tensor(0.9879)
val_loss_avg: 0.27101826667785645 val_accuracy_avg: 0.8985615372657776 val_f1_avg: tensor(0.8986)


 40%|████      | 12/30 [04:53<07:24, 24.70s/it]

train_loss_avg: 0.013903932645916939 train_accuracy_avg: 0.9946163892745972 train_f1_avg: tensor(0.9946)
val_loss_avg: 0.2775839865207672 val_accuracy_avg: 0.8953373432159424 val_f1_avg: tensor(0.8953)


 43%|████▎     | 13/30 [05:17<06:57, 24.57s/it]

train_loss_avg: 0.004060744773596525 train_accuracy_avg: 0.9988861680030823 train_f1_avg: tensor(0.9989)
val_loss_avg: 0.2764400541782379 val_accuracy_avg: 0.90327388048172 val_f1_avg: tensor(0.9033)


 47%|████▋     | 14/30 [05:41<06:31, 24.50s/it]

train_loss_avg: 0.0016956357285380363 train_accuracy_avg: 0.9999381303787231 train_f1_avg: tensor(0.9999)
val_loss_avg: 0.285016268491745 val_accuracy_avg: 0.9005457162857056 val_f1_avg: tensor(0.9005)


 50%|█████     | 15/30 [06:06<06:06, 24.41s/it]

train_loss_avg: 0.001063520205207169 train_accuracy_avg: 0.9999381303787231 train_f1_avg: tensor(0.9999)
val_loss_avg: 0.30449268221855164 val_accuracy_avg: 0.9015377759933472 val_f1_avg: tensor(0.9015)


 53%|█████▎    | 16/30 [06:30<05:40, 24.32s/it]

train_loss_avg: 0.0010539666982367635 train_accuracy_avg: 0.9999381303787231 train_f1_avg: tensor(0.9999)
val_loss_avg: 0.31167715787887573 val_accuracy_avg: 0.9027778506278992 val_f1_avg: tensor(0.9028)


 57%|█████▋    | 17/30 [06:54<05:16, 24.36s/it]

train_loss_avg: 0.028254186734557152 train_accuracy_avg: 0.9873144030570984 train_f1_avg: tensor(0.9873)
val_loss_avg: 0.26927241683006287 val_accuracy_avg: 0.9012897610664368 val_f1_avg: tensor(0.9013)


 60%|██████    | 18/30 [07:18<04:51, 24.31s/it]

train_loss_avg: 0.034952856600284576 train_accuracy_avg: 0.9835396409034729 train_f1_avg: tensor(0.9835)
val_loss_avg: 0.32314732670783997 val_accuracy_avg: 0.8918651342391968 val_f1_avg: tensor(0.8919)


 63%|██████▎   | 19/30 [07:43<04:27, 24.33s/it]

train_loss_avg: 0.005068671423941851 train_accuracy_avg: 0.9983910918235779 train_f1_avg: tensor(0.9984)
val_loss_avg: 0.30836305022239685 val_accuracy_avg: 0.9005457162857056 val_f1_avg: tensor(0.9005)


 67%|██████▋   | 20/30 [08:07<04:03, 24.37s/it]

train_loss_avg: 0.0020776838064193726 train_accuracy_avg: 0.9994431138038635 train_f1_avg: tensor(0.9994)
val_loss_avg: 0.3095111548900604 val_accuracy_avg: 0.9035218954086304 val_f1_avg: tensor(0.9035)


 70%|███████   | 21/30 [08:32<03:39, 24.37s/it]

train_loss_avg: 0.0016063719522207975 train_accuracy_avg: 0.9996906518936157 train_f1_avg: tensor(0.9997)
val_loss_avg: 0.2963673770427704 val_accuracy_avg: 0.9102182984352112 val_f1_avg: tensor(0.9102)


 73%|███████▎  | 22/30 [08:56<03:14, 24.36s/it]

train_loss_avg: 0.0005533727817237377 train_accuracy_avg: 0.9999381303787231 train_f1_avg: tensor(0.9999)
val_loss_avg: 0.3006106913089752 val_accuracy_avg: 0.9097222685813904 val_f1_avg: tensor(0.9097)


 77%|███████▋  | 23/30 [09:20<02:50, 24.34s/it]

train_loss_avg: 0.00035639957059174776 train_accuracy_avg: 1.0 train_f1_avg: tensor(1.)
val_loss_avg: 0.30650487542152405 val_accuracy_avg: 0.9114583730697632 val_f1_avg: tensor(0.9115)


 80%|████████  | 24/30 [09:45<02:25, 24.32s/it]

train_loss_avg: 0.00026701774913817644 train_accuracy_avg: 1.0 train_f1_avg: tensor(1.)
val_loss_avg: 0.31427621841430664 val_accuracy_avg: 0.9112103581428528 val_f1_avg: tensor(0.9112)


 83%|████████▎ | 25/30 [10:09<02:01, 24.26s/it]

train_loss_avg: 0.000493402243591845 train_accuracy_avg: 0.9999381303787231 train_f1_avg: tensor(0.9999)
val_loss_avg: 0.3322078585624695 val_accuracy_avg: 0.9112103581428528 val_f1_avg: tensor(0.9112)


 87%|████████▋ | 26/30 [10:33<01:37, 24.26s/it]

train_loss_avg: 0.021138319745659828 train_accuracy_avg: 0.9901609420776367 train_f1_avg: tensor(0.9902)
val_loss_avg: 0.42308419942855835 val_accuracy_avg: 0.8415179252624512 val_f1_avg: tensor(0.8415)


 90%|█████████ | 27/30 [10:57<01:12, 24.30s/it]

train_loss_avg: 0.05035959929227829 train_accuracy_avg: 0.973948061466217 train_f1_avg: tensor(0.9739)
val_loss_avg: 0.3325302004814148 val_accuracy_avg: 0.8923611640930176 val_f1_avg: tensor(0.8924)


 93%|█████████▎| 28/30 [11:22<00:48, 24.29s/it]

train_loss_avg: 0.007511787582188845 train_accuracy_avg: 0.9969678521156311 train_f1_avg: tensor(0.9970)
val_loss_avg: 0.39886611700057983 val_accuracy_avg: 0.8869048357009888 val_f1_avg: tensor(0.8869)


 97%|█████████▋| 29/30 [11:46<00:24, 24.28s/it]

train_loss_avg: 0.007497994229197502 train_accuracy_avg: 0.9961014986038208 train_f1_avg: tensor(0.9961)
val_loss_avg: 0.34848788380622864 val_accuracy_avg: 0.8990575671195984 val_f1_avg: tensor(0.8991)


100%|██████████| 30/30 [12:10<00:00, 24.35s/it]

train_loss_avg: 0.002167279366403818 train_accuracy_avg: 0.9994431138038635 train_f1_avg: tensor(0.9994)
val_loss_avg: 0.3560841977596283 val_accuracy_avg: 0.8960813879966736 val_f1_avg: tensor(0.8961)





In [13]:
# # How to Save

# best_val_loss = 0
# best_val_f1 = 0

# for ep in tqdm(range(EPOCHS)):
    
#     # train, valid ...
    
#     if best_val_f1 < val_f1_avg or best_val_loss >= val_loss_avg:
#         best_val_loss = val_loss_avg
#         best_val_f1 = val_f1_avg
#         torch.save(resnet18.state_dict(), f"{resnet18.__class__.__name__}_{ep}_loss{train_loss_avg:.2f}_acc{train_accuracy_avg:2f}.pt")
        
# #         torch.save({
# #             'epoch': ep,
# #             'model_state_dict': resnet18.state_dict(),
# #             'optimizer_state_dict': optm.state_dict(),
# #             'loss': train_loss_avg,
# #             }, f"saved/{resnet18.__class__.__name__}_{ep}_loss{train_loss_avg:.2f}_acc{train_accuracy_avg:2f}.pt")

