In [1]:
import argparse
import glob
import json
import multiprocessing
import os
import random
import re
from importlib import import_module
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from dataset import ProfileClassEqualSplitTrainMaskDataset
from loss import create_criterion

import pandas as pd

In [2]:
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

In [3]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


In [4]:
def grid_image(np_images, gts, preds, n=16, shuffle=False):
    batch_size = np_images.shape[0]
    assert n <= batch_size

    choices = random.choices(range(batch_size), k=n) if shuffle else list(range(n))
    figure = plt.figure(figsize=(12, 18 + 2))  # cautions: hardcoded, 이미지 크기에 따라 figsize 를 조정해야 할 수 있습니다. T.T
    plt.subplots_adjust(top=0.8)               # cautions: hardcoded, 이미지 크기에 따라 top 를 조정해야 할 수 있습니다. T.T
    n_grid = np.ceil(n ** 0.5)
    tasks = ["mask", "gender", "age"]
    for idx, choice in enumerate(choices):
        gt = gts[choice].item()
        pred = preds[choice].item()
        image = np_images[choice]
        # title = f"gt: {gt}, pred: {pred}"
        gt_decoded_labels = ProfileClassEqualSplitTrainMaskDataset.decode_multi_class(gt)
        pred_decoded_labels = ProfileClassEqualSplitTrainMaskDataset.decode_multi_class(pred)
        title = "\n".join([
            f"{task} - gt: {gt_label}, pred: {pred_label}"
            for gt_label, pred_label, task
            in zip(gt_decoded_labels, pred_decoded_labels, tasks)
        ])

        plt.subplot(n_grid, n_grid, idx + 1, title=title)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(image, cmap=plt.cm.binary)

    return figure

In [5]:
def increment_path(path, exist_ok=False):
    """ Automatically increment path, i.e. runs/exp --> runs/exp0, runs/exp1 etc.

    Args:
        path (str or pathlib.Path): f"{model_dir}/{args.name}".
        exist_ok (bool): whether increment path (increment if False).
    """
    path = Path(path)
    if (path.exists() and exist_ok) or (not path.exists()):
        return str(path)
    else:
        dirs = glob.glob(f"{path}*")
        matches = [re.search(rf"%s(\d+)" % path.stem, d) for d in dirs]
        i = [int(m.groups()[0]) for m in matches if m]
        n = max(i) + 1 if i else 2
        return f"{path}{n}"

In [6]:
def mask_label(label):
    if label<6:
        return 0
    elif label<12:
        return 1
    else:
        return 3
    
def gender_label(label):
    if label%6<3:
        return 0
    else:
        return 1

def age_label(label):
    if label%3==0:
        return 0
    elif label%3==1:
        return 1
    else: 
        return 2
    
def change_age_label(label):
    if label%3==0:
        return 0
    elif label%3==1:
        return 0
    else: 
        return 1

In [7]:
def make_correlation(real,pred, mask=None, over=None):
    if over==True:
        correct_arr=torch.zeros((2))
        len_arr=torch.zeros((2))
        for i in range(len(real)):
            if real[i]==pred[i]:
                correct_arr[real[i]]+=1
            len_arr[real[i]]+=1
        cor=correct_arr/len_arr       
        col=['under60','over60']
        return pd.DataFrame(len_arr.view([-1,2]).numpy(),columns=col),pd.DataFrame(cor.view([-1,2]).numpy(),columns=col)
    if mask==True:
        correct_arr=torch.zeros((3))
        len_arr=torch.zeros((3))
        for i in range(len(real)):
            if real[i]==pred[i]:
                correct_arr[real[i]]+=1
            len_arr[real[i]]+=1
        cor=correct_arr/len_arr       
        col=['under30','30to60','over60']
        return pd.DataFrame(len_arr.view([-1,3]).numpy(),columns=col),pd.DataFrame(cor.view([-1,3]).numpy(),columns=col)

    correct_arr=torch.ones((18))
    len_arr=torch.ones((18))
    for i in range(len(real)):
        if real[i]==pred[i]:
            correct_arr[real[i]]+=1
        len_arr[real[i]]+=1
    cor=correct_arr/len_arr       
    col=['under30','30to60','over60']
    ind=['mask_m','mask_f','incorrect_m','incorrect_f','correct_m','incorrect_f']
    return pd.DataFrame(cor.view([-1,3]).numpy(),index=ind,columns=col)

In [8]:
def train(data_dir, model_dir, args):
    seed_everything(args.seed)

    save_dir = increment_path(os.path.join(model_dir, args.name))

    # -- settings
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # -- dataset
    dataset_module = getattr(import_module("dataset"), args.dataset)  # default: BaseAugmentation
    dataset = dataset_module(
        data_dir=data_dir,
    )
    num_classes = dataset.age_num_classes  # 18

    # -- augmentation
    transform_module = getattr(import_module("dataset"), args.augmentation)  # default: BaseAugmentation
    transform = transform_module(
        resize=args.resize,
        mean=dataset.mean,
        std=dataset.std,
    )
    dataset.set_transform(transform)

    # -- data_loader
    train_set, val_set = dataset.split_dataset()

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        num_workers=multiprocessing.cpu_count()//2,
        shuffle=True,
        pin_memory=use_cuda,
        drop_last=True,
    )

    val_loader = DataLoader(
        val_set,
        batch_size=args.valid_batch_size,
        num_workers=multiprocessing.cpu_count()//2,
        shuffle=False,
        pin_memory=use_cuda,
        drop_last=True,
    )

    # -- model
    model_module = getattr(import_module("model"), args.model)  # default: BaseModel
    model = model_module(num_classes=2).to(device)
    model = torch.nn.DataParallel(model)

    # -- loss & metric
    criterion = create_criterion(args.f1_criterion)  # default: cross_entropy
    opt_module = getattr(import_module("torch.optim"), args.optimizer)  # default: SGD
    optimizer = opt_module(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=args.lr,
        weight_decay=5e-4
    )
    scheduler = StepLR(optimizer, args.lr_decay_step, gamma=0.5)

    # -- logging
    logger = SummaryWriter(log_dir=save_dir)
    with open(os.path.join(save_dir, 'config.json'), 'w', encoding='utf-8') as f:
        json.dump(vars(args), f, ensure_ascii=False, indent=4)

    best_val_acc = 0
    best_val_loss = np.inf
    for epoch in range(args.epochs):
        # train loop
        model.train()
        loss_value = 0
        matches = 0
        for idx, train_batch in enumerate(train_loader):
            inputs, labels = train_batch
            inputs = inputs.to(device)
            
            labels=torch.tensor(list(map(change_age_label,labels)))
            
            labels = labels.to(device)

            optimizer.zero_grad()

            outs = model(inputs)
            
            preds = torch.argmax(outs, dim=-1)
            loss = criterion(outs, labels)
            

            loss.backward()
            optimizer.step()
            
            loss_value += loss.item()
            
            
            
            matches += (preds == labels).sum().item()
            if (idx + 1) % args.log_interval == 0:
                train_loss = loss_value / args.log_interval
                train_acc = matches / args.batch_size / args.log_interval
                current_lr = get_lr(optimizer)
                print(
                    f"Epoch[{epoch}/{args.epochs}]({idx + 1}/{len(train_loader)}) || "
                    f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%} || lr {current_lr}"
                )
                logger.add_scalar("Train/loss", train_loss, epoch * len(train_loader) + idx)
                logger.add_scalar("Train/accuracy", train_acc, epoch * len(train_loader) + idx)

                loss_value = 0
                matches = 0

        scheduler.step()

        # val loop
        with torch.no_grad():
            print("Calculating validation results...")
            model.eval()
            val_loss_items = []
            val_acc_items = []
            figure = None
            all_labels=[]
            all_preds=[]
            for val_batch in val_loader:
                inputs, labels = val_batch
                inputs = inputs.to(device)
                
                labels=torch.tensor(list(map(change_age_label,labels)))
                
                labels = labels.to(device)

                outs = model(inputs)
                preds = torch.argmax(outs, dim=-1)
                
                ######################################################################

                all_labels.extend(labels)
                all_preds.extend(preds)
                

                
                loss_item = criterion(outs, labels).item()
                acc_item = (labels == preds).sum().item()
                val_loss_items.append(loss_item)
                val_acc_items.append(acc_item)

                if figure is None:
                    inputs_np = torch.clone(inputs).detach().cpu().permute(0, 2, 3, 1).numpy()
                    inputs_np = dataset_module.denormalize_image(inputs_np, dataset.mean, dataset.std)
                    figure = grid_image(
                        inputs_np, labels, preds, n=16, shuffle=args.dataset != "MaskSplitByProfileDataset"
                    )
            
            val_loss = np.sum(val_loss_items) / len(val_loader)
            val_acc = np.sum(val_acc_items) / len(val_set)
            best_val_loss = min(best_val_loss, val_loss)
            if val_acc > best_val_acc:
                print(f"New best model for val accuracy : {val_acc:4.2%}! saving the best model..")
                torch.save(model.module.state_dict(), f"{save_dir}/best.pth")
                best_val_acc = val_acc
            torch.save(model.module.state_dict(), f"{save_dir}/last.pth")
            print(
                f"[Val] acc : {val_acc:4.2%}, loss: {val_loss:4.2} || "
                f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
            )
            logger.add_scalar("Val/loss", val_loss, epoch)
            logger.add_scalar("Val/accuracy", val_acc, epoch)
            logger.add_figure("results", figure, epoch)
            
            len_arr,cor=make_correlation(all_labels,all_preds,mask=True,over=True)
            print(len_arr)
            print()
            print(cor)
            print()
  ######################################################################
  

In [9]:
import easydict
args=easydict.EasyDict({
    'seed':42,
    'epochs':5,
    'dataset':'ProfileClassEqualSplitTrainMaskDataset',
    'augmentation':'BaseAugmentation',
    'resize':[256,192],
    'batch_size':64,
    'valid_batch_size':1000,
    'model':'Resnet34',
    'optimizer':'Adam',
    'lr':1e-3,
    'val_ratio':0.2,
    'criterion':'cross_entropy',
    'f1_criterion':'f1',
    'lr_decay_step':20,
    'log_interval':20,
    'name':'exp',
    'data_dir':os.environ.get('SM_CHANNEL_TRAIN', '/opt/ml/input/data/train/images'),
    'model_dir':os.environ.get('SM_MODEL_DIR', './model')
})

In [10]:
train('/opt/ml/input/data','./model',args)

Epoch[0/5](20/236) || training loss 0.492 || training accuracy 90.86% || lr 0.001
Epoch[0/5](40/236) || training loss 0.4532 || training accuracy 90.00% || lr 0.001
Epoch[0/5](60/236) || training loss 0.4158 || training accuracy 90.39% || lr 0.001
Epoch[0/5](80/236) || training loss 0.3931 || training accuracy 91.64% || lr 0.001
Epoch[0/5](100/236) || training loss 0.3716 || training accuracy 93.36% || lr 0.001
Epoch[0/5](120/236) || training loss 0.357 || training accuracy 92.34% || lr 0.001
Epoch[0/5](140/236) || training loss 0.3551 || training accuracy 93.52% || lr 0.001
Epoch[0/5](160/236) || training loss 0.3511 || training accuracy 92.66% || lr 0.001
Epoch[0/5](180/236) || training loss 0.3311 || training accuracy 94.92% || lr 0.001
Epoch[0/5](200/236) || training loss 0.3532 || training accuracy 91.72% || lr 0.001
Epoch[0/5](220/236) || training loss 0.3688 || training accuracy 89.92% || lr 0.001
Calculating validation results...
New best model for val accuracy : 77.25%! saving

In [11]:
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    from dotenv import load_dotenv
    import os
    load_dotenv(verbose=True)

    # Data and model checkpoints directories
    parser.add_argument('--seed', type=int, default=42, help='random seed (default: 42)')
    parser.add_argument('--epochs', type=int, default=1, help='number of epochs to train (default: 1)')
    parser.add_argument('--dataset', type=str, default='MaskBaseDataset', help='dataset augmentation type (default: MaskBaseDataset)')
    parser.add_argument('--augmentation', type=str, default='BaseAugmentation', help='data augmentation type (default: BaseAugmentation)')
    parser.add_argument("--resize", nargs="+", type=list, default=[128, 96], help='resize size for image when training')
    parser.add_argument('--batch_size', type=int, default=64, help='input batch size for training (default: 64)')
    parser.add_argument('--valid_batch_size', type=int, default=1000, help='input batch size for validing (default: 1000)')
    parser.add_argument('--model', type=str, default='BaseModel', help='model type (default: BaseModel)')
    parser.add_argument('--optimizer', type=str, default='SGD', help='optimizer type (default: SGD)')
    parser.add_argument('--lr', type=float, default=1e-3, help='learning rate (default: 1e-3)')
    parser.add_argument('--val_ratio', type=float, default=0.2, help='ratio for validaton (default: 0.2)')
    parser.add_argument('--criterion', type=str, default='cross_entropy', help='criterion type (default: cross_entropy)')
    parser.add_argument('--lr_decay_step', type=int, default=20, help='learning rate scheduler deacy step (default: 20)')
    parser.add_argument('--log_interval', type=int, default=20, help='how many batches to wait before logging training status')
    parser.add_argument('--name', default='exp', help='model save at {SM_MODEL_DIR}/{name}')

    # Container environment
    parser.add_argument('--data_dir', type=str, default=os.environ.get('SM_CHANNEL_TRAIN', '/opt/ml/input/data/train/images'))
    parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR', './model'))

    args = parser.parse_args()
    print(args)

    data_dir = args.data_dir
    model_dir = args.model_dir

    train(data_dir, model_dir, args)

usage: ipykernel_launcher.py [-h] [--seed SEED] [--epochs EPOCHS]
                             [--dataset DATASET] [--augmentation AUGMENTATION]
                             [--resize RESIZE [RESIZE ...]]
                             [--batch_size BATCH_SIZE]
                             [--valid_batch_size VALID_BATCH_SIZE]
                             [--model MODEL] [--optimizer OPTIMIZER] [--lr LR]
                             [--val_ratio VAL_RATIO] [--criterion CRITERION]
                             [--lr_decay_step LR_DECAY_STEP]
                             [--log_interval LOG_INTERVAL] [--name NAME]
                             [--data_dir DATA_DIR] [--model_dir MODEL_DIR]
ipykernel_launcher.py: error: unrecognized arguments: -f /opt/ml/.local/share/jupyter/runtime/kernel-603eafb4-f1fe-424b-8094-b9ee64965c34.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
cor=torch.tensor(range(3))
col=['under30','30to60','over60']

print(pd.DataFrame(cor.view([-1,3]).numpy(),columns=col))