# 1. 패키지 로드

In [64]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from PIL import Image
from tqdm import tqdm
import time
import gc
import random
from box import Box
import cv2
import cvlib as cv
import timm

import warnings
warnings.filterwarnings('ignore')

# 2. 학습 관련 함수

## 2-1. seed 고정

In [65]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and CUDA), and
    switches cuDNN to deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Python / NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, the current GPU, then every GPU.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # needed when using multiple GPUs

    # Disable cuDNN auto-tuning and request deterministic algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## 2-2. 데이터 전처리

In [66]:
# Build the `ages` class
def get_ages(x):
    """Return the age class: 0 if x < 30, 1 if x < 60, otherwise 2."""
    if x < 30:
        return 0
    if x < 60:
        return 1
    return 2

# Build the `genders` class
def get_genders(x):
    """Return 0 when x equals 'male', and 1 for any other value."""
    return 0 if x == 'male' else 1

# Build the `masks` class
def get_masks(x):
    """Return 2 for 'normal', 1 for 'incorrect_mask', and 0 for anything else."""
    if x == 'normal':
        return 2
    if x == 'incorrect_mask':
        return 1
    return 0

# # age_cats 생성
# def get_age_cats(x):
#     if x < 20: return 0
#     elif x < 30: return 1
#     elif x < 40: return 2
#     elif x < 50: return 3
#     elif x < 60: return 4
#     else: return 5

def get_age_cats(x):
    """Return a fine-grained age bucket in 0..6.

    Buckets are delimited by the exclusive upper bounds 25, 30, 45, 52, 57
    and 60; anything not below 60 falls in bucket 6.
    """
    upper_bounds = (25, 30, 45, 52, 57, 60)
    for bucket, upper in enumerate(upper_bounds):
        if x < upper:
            return bucket
    return len(upper_bounds)

# Build the combined `labels` class
def get_labels(masks, genders, ages):
    """Combine mask, gender and age classes into one label.

    The label is ``masks * 6 + genders * 3 + ages``.
    """
    mask_part = masks * 6
    gender_part = genders * 3
    return mask_part + gender_part + ages

# Build the combined `label_cats` class
def get_label_cats(masks, genders, ages, n_age_cats = 6):
    """Combine mask, gender and fine-grained age classes into one label.

    The label is ``masks * (2 * n_age_cats) + genders * n_age_cats + ages``.
    With the default ``n_age_cats = 6`` this is the original
    ``masks * 12 + genders * 6 + ages``.

    NOTE(review): the encoding is only collision-free when every ``ages``
    value is below ``n_age_cats``. ``get_age_cats`` in this file returns
    values 0..6 (seven buckets), so with the default stride an age bucket
    of 6 produces the same label as bucket 0 of the next gender. Pass
    ``n_age_cats = 7`` to get unique labels for that bucketing.

    Args:
        masks: Mask class.
        genders: Gender class (0 or 1 is assumed by the factor of 2).
        ages: Fine-grained age bucket.
        n_age_cats: Number of distinct age buckets used as the stride.

    Returns:
        The combined label.
    """
    gender_stride = n_age_cats
    mask_stride = 2 * n_age_cats
    return masks * mask_stride + genders * gender_stride + ages

# Fix mask outliers
def swap_mask(swap_li : list, df : pd.DataFrame) -> pd.DataFrame:
    """Exchange the 'normal' and 'incorrect_mask' image paths for given ids.

    For every id in ``swap_li`` the ``path`` of the rows whose ``mask`` is
    'normal' is replaced by the first 'incorrect_mask' path of that id, and
    vice versa. The input frame is not modified.

    Args:
        swap_li: Values of the ``id`` column whose two paths are exchanged.
        df: Frame with ``id``, ``mask`` and ``path`` columns.

    Returns:
        A copy of ``df`` with the paths exchanged.

    Raises:
        IndexError: If an id has no 'normal' or no 'incorrect_mask' row.
    """
    result = df.copy()
    for person_id in swap_li:
        person_rows = result[result['id'] == person_id]

        normal_rows = person_rows[person_rows['mask'] == 'normal']
        incorrect_rows = person_rows[person_rows['mask'] == 'incorrect_mask']

        # Read both paths before writing so neither value is overwritten.
        path_of_normal = normal_rows['path'].values[0]
        path_of_incorrect = incorrect_rows['path'].values[0]

        result.loc[normal_rows.index, 'path'] = path_of_incorrect
        result.loc[incorrect_rows.index, 'path'] = path_of_normal

    return result

# Build train_df and fix the mask outliers
def make_train_df(df : pd.DataFrame, swap_mask_li : list, cfg) -> pd.DataFrame:
    """Expand the per-person frame into one row per image file.

    For every row of ``df`` the directory ``cfg.train_image_dir/<path>`` is
    listed; each file whose name does not start with '.' becomes one output
    row carrying the raw attributes, the derived classes, and the full image
    path. Finally the mask outliers in ``swap_mask_li`` are fixed with
    ``swap_mask``.

    Files are visited in sorted order so that the resulting row order (and
    therefore the ``idx`` column) is reproducible; ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        df: Frame with ``id``, ``gender``, ``age`` and ``path`` columns.
        swap_mask_li: Ids forwarded to ``swap_mask``.
        cfg: Config object exposing ``train_image_dir``.

    Returns:
        Frame with one row per image and an ``idx`` column equal to the row
        position.
    """
    records = []

    # Explicit positional iteration over the rows of `df`.
    for row_pos in range(len(df)):
        line = df.iloc[row_pos]
        person_dir = os.path.join(cfg.train_image_dir, line['path'])

        # Per-person values do not depend on the file, so compute them once.
        gender = line['gender']
        age = line['age']
        genders = get_genders(gender)
        ages = get_ages(age)
        age_cats = get_age_cats(age)

        for file in sorted(os.listdir(person_dir)):
            # Skip hidden files.
            if file.startswith('.'):
                continue

            # The file name stem is the mask type.
            mask = file.split('.')[0]
            masks = get_masks(mask)

            records.append({
                'id' : line['id'],
                'mask' : mask,
                'gender' : gender,
                'age' : age,
                'masks' : masks,
                'genders' : genders,
                'ages' : ages,
                'age_cats' : age_cats,
                'labels': get_labels(masks = masks, genders = genders, ages = ages),
                'label_cats': get_label_cats(masks = masks, genders = genders, ages = age_cats),
                'path': os.path.join(person_dir, file),
            })

    train_df = pd.DataFrame(records)

    # Keep the original row position so later subsets can be mapped back.
    train_df['idx'] = train_df.index

    return swap_mask(swap_li = swap_mask_li, df = train_df)

# Fix gender outliers
def swap_gender(swap_li : list, df : pd.DataFrame) -> pd.DataFrame:
    """Overwrite the ``gender`` column for the given ids.

    Args:
        swap_li: Sequence of ``(id, gender)`` pairs.
        df: Frame with ``id`` and ``gender`` columns; it is not modified.

    Returns:
        A copy of ``df`` in which every row matching an id has the paired
        gender value.
    """
    result = df.copy()
    for person_id, corrected_gender in swap_li:
        matching_index = result[result['id'] == person_id].index
        result.loc[matching_index, 'gender'] = corrected_gender
    return result

# Per-person split data and gender outlier fix
def preprocessing_df(df : pd.DataFrame, swap_gender_li : list) -> pd.DataFrame:
    """Prepare the per-person frame used for the cross-validation split.

    Applies the gender corrections, derives the ``ages`` and ``genders``
    class columns, and builds ``cv_taget_col``, a string of the form
    ``ages_<a>_genders_<g>``.

    Args:
        df: Frame with ``id``, ``age`` and ``gender`` columns; not modified.
        swap_gender_li: ``(id, gender)`` pairs forwarded to ``swap_gender``.

    Returns:
        A new frame with the three added columns.
    """
    result = swap_gender(swap_li = swap_gender_li, df = df.copy())

    result['ages'] = result['age'].apply(get_ages)
    result['genders'] = result['gender'].apply(get_genders)

    age_part = 'ages_' + result['ages'].astype(str)
    gender_part = 'genders_' + result['genders'].astype(str)
    result['cv_taget_col'] = age_part + '_' + gender_part

    return result

## 2-3 이상치 시각화

In [67]:
# Visualize the images of outlier ids
def show_img(img_id_li, df, cfg):
    """Plot every image of each given id in one row of seven axes.

    For each id, the age, gender and directory are taken from its first row
    in ``df``. Files in ``cfg.train_image_dir/<path>`` are shown in sorted
    order, skipping names that start with '.'. Each title is
    ``id / age / gender / mask type``.

    Args:
        img_id_li: Values of the ``id`` column to display.
        df: Frame with ``id``, ``age``, ``gender`` and ``path`` columns.
        cfg: Config object exposing ``train_image_dir``.
    """
    for img_id in img_id_li:
        person_rows = df[df['id'] == img_id]

        img_age = person_rows['age'].tolist()[0]
        img_gender = person_rows['gender'].tolist()[0]
        person_dir = os.path.join(cfg.train_image_dir, person_rows['path'].tolist()[0])

        # Hidden files (names starting with '.') are not displayed.
        file_names = [name for name in sorted(os.listdir(person_dir)) if name[0] != '.']

        fig, ax = plt.subplots(1, 7, figsize = (30, 15))
        ax = ax.flatten()

        for idx, file_name in enumerate(file_names):
            # Anything other than the two named types is shown as 'mask'.
            stem = file_name.split('.')[0]
            imag_name = stem if stem in ('normal', 'incorrect_mask') else 'mask'

            img = np.array(Image.open(os.path.join(person_dir, file_name)))
            axis = ax[idx]
            axis.imshow(img)
            axis.set_title(f'{img_id} / {img_age} / {img_gender} / {imag_name}')
            axis.set_xticks([])
            axis.set_yticks([])

        plt.show()

# Visualize images given their paths
def path_li_show_img(path_li):
    """Plot the images at ``path_li`` in one row of seven axes.

    Each axis is titled with the last '/'-separated component of its path.

    Args:
        path_li: Image file paths; one axis is used per path.
    """
    fig, ax = plt.subplots(1, 7, figsize = (30, 15))
    ax = ax.flatten()
    for idx, path in enumerate(path_li):
        image_name = path.split('/')[-1]
        img = np.array(Image.open(path))
        axis = ax[idx]
        axis.imshow(img)
        axis.set_title(f'{image_name}')
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()

## 2-4. 데이터 스플릿

In [68]:
# Generate validation indices
def get_val_idx(df : pd.DataFrame, target_col : str):
    """Yield the validation positions of each of five stratified folds.

    The split is a shuffled ``StratifiedKFold`` with ``random_state = 22``,
    stratified on ``df[target_col]``.

    Args:
        df: Frame to split.
        target_col: Column used for stratification.

    Yields:
        The validation index array of each fold, in fold order.
    """
    splitter = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 22)
    fold_pairs = splitter.split(df, df[target_col])
    for _train_part, val_part in fold_pairs:
        yield val_part

class StratifiedSampler(torch.utils.data.Sampler):
    """Stratified batch sampling.

    Provides equal representation of target classes in each batch by
    splitting the labels into ``len(y) // batch_size`` stratified folds and
    yielding the test indices of each fold as one batch. Because each
    iteration step yields a whole array of indices, this is meant to be
    used as a batch sampler.

    Args:
        y: 1-D array or tensor of class labels.
        batch_size: Approximate number of samples per yielded batch.
        shuffle: Whether the folds are shuffled; when True a fresh random
            state is drawn from the torch RNG on every iteration.
    """
    def __init__(self, y, batch_size, shuffle=True):
        if torch.is_tensor(y):
            y = y.cpu().numpy()
        assert len(y.shape) == 1, 'label array must be 1D'
        # Number of batches produced per epoch (one per fold).
        self.n_batches = int(len(y) / batch_size)
        self.skf = StratifiedKFold(n_splits = self.n_batches, shuffle = shuffle)
        # Placeholder features: only their length is used by the split.
        self.X = torch.randn(len(y),1).numpy()
        self.y = y
        self.shuffle = shuffle

    def __iter__(self):
        if self.shuffle:
            # Re-seed the splitter so each epoch gets different batches.
            self.skf.random_state = torch.randint(0,int(1e8),size=()).item()
        for train_idx, test_idx in self.skf.split(self.X, self.y):
            yield test_idx

    def __len__(self):
        # One item is yielded per batch, so the length is the number of
        # batches rather than the number of samples.
        return self.n_batches

## 2-5. 모델 관련

In [69]:
class CreateModel(nn.Module):
    """Thin wrapper around a timm classification model.

    Args:
        cfg: Config exposing ``timm_model_name`` and ``num_classes``.
        pretrained: Forwarded to ``timm.create_model``.
    """
    def __init__(self, cfg , pretrained : bool = True):
        super().__init__()
        backbone_name = cfg.timm_model_name
        n_outputs = cfg.num_classes
        self.model = timm.create_model(backbone_name, pretrained = pretrained, num_classes = n_outputs)

    def forward(self, img):
        """Return the wrapped model's output for ``img``."""
        return self.model(img)

In [70]:
def model_train(model, optimizer, criterion, data_loader):
    """Train ``model`` for one pass over ``data_loader``.

    Batches are moved to the module-level ``device``. The predicted class is
    the argmax over the last dimension of the model output.

    Args:
        model: Network to train; switched to train mode.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, targets)``.
        data_loader: Iterable of ``(images, targets)`` batches.

    Returns:
        Tuple ``(mean loss per batch, accuracy, macro F1)`` over the pass.
    """
    model.train()

    running_loss = 0
    y_true = []
    y_pred = []

    for images, targets in data_loader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()

        # If gradient accumulation ("GA") is added, it goes here
        # (presumably what the original note meant -- TODO confirm).
        #############################

        logits = model(images)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        batch_pred = logits.argmax(dim=-1)
        y_pred.extend(batch_pred.detach().cpu().numpy())
        y_true.extend(targets.cpu().numpy())

#     y_pred = [label_cats2labels[i] for i in y_pred]
#     y_true = [label_cats2labels[i] for i in y_true]

    mean_loss = running_loss / len(data_loader)
    acc = get_acc_score(y_true = y_true, y_pred = y_pred)
    f1 = get_f1_score(y_true = y_true, y_pred = y_pred)

    return mean_loss, acc, f1

def model_eval(model, criterion, data_loader):
    """Evaluate ``model`` on ``data_loader`` without computing gradients.

    Batches are moved to the module-level ``device``. The predicted class is
    the argmax over the last dimension of the model output.

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Loss called as ``criterion(outputs, targets)``.
        data_loader: Iterable of ``(images, targets)`` batches.

    Returns:
        Tuple ``(mean loss per batch, accuracy, macro F1)``.
    """
    model.eval()

    running_loss = 0
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, targets in data_loader:
            images = images.to(device)
            targets = targets.to(device)

            logits = model(images)
            running_loss += criterion(logits, targets).item()

            batch_pred = logits.argmax(dim=-1)
            y_pred.extend(batch_pred.cpu().numpy())
            y_true.extend(targets.cpu().numpy())

#     y_pred = [label_cats2labels[i] for i in y_pred]
#     y_true = [label_cats2labels[i] for i in y_true]

    mean_loss = running_loss / len(data_loader)
    acc = get_acc_score(y_true = y_true, y_pred = y_pred)
    f1 = get_f1_score(y_true = y_true, y_pred = y_pred)

    return mean_loss, acc, f1

def get_val_pred_li(model, data_loader):
    """Collect predictions, class probabilities and targets for a loader.

    Images are moved to the module-level ``device``; targets are only copied
    to the CPU.

    Args:
        model: Network to run; switched to eval mode.
        data_loader: Iterable of ``(images, targets)`` batches.

    Returns:
        Tuple of (list of argmax predictions, array of softmax outputs for
        all batches concatenated, list of targets).
    """
    model.eval()
    y_true = []
    y_pred = []
    prob_batches = []

    with torch.no_grad():
        for images, targets in data_loader:
            logits = model(images.to(device))

            y_pred.extend(logits.argmax(dim=-1).cpu().numpy())

            # Softmax over dim 1 is kept per batch for later ensembling.
            prob_batches.append(logits.softmax(1).cpu().numpy())

            y_true.extend(targets.cpu().numpy())

#     y_pred = [label_cats2labels[i] for i in y_pred]
#     y_true = [label_cats2labels[i] for i in y_true]

    return y_pred, np.concatenate(prob_batches), y_true

def get_submission_pred_li(model, data_loader):
    """Collect predictions and class probabilities for an unlabeled loader.

    Args:
        model: Network to run; switched to eval mode.
        data_loader: Iterable of image batches (no targets), moved to the
            module-level ``device``.

    Returns:
        Tuple of (list of argmax predictions, array of softmax outputs for
        all batches concatenated).
    """
    model.eval()
    y_pred = []
    prob_batches = []

    with torch.no_grad():
        for images in data_loader:
            logits = model(images.to(device))

            y_pred.extend(logits.argmax(dim=-1).cpu().numpy())

            # Softmax over dim 1 is kept per batch for later ensembling.
            prob_batches.append(logits.softmax(1).cpu().numpy())

#     y_pred = [label_cats2labels[i] for i in y_pred]

    return y_pred, np.concatenate(prob_batches)

## 2-6. 평가

In [71]:
def get_f1_score(y_true, y_pred):
    """Return the macro-averaged F1 score of ``y_pred`` against ``y_true``."""
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return macro_f1

def get_acc_score(y_true, y_pred):
    """Return the accuracy of ``y_pred`` against ``y_true``."""
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

def get_lr(optimizer):
    """Return the ``'lr'`` of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    groups = iter(optimizer.param_groups)
    try:
        first_group = next(groups)
    except StopIteration:
        return None
    return first_group['lr']

## 2-7. 로스

In [72]:
class FocalLoss(nn.Module):
    """Focal loss built on top of the negative log-likelihood loss.

    The log-probabilities are scaled by ``(1 - p) ** gamma`` before being
    passed to ``F.nll_loss``.

    Args:
        weight: Optional per-class weight forwarded to ``F.nll_loss``.
        gamma: Exponent of the ``(1 - p)`` modulating factor.
        reduction: Reduction mode forwarded to ``F.nll_loss``.
    """
    def __init__(self, weight=None, gamma=2., reduction='mean'):
        super().__init__()
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input_tensor, target_tensor):
        """Return the focal loss of logits ``input_tensor`` for ``target_tensor``."""
        log_p = F.log_softmax(input_tensor, dim=-1)
        p = log_p.exp()
        modulated_log_p = ((1 - p) ** self.gamma) * log_p
        return F.nll_loss(
            modulated_log_p,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction,
        )


class LabelSmoothingLoss(nn.Module):
    """Cross entropy against a label-smoothed target distribution.

    The target class receives probability ``1 - smoothing`` and every other
    class ``smoothing / (classes - 1)``.

    Args:
        classes: Number of classes used to spread the smoothing mass.
        smoothing: Probability mass moved away from the target class.
        dim: Dimension over which log-softmax and the sum are taken.
    """
    def __init__(self, classes=3, smoothing=0.0, dim=-1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross entropy of logits ``pred``."""
        log_probs = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Start from the off-target mass everywhere, then write the
            # confidence at each sample's target index (along dim 1).
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        per_sample = torch.sum(-true_dist * log_probs, dim=self.dim)
        return per_sample.mean()


# https://gist.github.com/SuperShinyEyes/dcc68a08ff8b615442e3bc6a9b55a354
class F1Loss(nn.Module):
    """Differentiable (soft) macro-F1 loss.

    Softmax probabilities stand in for hard predictions when counting true
    positives, false positives and false negatives per class. The loss is
    ``1 - mean(per-class F1)``.

    Args:
        classes: Number of classes used for one-hot encoding the targets.
        epsilon: Small constant guarding the divisions; also the clamp
            margin applied to the per-class F1.
    """
    def __init__(self, classes=3, epsilon=1e-7):
        super().__init__()
        self.classes = classes
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        """Return the soft-F1 loss.

        Args:
            y_pred: 2-D logits (asserted).
            y_true: 1-D integer class targets (asserted).
        """
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        y_true = F.one_hot(y_true, self.classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        # Soft counts per class, summed over the batch dimension. True
        # negatives are not needed for F1, so they are not computed.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()

# 3. 학습 환경 설정

## 3-1. config

In [73]:
# Version tag embedded in the submission file name and the model names below.
v = 1
# Global settings plus one sub-config per model (mask / gender / age).
config = {
    'seed' : 22,
    # Column of the per-person frame used to stratify the CV split.
    'data_split_col' : 'cv_taget_col',
    # Number of folds the training / evaluation loops iterate over.
    'oof' : 5,
    'tagets_col' : 'labels',
    
    'train_data_name' : 'train.csv',
    'train_data_dir' : '/opt/ml/input/data/train',
    'train_image_dir' : '/opt/ml/input/data/train/images',
    
    'submission_data_name' : 'info.csv',
    'submission_data_dir' : '/opt/ml/input/data/eval',
    'submission_image_dir' : '/opt/ml/input/data/eval/images',
    
    'file_name' : f'Ensembel_v{v}.csv',
    
    # Settings for the mask classifier (3 classes).
    'maskModel':{
        
        # Columns read by CustomDataset as target / split target.
        'tagets_col' : 'masks',
        'split_col' : 'masks',
        
        'timm_model_name' : 'regnety_002',
        
        'model_dir' : '/opt/ml/model',
        'model_name' : f'mask_model_v{v}',
        'num_workers' : 3,
        'epochs' : 10,
        'batch_size' : 128,
        'lr' : 9e-05,
        'num_classes' : 3,
        
        # Loss name resolved by get_criterion; 'smoothing' is only read
        # there for the 'labelsmoothing' loss.
        'loss' : 'cel',
        'smoothing' : 0.1,
        
        # Passed to Resize / Normalize in the transform pipeline.
        'image_size' : [512, 384],
        'image_normal_mean' : [0.5, 0.5, 0.5],
        'image_normal_std' : [0.2, 0.2, 0.2],
        
    },
    
    # Settings for the gender classifier (2 classes).
    'genderModel':{
        
        'tagets_col' : 'genders',
        'split_col' : 'genders',
        
        'timm_model_name' : 'regnety_002',
        
        'model_dir' : '/opt/ml/model',
        'model_name' : f'gender_model_v{v}',
        'num_workers' : 3,
        'epochs' : 10,
        'batch_size' : 128,
        'lr' : 9e-05,
        'num_classes' : 2,
        
        'loss' : 'cel',
        'smoothing' : 0.1,
        
        'image_size' : [512, 384],
        'image_normal_mean' : [0.5, 0.5, 0.5],
        'image_normal_std' : [0.2, 0.2, 0.2],
        
    },
    
    # Settings for the age classifier (3 classes, label-smoothing loss).
    'ageModel':{
        
        'tagets_col' : 'ages',
        'split_col' : 'ages',
        
        'timm_model_name' : 'regnety_002',
        
        'model_dir' : '/opt/ml/model',
        'model_name' : f'age_model_v{v}',
        'num_workers' : 3,
        'epochs' : 20,
        'batch_size' : 128,
        'lr' : 9e-05,
        'num_classes' : 3,
        
        'loss' : 'labelsmoothing',
        'smoothing' : 0.1,
        
        'image_size' : [512, 384],
        'image_normal_mean' : [0.5, 0.5, 0.5],
        'image_normal_std' : [0.2, 0.2, 0.2],
        
    },
    
}

# Wrap in a Box so entries can be read with attribute access (config.seed).
config = Box(config)

## 3-2 이미지

In [74]:
# 변환할 transform
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize, Lambda, RandomHorizontalFlip, ToPILImage, CenterCrop, Grayscale

def image_face_crop(image):
    """Crop ``image`` around the first detected face with a 100-pixel margin.

    The image is converted to a NumPy array and passed to
    ``cv.detect_face``. When no face is returned, the uncropped array is
    returned. The crop is clipped to the image bounds.

    NOTE(review): the four values of the first detection are used as
    (left, top, right, bottom) slice limits -- confirm against the cvlib
    documentation.

    Args:
        image: Image convertible with ``np.array``; its array must have
            three dimensions.

    Returns:
        The cropped (or original) image as a NumPy array.
    """
    pixels = np.array(image)
    faces, _confidences = cv.detect_face(pixels)
    if not faces:
        return pixels

    left, top, right, bottom = faces[0]
    height, width, _channels = pixels.shape

    row_start = max(top - 100, 0)
    row_stop = min(bottom + 100, height)
    col_start = max(0 , left - 100)
    col_stop = min(right + 100, width)
    return pixels[row_start : row_stop, col_start : col_stop]

# One preprocessing pipeline per model, keyed by the model's config name.
# All three share the same steps and differ only in the config they read:
# resize -> tensor -> normalize -> 3-channel grayscale.
transform = {
    model_key: transforms.Compose(
        [
            Resize(config[model_key].image_size, Image.BILINEAR),
            ToTensor(),
            Normalize(mean=config[model_key].image_normal_mean, std=config[model_key].image_normal_std),
            Grayscale(num_output_channels = 3),
        ]
    )
    for model_key in ("maskModel", "genderModel", "ageModel")
}

In [75]:
class CustomDataset(Dataset):
    """Image dataset for training/validation (``mode=True``) or submission.

    In training mode the image paths come from the ``path`` column and the
    targets from ``cfg.tagets_col``; ``cfg.split_col`` is stored as
    ``split_targets``. With ``mode_type == 'arg'`` the ``arg_types`` column
    selects, per sample, between ``transform`` (value 0) and
    ``arg_transform`` (any other value). In submission mode the paths are
    ``cfg.submission_image_dir`` joined with each ``ImageID``.

    Args:
        df: Source frame.
        cfg: Config providing the column names / submission directory.
        transform: Transform applied to each image.
        mode: True for labeled data, False for submission data.
        mode_type: ``'arg'`` enables per-sample transform selection.
        arg_transform: Alternative transform used in ``'arg'`` mode.
    """
    def __init__(self, df : pd.DataFrame, cfg, transform = None, mode : bool = True, mode_type = None, arg_transform = None):
        self.mode = mode
        self.df = df
        self.mode_type = mode_type
        self.transform = transform
        self.arg_transform = arg_transform

        if not self.mode:
            # Submission data: only image ids are available.
            self.img_paths = [os.path.join(cfg.submission_image_dir, img_id) for img_id in self.df.ImageID]
            return

        self.img_paths = self.df['path'].tolist()
        self.targets = self.df[cfg.tagets_col].tolist()
        self.split_targets = self.df[cfg.split_col].tolist()
        if self.mode_type =='arg':
            self.arg_cols = self.df['arg_types'].tolist()

    def __getitem__(self, index):
        """Return ``(image, target)`` in labeled mode, else just ``image``."""
        image = Image.open(self.img_paths[index])
        if self.mode_type =='arg':
            chosen = self.transform if self.arg_cols[index] == 0 else self.arg_transform
            image = chosen(image)
        elif self.transform:
            image = self.transform(image)

        # TODO (original note): add label-dependent augmentation here.
        # Validation data must not be augmented, so it would be guarded:
        # if self.<augmenting transform>:
        #     if self.targets[index].data == labels: <- applied with some probability
        #          image = self.<augmenting transform>(image)

        if not self.mode:
            return image

        return image, torch.tensor(self.targets[index])

    def __len__(self):
        """Return the number of image paths."""
        return len(self.img_paths)

In [76]:
def get_trn_val_dataset_dataloader(trn_df : pd.DataFrame, val_df : pd.DataFrame, cfg, model_name : str):
    """Build the train/validation datasets and their data loaders.

    Both datasets use the pipeline ``transform[model_name]``. The training
    loader shuffles; the validation loader does not.

    Args:
        trn_df: Training rows.
        val_df: Validation rows.
        cfg: Model config providing ``batch_size`` and ``num_workers`` (and
            the columns read by ``CustomDataset``).
        model_name: Key into the module-level ``transform`` dict.

    Returns:
        Tuple ``(trn_dataset, train_loader, val_dataset, val_loader)``.
    """
    pipeline = transform[model_name]
    loader_kwargs = {'batch_size' : cfg.batch_size, 'num_workers' : cfg.num_workers}

    trn_dataset = CustomDataset(df = trn_df, cfg = cfg, transform = pipeline, mode = True)
    val_dataset = CustomDataset(df = val_df, cfg = cfg, transform = pipeline, mode = True)

    train_loader = DataLoader(trn_dataset, shuffle = True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle = False, **loader_kwargs)

    return trn_dataset, train_loader, val_dataset, val_loader

def get_criterion(config):
    """Build the loss function named by ``config.loss``.

    Supported names: ``'cel'`` (cross entropy), ``'labelsmoothing'``,
    ``'focal'`` and ``'f1'``.

    Args:
        config: Model config. ``num_classes`` and ``smoothing`` are read for
            the losses that need them; ``weight`` is optional and only used
            by the focal loss (defaults to ``None`` when absent).

    Returns:
        The instantiated loss module.

    Raises:
        ValueError: If ``config.loss`` is not one of the supported names.
    """
    loss_name = config.loss
    if loss_name == 'cel':
        return nn.CrossEntropyLoss()
    if loss_name == 'labelsmoothing':
        return LabelSmoothingLoss(classes=config.num_classes, smoothing = config.smoothing, dim=-1)
    if loss_name == 'focal':
        # None of the model configs define `weight`, so treat it as optional.
        class_weight = getattr(config, 'weight', None)
        return FocalLoss(weight = class_weight, gamma=2.0, reduction='mean')
    if loss_name == 'f1':
        return F1Loss(classes=config.num_classes, epsilon=1e-7)

    # Fail with a clear message instead of falling through to an unbound
    # local variable.
    raise ValueError(
        f"unsupported loss {loss_name!r}; expected one of "
        "'cel', 'labelsmoothing', 'focal', 'f1'"
    )

def train(model, optimizer, criterion, train_loader, val_loader, scheduler, config, version = '', fold_num = None):
    """Train for ``config.epochs`` epochs, saving the best-F1 checkpoint.

    After each epoch the metrics are printed, the scheduler is stepped with
    the validation loss, and the model's state dict is saved whenever the
    validation macro F1 improves on the best value so far.

    Args:
        model: Network to train.
        optimizer: Optimizer passed to ``model_train``.
        criterion: Loss used for training and validation.
        train_loader: Training data loader.
        val_loader: Validation data loader.
        scheduler: Scheduler stepped with the validation loss.
        config: Model config providing ``epochs``, ``model_dir`` and
            ``model_name``.
        version: Prefix of the checkpoint file name.
        fold_num: Fold number used in the log line and the checkpoint name.
            When omitted, the module-level ``fold_num`` (the loop variable
            of the calling cell) is used, as before.

    Raises:
        NameError: If ``fold_num`` is omitted and no module-level
            ``fold_num`` exists.
    """
    if fold_num is None:
        # Backward compatibility: earlier callers relied on the global loop
        # variable. Resolve it up front so a missing value fails before any
        # training time is spent.
        try:
            fold_num = globals()['fold_num']
        except KeyError:
            raise NameError("name 'fold_num' is not defined; pass fold_num explicitly") from None

    # torch.save does not create missing directories.
    os.makedirs(config.model_dir, exist_ok = True)

    besf_f1 = 0
    for epoch in range(1, config.epochs + 1):
        epoch_start_time = time.time()

        train_loss, train_acc, train_fi_score = model_train(model = model, 
                                                        optimizer = optimizer, 
                                                        criterion = criterion, 
                                                        data_loader = train_loader)

        val_loss, val_acc, val_fi_score, = model_eval(model = model,
                                                  criterion = criterion,
                                                  data_loader = val_loader)

        now_lr = get_lr(optimizer = optimizer)

        epoch_end_time = time.time()

        print(f'''{fold_num}fold, epoch: {epoch}, lr: {now_lr}, train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, train_f1: {train_fi_score:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}, val_fi: {val_fi_score:.4f}, 학습시간: {epoch_end_time - epoch_start_time} \n''')

        scheduler.step(val_loss)

        # Keep only the checkpoint with the best validation F1.
        if besf_f1 < val_fi_score:
            besf_f1 = val_fi_score
            torch.save(model.state_dict(), os.path.join(config.model_dir, f'{version}_{fold_num}fold_{config.model_name}.pt'))
            print(val_fi_score, '모델 저장')
            
def get_eval_img_show(train_df, idx_li, pred_labels, config):
    """Plot up to seven misclassified images for every true class.

    Rows are selected from ``train_df`` by their ``idx`` values, paired with
    ``pred_labels``, and filtered to those whose ``config.tagets_col``
    differs from the prediction. One figure of seven axes is shown per
    class in ``range(config.num_classes)``.

    Args:
        train_df: Frame with ``idx``, ``path`` and the target column.
        idx_li: ``idx`` values of the evaluated rows, in prediction order.
        pred_labels: Predicted labels aligned with ``idx_li``.
        config: Model config providing ``tagets_col`` and ``num_classes``.
    """
    target_col = config.tagets_col

    shown_df = train_df.set_index('idx').loc[idx_li, :].reset_index(drop = True)
    shown_df['pred_labels'] = pred_labels
    wrong_df = shown_df[shown_df[target_col] != shown_df['pred_labels']]

    for label in range(config.num_classes):
        wrong_for_label = wrong_df[wrong_df[target_col] == label]
        samples = wrong_for_label[['path', target_col, 'pred_labels']].values[:7]

        fig, ax = plt.subplots(1, 7, figsize = (30, 15))
        ax = ax.flatten()
        # At most seven samples are kept, matching the seven axes.
        for axis, (path, true_label, pred_label) in zip(ax, samples):
            img = np.array(Image.open(path))
            axis.imshow(img)
            axis.set_title(f'true {true_label} / pred {pred_label}')
            axis.set_xticks([])
            axis.set_yticks([])
        plt.show()

# 4. 데이터 로드 

In [77]:
# Run on the GPU when CUDA is available, otherwise on the CPU. The training
# and inference helpers read this module-level name.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [78]:
# [id, corrected gender] pairs consumed by swap_gender.
swap_gender_li = [["001498-1", "female"], ["004432", "female"],["005223", "female"], 
                  ['006359', 'male'], ['006360', 'male'], ['006361', 'male'], ['006362', 'male'], ['006363', 'male'], ['006364', 'male'],]
# Ids whose 'normal' and 'incorrect_mask' image paths are exchanged by swap_mask.
swap_mask_li = ['000020', '004418', '005227']

In [79]:
# Seed all random number generators before any data or model work.
seed_everything(config.seed)

In [80]:
# Load the training metadata and the submission template.
df = pd.read_csv(os.path.join(config.train_data_dir, config.train_data_name))
submission = pd.read_csv(os.path.join(config.submission_data_dir, config.submission_data_name))
# Remember the original row position of every submission row.
submission['idx'] = submission.index

# 5. Mask Model

In [81]:
# Prefix used in the checkpoint file names of the mask model.
version = 'mask'

In [None]:
# Train the mask model with cross-validation: one fresh model per fold.
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)

# The split is done on the per-person frame (pre_df), not on single images.
all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()
    
    # train / validation datasets
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))
    
    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]
    
    # Select the image rows belonging to the ids of each side of the split.
    val_df = train_df.set_index('id').loc[val_id_df['id'].tolist(), :].reset_index(drop = True)
    trn_df = train_df.set_index('id').loc[trn_id_df['id'].tolist(), :].reset_index(drop = True)
    
    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df, 
                                                                                        val_df = val_df, 
                                                                                        cfg = config.maskModel, 
                                                                                        model_name = 'maskModel')
    
    # define the model, loss, optimizer and LR scheduler
    model = CreateModel(cfg = config.maskModel, pretrained = True).to(device)
    criterion = get_criterion(config = config.maskModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.maskModel.lr, amsgrad = True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 3)
    
    # train() reads the loop variable `fold_num` from module scope.
    train(model = model,
          optimizer = optimizer, 
          criterion = criterion, 
          train_loader = train_loader, 
          val_loader = val_loader, 
          scheduler = scheduler, 
          config = config.maskModel,
         version = version)
    
    fold_end_time = time.time()
    
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')
    
    # free memory between folds
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

1fold, epoch: 1, lr: 9e-05, train_loss: 0.3123, train_acc: 0.9091, train_f1: 0.8529, val_loss: 0.0475, val_acc: 0.9945, val_fi: 0.9919, 학습시간: 66.67264175415039 

0.9919379814974117 모델 저장
1fold, epoch: 2, lr: 9e-05, train_loss: 0.0289, train_acc: 0.9960, train_f1: 0.9939, val_loss: 0.0335, val_acc: 0.9934, val_fi: 0.9903, 학습시간: 66.7642285823822 

1fold, epoch: 3, lr: 9e-05, train_loss: 0.0110, train_acc: 0.9989, train_f1: 0.9985, val_loss: 0.0159, val_acc: 0.9968, val_fi: 0.9953, 학습시간: 67.55312180519104 

0.995297609444599 모델 저장
1fold, epoch: 4, lr: 9e-05, train_loss: 0.0095, train_acc: 0.9983, train_f1: 0.9975, val_loss: 0.0210, val_acc: 0.9945, val_fi: 0.9917, 학습시간: 67.51592302322388 

1fold, epoch: 5, lr: 9e-05, train_loss: 0.0050, train_acc: 0.9992, train_f1: 0.9988, val_loss: 0.0137, val_acc: 0.9960, val_fi: 0.9937, 학습시간: 67.77535772323608 

1fold, epoch: 6, lr: 9e-05, train_loss: 0.0039, train_acc: 0.9994, train_f1: 0.9990, val_loss: 0.0161, val_acc: 0.9963, val_fi: 0.9946, 학습시간: 

In [82]:
# Out-of-fold evaluation of the mask model: reload each fold's checkpoint
# and predict on that fold's validation split.
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)

all_idx_li = pre_df.index.tolist()
# Same splitter settings as in training, so the folds are recreated.
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Accumulated over all folds.
real_labels = []
pred_labels = []
idx_li = []

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()
    
    # validation split of this fold
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))
    
    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]
    
    val_df = train_df.set_index('id').loc[val_id_df['id'].tolist(), :].reset_index(drop = True)
    trn_df = train_df.set_index('id').loc[trn_id_df['id'].tolist(), :].reset_index(drop = True)
    
    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df, 
                                                                                        val_df = val_df, 
                                                                                        cfg = config.maskModel, 
                                                                                        model_name = 'maskModel')
    # Weights come from the saved checkpoint, so no pretrained download.
    model = CreateModel(cfg = config.maskModel, pretrained = False).to(device)
    model.load_state_dict(torch.load(os.path.join(config.maskModel.model_dir, f'{version}_{fold_num}fold_{config.maskModel.model_name}.pt')))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(model = model, data_loader = val_loader)
    
    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    # Row ids of the evaluated images, aligned with pred_labels.
    idx_li += val_df['idx'].tolist()
    
    fold_end_time = time.time()
    
    _acc = get_acc_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    _f1_score = get_f1_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    # The printed duration covers this fold's loading and inference.
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 12.581791877746582, acc: 0.9973593873778717, f1_score: 0.9962921663275243 

2fold 훈련 시간: 11.721102714538574, acc: 0.9984126984126984, f1_score: 0.9977744791834811 

3fold 훈련 시간: 11.36781120300293, acc: 0.9986772486772487, f1_score: 0.9981440208859516 

4fold 훈련 시간: 12.443526029586792, acc: 0.9986796936889358, f1_score: 0.9981513431113194 

5fold 훈련 시간: 11.822243928909302, acc: 0.9981481481481481, f1_score: 0.9974054850996427 



In [83]:
# Overall out-of-fold metrics and confusion matrix across all folds.
train_f1 = get_f1_score(y_true = real_labels, y_pred = pred_labels)
train_acc = get_acc_score(y_true = real_labels, y_pred = pred_labels)
train_confusion_matrix = pd.DataFrame((confusion_matrix(y_true = real_labels, y_pred = pred_labels)))
print(f'train confusion_matrix')
# `display` is provided by the notebook environment; the gradient is
# applied per row (axis = 1).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 1))
print(f'train fi : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,13499,11,0
1,21,2681,0
2,1,0,2701


train fi : 0.9976, train acc: 0.9983 



In [None]:
# Show up to seven misclassified images per true mask class.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.maskModel)

# 5. Gender Model

## 5-1. Mask0

In [84]:
# Prefix used in the checkpoint file names of the gender model trained on
# rows with masks == 0.
version = 'gender_mask0'

In [None]:
# Train the gender model with cross-validation on the rows with masks == 0.
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
train_df = train_df[train_df['masks'] == 0].reset_index(drop = True)

# The split is done on the per-person frame (pre_df), not on single images.
all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups: ids that survived the mask filter, and the frame
# indexed by id.
train_id_set = set(train_df['id'].tolist())
train_df_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()
    
    # train / validation datasets
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))
    
    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]
    
    # sorted(): the iteration order of a set of strings changes between
    # interpreter sessions (hash randomization), which would make the row
    # order -- and therefore the shuffled batches -- irreproducible.
    val_ids = sorted(set(val_id_df['id'].tolist()) & train_id_set)
    trn_ids = sorted(set(trn_id_df['id'].tolist()) & train_id_set)
    
    val_df = train_df_by_id.loc[val_ids, :].reset_index(drop = True)
    
    trn_df = train_df_by_id.loc[trn_ids, :].reset_index(drop = True)
    
    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df, 
                                                                                        val_df = val_df, 
                                                                                        cfg = config.genderModel, 
                                                                                        model_name = 'genderModel'
                                                                                       )
    
    # define the model, loss, optimizer and LR scheduler
    model = CreateModel(cfg = config.genderModel, pretrained = True).to(device)
    criterion = get_criterion(config = config.genderModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.genderModel.lr, amsgrad = True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 3)
    
    # train() reads the loop variable `fold_num` from module scope.
    train(model = model,
          optimizer = optimizer, 
          criterion = criterion, 
          train_loader = train_loader, 
          val_loader = val_loader, 
          scheduler = scheduler, 
          config = config.genderModel,
         version = version)
    
    fold_end_time = time.time()
    
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')
    
    # free memory between folds
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [85]:
# Out-of-fold evaluation of the gender model on the rows where masks == 0:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[train_df['masks'] == 0].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.genderModel,
        model_name='genderModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.genderModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.genderModel.model_dir,
        f'{version}_{fold_num}fold_{config.genderModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 8.578167200088501, acc: 0.9811460258780037, f1_score: 0.9801566667515318 

2fold 훈련 시간: 8.941086530685425, acc: 0.9862962962962963, f1_score: 0.9855256639829982 

3fold 훈련 시간: 8.93705439567566, acc: 0.9881481481481481, f1_score: 0.9875062468765617 

4fold 훈련 시간: 8.868355512619019, acc: 0.9829944547134936, f1_score: 0.981989386142466 

5fold 훈련 시간: 8.274568319320679, acc: 0.9825925925925926, f1_score: 0.9816530447632454 



In [86]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1
0,5104,121
1,92,8193


train fi : 0.9834, train acc: 0.9842 



In [None]:
# Visual inspection of the gender-model (masks == 0) evaluation results, using the
# idx_li / pred_labels accumulated by the evaluation cell above.
# NOTE(review): what is actually drawn depends on get_eval_img_show — confirm there.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.genderModel)

## 5-2. Mask1

In [87]:
# Run tag for this section: forwarded to train() and used as the checkpoint
# filename prefix when the per-fold weights are loaded for evaluation.
version = 'gender_mask1'

In [None]:
# K-fold training of the gender model on the rows where masks == 1.
# NOTE(review): mask codes presumably follow get_masks (1 = incorrect mask) — confirm in make_train_df.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[train_df['masks'] == 1].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split: folds are drawn on pre_df rows and then mapped
    # onto the filtered train_df through the shared 'id' column.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Only ids that are still present after the filter can be looked up.
    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.genderModel,
        model_name='genderModel',
    )

    # Model definition: a fresh model, loss, optimizer and LR scheduler per fold.
    model = CreateModel(cfg=config.genderModel, pretrained=True).to(device)
    criterion = get_criterion(config=config.genderModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.genderModel.lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.1, eps=1e-09, patience=3
    )

    train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=config.genderModel,
        version=version,
    )

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup before the next fold allocates a new model.
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [88]:
# Out-of-fold evaluation of the gender model on the rows where masks == 1:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[train_df['masks'] == 1].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.genderModel,
        model_name='genderModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.genderModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.genderModel.model_dir,
        f'{version}_{fold_num}fold_{config.genderModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 2.87207293510437, acc: 0.966728280961183, f1_score: 0.9649146826540612 

2fold 훈련 시간: 2.737664222717285, acc: 0.9796296296296296, f1_score: 0.978552782180756 

3fold 훈련 시간: 2.6834635734558105, acc: 0.9833333333333333, f1_score: 0.982389709212791 

4fold 훈련 시간: 2.7728588581085205, acc: 0.9815157116451017, f1_score: 0.9805423680046037 

5fold 훈련 시간: 2.8625779151916504, acc: 0.975925925925926, f1_score: 0.9746532880318024 



In [89]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1
0,1015,30
1,31,1626


train fi : 0.9762, train acc: 0.9774 



In [None]:
# Visual inspection of the gender-model (masks == 1) evaluation results, using the
# idx_li / pred_labels accumulated by the evaluation cell above.
# NOTE(review): what is actually drawn depends on get_eval_img_show — confirm there.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.genderModel)

## 5-3. Mask2

In [90]:
# Run tag for this section: forwarded to train() and used as the checkpoint
# filename prefix when the per-fold weights are loaded for evaluation.
version = 'gender_mask2'

In [None]:
# K-fold training of the gender model on the rows where masks == 2.
# NOTE(review): mask codes presumably follow get_masks (2 = 'normal', no mask) — confirm in make_train_df.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[train_df['masks'] == 2].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split: folds are drawn on pre_df rows and then mapped
    # onto the filtered train_df through the shared 'id' column.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Only ids that are still present after the filter can be looked up.
    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.genderModel,
        model_name='genderModel',
    )

    # Model definition: a fresh model, loss, optimizer and LR scheduler per fold.
    model = CreateModel(cfg=config.genderModel, pretrained=True).to(device)
    criterion = get_criterion(config=config.genderModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.genderModel.lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.1, eps=1e-09, patience=3
    )

    train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=config.genderModel,
        version=version,
    )

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup before the next fold allocates a new model.
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [91]:
# Out-of-fold evaluation of the gender model on the rows where masks == 2:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[train_df['masks'] == 2].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.genderModel,
        model_name='genderModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.genderModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.genderModel.model_dir,
        f'{version}_{fold_num}fold_{config.genderModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 2.7864930629730225, acc: 0.977818853974122, f1_score: 0.9766508416055244 

2fold 훈련 시간: 2.8143398761749268, acc: 0.9796296296296296, f1_score: 0.9785149256160682 

3fold 훈련 시간: 2.722050905227661, acc: 0.9851851851851852, f1_score: 0.9844155844155844 

4fold 훈련 시간: 2.744270086288452, acc: 0.977818853974122, f1_score: 0.976730515570339 

5fold 훈련 시간: 2.848047971725464, acc: 0.9814814814814815, f1_score: 0.9805194805194806 



In [92]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1
0,1024,21
1,32,1625


train fi : 0.9794, train acc: 0.9804 



In [None]:
# Visual inspection of the gender-model (masks == 2) evaluation results, using the
# idx_li / pred_labels accumulated by the evaluation cell above.
# NOTE(review): what is actually drawn depends on get_eval_img_show — confirm there.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.genderModel)

# 6. Age Model

## 6-1. Mask0 - male

In [93]:
# Run tag for this section: forwarded to train() and used as the checkpoint
# filename prefix when the per-fold weights are loaded for evaluation.
version = 'age_mask0_male'

In [None]:
# K-fold training of the age model on the rows where masks == 0 and genders == 0.
# NOTE(review): codes presumably follow get_masks / get_genders (mask worn, male) — confirm in make_train_df.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
_subset = (train_df['masks'] == 0) & (train_df['genders'] == 0)
train_df = train_df[_subset].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split: folds are drawn on pre_df rows and then mapped
    # onto the filtered train_df through the shared 'id' column.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Only ids that are still present after the filter can be looked up.
    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # Model definition: a fresh model, loss, optimizer and LR scheduler per fold.
    model = CreateModel(cfg=config.ageModel, pretrained=True).to(device)
    criterion = get_criterion(config=config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.1, eps=1e-09, patience=3
    )

    train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=config.ageModel,
        version=version,
    )

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup before the next fold allocates a new model.
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [94]:
# Out-of-fold evaluation of the age model on the rows where masks == 0 and genders == 0:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[(train_df['masks'] == 0) & (train_df['genders'] == 0)].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.ageModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.ageModel.model_dir,
        f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 4.29369854927063, acc: 0.8923809523809524, f1_score: 0.7496901770844241 

2fold 훈련 시간: 4.093985557556152, acc: 0.9234449760765551, f1_score: 0.8589315311019816 

3fold 훈련 시간: 3.957416534423828, acc: 0.8846153846153846, f1_score: 0.7445978959283776 

4fold 훈련 시간: 4.2597925662994385, acc: 0.9138755980861244, f1_score: 0.7764910597216935 

5fold 훈련 시간: 4.192328691482544, acc: 0.9129186602870814, f1_score: 0.8117141506920249 



In [95]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,2716,49,0
1,97,1831,117
2,0,231,184


train fi : 0.7896, train acc: 0.9055 



In [None]:
# Visual inspection of the age-model (masks == 0, genders == 0) evaluation results, using the
# idx_li / pred_labels accumulated by the evaluation cell above.
# NOTE(review): what is actually drawn depends on get_eval_img_show — confirm there.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

## 6-2. Mask1 - male

In [96]:
# Run tag for this section: forwarded to train() and used as the checkpoint
# filename prefix when the per-fold weights are loaded for evaluation.
version = 'age_mask1_male'

In [None]:
# K-fold training of the age model on the rows where masks == 1 and genders == 0.
# NOTE(review): codes presumably follow get_masks / get_genders (incorrect mask, male) — confirm in make_train_df.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
_subset = (train_df['masks'] == 1) & (train_df['genders'] == 0)
train_df = train_df[_subset].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split: folds are drawn on pre_df rows and then mapped
    # onto the filtered train_df through the shared 'id' column.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Only ids that are still present after the filter can be looked up.
    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # Model definition: a fresh model, loss, optimizer and LR scheduler per fold.
    model = CreateModel(cfg=config.ageModel, pretrained=True).to(device)
    criterion = get_criterion(config=config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.1, eps=1e-09, patience=3
    )

    train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=config.ageModel,
        version=version,
    )

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup before the next fold allocates a new model.
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [97]:
# Out-of-fold evaluation of the age model on the rows where masks == 1 and genders == 0:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[(train_df['masks'] == 1) & (train_df['genders'] == 0)].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.ageModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.ageModel.model_dir,
        f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 1.7631845474243164, acc: 0.9095238095238095, f1_score: 0.7404122134556917 

2fold 훈련 시간: 1.8389174938201904, acc: 0.8947368421052632, f1_score: 0.7819961326788635 

3fold 훈련 시간: 1.7746381759643555, acc: 0.8894230769230769, f1_score: 0.7287547452152164 

4fold 훈련 시간: 2.017698287963867, acc: 0.9090909090909091, f1_score: 0.7573305307409354 

5fold 훈련 시간: 1.7591745853424072, acc: 0.9138755980861244, f1_score: 0.7897596656217346 



In [98]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,541,12,0
1,20,377,12
2,0,57,26


train fi : 0.7610, train acc: 0.9033 



In [None]:
# Visual inspection of the age-model (masks == 1, genders == 0) evaluation results, using the
# idx_li / pred_labels accumulated by the evaluation cell above.
# NOTE(review): what is actually drawn depends on get_eval_img_show — confirm there.
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

## 6-3. Mask2 - male

In [99]:
# Run tag for this section: forwarded to train() and used as the checkpoint
# filename prefix when the per-fold weights are loaded for evaluation.
version = 'age_mask2_male'

In [None]:
# K-fold training of the age model on the rows where masks == 2 and genders == 0.
# NOTE(review): codes presumably follow get_masks / get_genders ('normal' i.e. no mask, male) — confirm in make_train_df.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
_subset = (train_df['masks'] == 2) & (train_df['genders'] == 0)
train_df = train_df[_subset].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split: folds are drawn on pre_df rows and then mapped
    # onto the filtered train_df through the shared 'id' column.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Only ids that are still present after the filter can be looked up.
    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # Model definition: a fresh model, loss, optimizer and LR scheduler per fold.
    model = CreateModel(cfg=config.ageModel, pretrained=True).to(device)
    criterion = get_criterion(config=config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad=True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=0.1, eps=1e-09, patience=3
    )

    train(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        scheduler=scheduler,
        config=config.ageModel,
        version=version,
    )

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup before the next fold allocates a new model.
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [100]:
# Out-of-fold evaluation of the age model on the rows where masks == 2 and genders == 0:
# for every fold, load that fold's checkpoint and predict its validation rows.
# NOTE(review): this only matches the training split if get_val_idx is deterministic — confirm.
pre_df = preprocessing_df(df=df, swap_gender_li=swap_gender_li)
train_df = make_train_df(df=pre_df, swap_mask_li=swap_mask_li, cfg=config)
train_df = train_df[(train_df['masks'] == 2) & (train_df['genders'] == 0)].reset_index(drop=True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df=pre_df, target_col=config.data_split_col)

# Accumulated over all folds and consumed by the metric / visualisation cells.
real_labels = []
pred_labels = []
idx_li = []

# Fold-independent lookups, built once instead of once per fold.
_all_idx_set = set(all_idx_li)
_train_id_set = set(train_df['id'].tolist())
_train_by_id = train_df.set_index('id')

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Folds are drawn on pre_df rows and mapped onto train_df through 'id'.
    val_idx = next(val_idx_li)
    trn_idx = list(_all_idx_set - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    _val_ids = list(set(val_id_df['id'].tolist()) & _train_id_set)
    val_df = _train_by_id.loc[_val_ids, :].reset_index(drop=True)

    # The training frame is only needed because the loader factory builds both loaders.
    _trn_ids = list(set(trn_id_df['id'].tolist()) & _train_id_set)
    trn_df = _train_by_id.loc[_trn_ids, :].reset_index(drop=True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(
        trn_df=trn_df,
        val_df=val_df,
        cfg=config.ageModel,
        model_name='ageModel',
    )

    # pretrained=False: the weights are loaded from the per-fold checkpoint below.
    model = CreateModel(cfg=config.ageModel, pretrained=False).to(device)
    _ckpt_path = os.path.join(
        config.ageModel.model_dir,
        f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt',
    )
    # map_location keeps the load working when the checkpoint was saved on a
    # different device than the one used now (e.g. GPU-trained, CPU-evaluated).
    model.load_state_dict(torch.load(_ckpt_path, map_location=device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(
        model=model, data_loader=val_loader
    )

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the aggregate over all folds is computed in the next cell.
    _acc = get_acc_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    _f1_score = get_f1_score(y_true=real_pred_li, y_pred=val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 1.8039555549621582, acc: 0.9047619047619048, f1_score: 0.7136909636909637 

2fold 훈련 시간: 1.7869985103607178, acc: 0.9186602870813397, f1_score: 0.8168999739141394 

3fold 훈련 시간: 1.7820312976837158, acc: 0.8990384615384616, f1_score: 0.7582693226505937 

4fold 훈련 시간: 1.765974521636963, acc: 0.9186602870813397, f1_score: 0.8106365709091291 

5fold 훈련 시간: 1.7857084274291992, acc: 0.9330143540669856, f1_score: 0.8380647130647131 



In [101]:
# Aggregate scores over the predictions collected from every validation fold.
train_f1 = get_f1_score(y_true=real_labels, y_pred=pred_labels)
train_acc = get_acc_score(y_true=real_labels, y_pred=pred_labels)

# sklearn convention: rows are true classes, columns are predicted classes.
train_confusion_matrix = pd.DataFrame(confusion_matrix(y_true=real_labels, y_pred=pred_labels))
print('train confusion_matrix')
# axis=1 scales the colour gradient within each row (i.e. per true class).
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis=1))
# Label fixed: the metric is F1, the original text said "fi".
print(f'train f1 : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,546,7,0
1,20,379,10
2,0,52,31


train fi : 0.7903, train acc: 0.9148 



In [None]:
# Inspect the evaluated samples: passes the 'idx' values collected during evaluation and
# their predicted labels to get_eval_img_show (defined elsewhere; presumably it renders
# the corresponding images - confirm against its definition).
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

## 6-4. Mask0 - female

In [102]:
# Tag of the sub-model handled in this section: it is passed to train() and is the prefix
# of the checkpoint file name ('{version}_{fold}fold_{model_name}.pt') loaded at evaluation.
version = 'age_mask0_female'

In [None]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 0 and genders == 1 ("Mask0 - female").
train_df = train_df[(train_df['masks'] == 0) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups: the id-indexed frame and the set of ids that survived the
# filter are identical for every fold, so they are built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Map the fold's ids onto train_df; ids removed by the filter drop out of the intersection.
    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Model, loss, optimizer and LR scheduler for this fold.
    model = CreateModel(cfg = config.ageModel, pretrained = True).to(device)
    criterion = get_criterion(config = config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad = True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 3)

    train(model = model,
          optimizer = optimizer,
          criterion = criterion,
          train_loader = train_loader,
          val_loader = val_loader,
          scheduler = scheduler,
          config = config.ageModel,
          version = version)

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup. The references must be dropped first: while `model` and
    # `optimizer` are still bound, their tensors stay allocated and empty_cache()
    # has nothing from the finished fold to give back.
    del model, criterion, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [103]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 0 and genders == 1 ("Mask0 - female").
train_df = train_df[(train_df['masks'] == 0) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups, built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

# Accumulated over all folds: true labels, predicted labels and the 'idx' of each row.
real_labels = []
pred_labels = []
idx_li = []

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Rebuild the split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Restore the checkpoint saved for this fold. map_location keeps a checkpoint written
    # on a GPU loadable when `device` is the CPU (or another GPU index).
    model = CreateModel(cfg = config.ageModel, pretrained = False).to(device)
    ckpt_path = os.path.join(config.ageModel.model_dir, f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt')
    model.load_state_dict(torch.load(ckpt_path, map_location = device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(model = model, data_loader = val_loader)

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the pooled scores are computed from the accumulated lists afterwards.
    _acc = get_acc_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    _f1_score = get_f1_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 6.05460000038147, acc: 0.9202416918429003, f1_score: 0.8284541606241326 

2fold 훈련 시간: 5.732055425643921, acc: 0.9148036253776435, f1_score: 0.8170035956649967 

3fold 훈련 시간: 6.138573169708252, acc: 0.9283132530120481, f1_score: 0.8185670628749535 

4fold 훈련 시간: 6.151195764541626, acc: 0.922289156626506, f1_score: 0.8539718186586693 

5fold 훈련 시간: 6.209972143173218, acc: 0.9208459214501511, f1_score: 0.8263663743639814 



In [104]:
# Score the predictions pooled over all folds by the evaluation loop.
train_acc = get_acc_score(y_true = real_labels, y_pred = pred_labels)
train_f1 = get_f1_score(y_true = real_labels, y_pred = pred_labels)

# Confusion matrix as a DataFrame (rows: true class, columns: predicted class).
cm_values = confusion_matrix(y_true = real_labels, y_pred = pred_labels)
train_confusion_matrix = pd.DataFrame(cm_values)

print('train confusion_matrix')
# axis = 1 shades every row independently of the others.
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 1))
print(f'train fi : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,3511,139,0
1,144,3837,109
2,0,260,285


train fi : 0.8300, train acc: 0.9213 



In [77]:
# Inspect the evaluated samples: passes the 'idx' values collected during evaluation and
# their predicted labels to get_eval_img_show (defined elsewhere; presumably it renders
# the corresponding images - confirm against its definition).
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

## 6-5. Mask1 - female

In [105]:
# Tag of the sub-model handled in this section: it is passed to train() and is the prefix
# of the checkpoint file name ('{version}_{fold}fold_{model_name}.pt') loaded at evaluation.
version = 'age_mask1_female'

In [None]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 1 and genders == 1 ("Mask1 - female").
train_df = train_df[(train_df['masks'] == 1) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups: the id-indexed frame and the set of ids that survived the
# filter are identical for every fold, so they are built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Map the fold's ids onto train_df; ids removed by the filter drop out of the intersection.
    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Model, loss, optimizer and LR scheduler for this fold.
    model = CreateModel(cfg = config.ageModel, pretrained = True).to(device)
    criterion = get_criterion(config = config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad = True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 3)

    train(model = model,
          optimizer = optimizer,
          criterion = criterion,
          train_loader = train_loader,
          val_loader = val_loader,
          scheduler = scheduler,
          config = config.ageModel,
          version = version)

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup. The references must be dropped first: while `model` and
    # `optimizer` are still bound, their tensors stay allocated and empty_cache()
    # has nothing from the finished fold to give back.
    del model, criterion, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [106]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 1 and genders == 1 ("Mask1 - female").
train_df = train_df[(train_df['masks'] == 1) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups, built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

# Accumulated over all folds: true labels, predicted labels and the 'idx' of each row.
real_labels = []
pred_labels = []
idx_li = []

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Rebuild the split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Restore the checkpoint saved for this fold. map_location keeps a checkpoint written
    # on a GPU loadable when `device` is the CPU (or another GPU index).
    model = CreateModel(cfg = config.ageModel, pretrained = False).to(device)
    ckpt_path = os.path.join(config.ageModel.model_dir, f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt')
    model.load_state_dict(torch.load(ckpt_path, map_location = device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(model = model, data_loader = val_loader)

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the pooled scores are computed from the accumulated lists afterwards.
    _acc = get_acc_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    _f1_score = get_f1_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 1.9244873523712158, acc: 0.9214501510574018, f1_score: 0.8282024396211248 

2fold 훈련 시간: 1.9555401802062988, acc: 0.9003021148036254, f1_score: 0.7628694918346867 

3fold 훈련 시간: 1.9021220207214355, acc: 0.9246987951807228, f1_score: 0.8256197424631401 

4fold 훈련 시간: 1.94089937210083, acc: 0.927710843373494, f1_score: 0.8670512162037586 

5fold 훈련 시간: 1.9683763980865479, acc: 0.9123867069486404, f1_score: 0.8002939190746429 



In [107]:
# Score the predictions pooled over all folds by the evaluation loop.
train_acc = get_acc_score(y_true = real_labels, y_pred = pred_labels)
train_f1 = get_f1_score(y_true = real_labels, y_pred = pred_labels)

# Confusion matrix as a DataFrame (rows: true class, columns: predicted class).
cm_values = confusion_matrix(y_true = real_labels, y_pred = pred_labels)
train_confusion_matrix = pd.DataFrame(cm_values)

print('train confusion_matrix')
# axis = 1 shades every row independently of the others.
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 1))
print(f'train fi : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,698,32,0
1,31,771,16
2,0,58,51


train fi : 0.8183, train acc: 0.9173 



In [None]:
# Inspect the evaluated samples: passes the 'idx' values collected during evaluation and
# their predicted labels to get_eval_img_show (defined elsewhere; presumably it renders
# the corresponding images - confirm against its definition).
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

## 6-6. Mask2 - female

In [108]:
# Tag of the sub-model handled in this section: it is passed to train() and is the prefix
# of the checkpoint file name ('{version}_{fold}fold_{model_name}.pt') loaded at evaluation.
version = 'age_mask2_female'

In [None]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 2 and genders == 1 ("Mask2 - female").
train_df = train_df[(train_df['masks'] == 2) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups: the id-indexed frame and the set of ids that survived the
# filter are identical for every fold, so they are built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

total_start_time = time.time()

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Train / validation split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    # Map the fold's ids onto train_df; ids removed by the filter drop out of the intersection.
    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Model, loss, optimizer and LR scheduler for this fold.
    model = CreateModel(cfg = config.ageModel, pretrained = True).to(device)
    criterion = get_criterion(config = config.ageModel)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.ageModel.lr, amsgrad = True)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 3)

    train(model = model,
          optimizer = optimizer,
          criterion = criterion,
          train_loader = train_loader,
          val_loader = val_loader,
          scheduler = scheduler,
          config = config.ageModel,
          version = version)

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

    # Memory cleanup. The references must be dropped first: while `model` and
    # `optimizer` are still bound, their tensors stay allocated and empty_cache()
    # has nothing from the finished fold to give back.
    del model, criterion, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

In [109]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)
# Restrict to the rows this sub-model covers: masks == 2 and genders == 1 ("Mask2 - female").
train_df = train_df[(train_df['masks'] == 2) & (train_df['genders'] == 1)].reset_index(drop = True)

all_idx_li = pre_df.index.tolist()
val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# Loop-invariant lookups, built once instead of twice per fold.
train_by_id = train_df.set_index('id')
train_id_set = set(train_df['id'].tolist())

# Accumulated over all folds: true labels, predicted labels and the 'idx' of each row.
real_labels = []
pred_labels = []
idx_li = []

for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Rebuild the split of this fold; the fold indices refer to pre_df.
    val_idx = next(val_idx_li)
    trn_idx = list(set(all_idx_li) - set(val_idx.tolist()))

    val_id_df = pre_df.iloc[val_idx, :]
    trn_id_df = pre_df.iloc[trn_idx, :]

    val_df = train_by_id.loc[list(set(val_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)
    trn_df = train_by_id.loc[list(set(trn_id_df['id'].tolist()) & train_id_set), :].reset_index(drop = True)

    trn_dataset, train_loader, val_dataset, val_loader = get_trn_val_dataset_dataloader(trn_df = trn_df,
                                                                                        val_df = val_df,
                                                                                        cfg = config.ageModel,
                                                                                        model_name = 'ageModel')

    # Restore the checkpoint saved for this fold. map_location keeps a checkpoint written
    # on a GPU loadable when `device` is the CPU (or another GPU index).
    model = CreateModel(cfg = config.ageModel, pretrained = False).to(device)
    ckpt_path = os.path.join(config.ageModel.model_dir, f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt')
    model.load_state_dict(torch.load(ckpt_path, map_location = device))
    val_label_pred_li, val_ensemble_pred_li, real_pred_li = get_val_pred_li(model = model, data_loader = val_loader)

    real_labels += real_pred_li
    pred_labels += val_label_pred_li
    idx_li += val_df['idx'].tolist()

    fold_end_time = time.time()

    # Per-fold scores; the pooled scores are computed from the accumulated lists afterwards.
    _acc = get_acc_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    _f1_score = get_f1_score(y_true = real_pred_li, y_pred = val_label_pred_li)
    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time}, acc: {_acc}, f1_score: {_f1_score} \n')

1fold 훈련 시간: 2.0049831867218018, acc: 0.9335347432024169, f1_score: 0.8605019493177388 

2fold 훈련 시간: 1.97538423538208, acc: 0.9274924471299094, f1_score: 0.8191430020283975 

3fold 훈련 시간: 1.9696776866912842, acc: 0.9337349397590361, f1_score: 0.8273538653231486 

4fold 훈련 시간: 1.925302267074585, acc: 0.9186746987951807, f1_score: 0.8328889967064687 

5fold 훈련 시간: 1.987750768661499, acc: 0.918429003021148, f1_score: 0.7872258465361913 



In [110]:
# Score the predictions pooled over all folds by the evaluation loop.
train_acc = get_acc_score(y_true = real_labels, y_pred = pred_labels)
train_f1 = get_f1_score(y_true = real_labels, y_pred = pred_labels)

# Confusion matrix as a DataFrame (rows: true class, columns: predicted class).
cm_values = confusion_matrix(y_true = real_labels, y_pred = pred_labels)
train_confusion_matrix = pd.DataFrame(cm_values)

print('train confusion_matrix')
# axis = 1 shades every row independently of the others.
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 1))
print(f'train fi : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2
0,709,21,0
1,31,776,11
2,0,59,50


train fi : 0.8267, train acc: 0.9264 



In [None]:
# Inspect the evaluated samples: passes the 'idx' values collected during evaluation and
# their predicted labels to get_eval_img_show (defined elsewhere; presumably it renders
# the corresponding images - confirm against its definition).
get_eval_img_show(train_df = train_df, idx_li = idx_li, pred_labels = pred_labels, config = config.ageModel)

# 7. Ensemble

In [111]:
def get_pred_li(version, fold_num, config, val_loader):
    """Load one fold's checkpoint and return its label predictions for ``val_loader``.

    Args:
        version: Checkpoint tag (e.g. ``'mask'`` or ``'gender_mask0'``); prefix of the file name.
        fold_num: Fold number embedded in the checkpoint file name.
        config: Model-level config. ``model_dir`` and ``model_name`` locate the checkpoint,
            and the whole object is passed to ``CreateModel``.
        val_loader: Data loader handed to ``get_val_pred_li``.

    Returns:
        The first element returned by ``get_val_pred_li`` (the predicted labels).
    """
    model = CreateModel(cfg = config, pretrained = False).to(device)
    ckpt_path = os.path.join(config.model_dir, f'{version}_{fold_num}fold_{config.model_name}.pt')
    # map_location keeps a checkpoint written on a GPU loadable when `device` is the
    # CPU (or another GPU index) instead of failing while deserializing.
    model.load_state_dict(torch.load(ckpt_path, map_location = device))
    # Only the label predictions are needed here; the other two results are discarded.
    val_label_pred_li, _, _ = get_val_pred_li(model = model, data_loader = val_loader)

    return val_label_pred_li

def get_eval_data_loader(val_df, cfg, model_name):
    """Wrap ``val_df`` in a ``CustomDataset`` and return a non-shuffling ``DataLoader`` over it.

    Args:
        val_df: Frame handed to ``CustomDataset`` as ``df``.
        cfg: Config passed to the dataset; also supplies ``batch_size`` and ``num_workers``.
        model_name: Key selecting the entry of the global ``transform`` mapping.

    Returns:
        A ``DataLoader`` that iterates the dataset in order (``shuffle = False``).
    """
    eval_dataset = CustomDataset(df = val_df, cfg = cfg, transform = transform[model_name], mode = True)
    loader_options = {
        'batch_size': cfg.batch_size,
        'num_workers': cfg.num_workers,
        'shuffle': False,
    }
    return DataLoader(eval_dataset, **loader_options)

## 7-1. 평가

In [112]:
pre_df = preprocessing_df(df = df, swap_gender_li = swap_gender_li)
train_df = make_train_df(df = pre_df, swap_mask_li = swap_mask_li, cfg = config)

val_idx_li = get_val_idx(df = pre_df, target_col = config.data_split_col)

# 'genders' code -> suffix of the age checkpoint tag ('age_mask{m}_male' / 'age_mask{m}_female').
gender_name_by_code = {0: 'male', 1: 'female'}

total_start_time = time.time()
pred_val_df_li = []   # per-subset frames carrying the predicted masks / genders / ages
val_df_li = []        # per-fold frames carrying the ground truth
for fold_num in range(1, config.oof + 1):
    fold_start_time = time.time()

    # Validation rows of this fold: fold indices refer to pre_df, rows are looked up by id.
    val_idx = next(val_idx_li)
    val_id_df = pre_df.iloc[val_idx, :]
    val_df = train_df.set_index('id').loc[val_id_df['id'].tolist(), :].reset_index(drop = True)

    val_df_li.append(val_df.copy())

    # Stage 1 - mask: overwrite the 'masks' column with the model's predictions so that
    # the following stages are routed by predicted, not true, mask classes.
    mask_loader = get_eval_data_loader(val_df = val_df,
                                       cfg = config.maskModel,
                                       model_name = 'maskModel')
    masks_pred_val_df = val_df.copy()
    masks_pred_val_df['masks'] = get_pred_li(version = 'mask',
                                             fold_num = fold_num,
                                             config = config.maskModel,
                                             val_loader = mask_loader)

    for mask_code in (0, 1, 2):
        # Stage 2 - gender: one gender model per predicted mask class.
        mask_val_df = masks_pred_val_df[masks_pred_val_df['masks'] == mask_code].reset_index(drop = True)
        if mask_val_df.empty:
            # Nothing was routed to this mask class in this fold; skip it the same way
            # the age stage skips empty subsets instead of running a model on no data.
            continue

        gender_loader = get_eval_data_loader(val_df = mask_val_df,
                                             cfg = config.genderModel,
                                             model_name = 'genderModel')
        genders_pred_val_df = mask_val_df.copy()
        genders_pred_val_df['genders'] = get_pred_li(version = f'gender_mask{mask_code}',
                                                     fold_num = fold_num,
                                                     config = config.genderModel,
                                                     val_loader = gender_loader)

        for gender_code, gender_name in gender_name_by_code.items():
            # Stage 3 - age: one age model per (predicted mask, predicted gender) pair.
            age_val_df = genders_pred_val_df[genders_pred_val_df['genders'] == gender_code].reset_index(drop = True)
            if age_val_df.empty:
                continue

            age_loader = get_eval_data_loader(val_df = age_val_df,
                                              cfg = config.ageModel,
                                              model_name = 'ageModel')
            ages_pred_val_df = age_val_df.copy()
            ages_pred_val_df['ages'] = get_pred_li(version = f'age_mask{mask_code}_{gender_name}',
                                                   fold_num = fold_num,
                                                   config = config.ageModel,
                                                   val_loader = age_loader)
            pred_val_df_li.append(ages_pred_val_df)

    fold_end_time = time.time()

    print(f'{fold_num}fold 훈련 시간: {fold_end_time - fold_start_time} \n')

total_end_time = time.time()
print(f'총 훈련 시간: {total_end_time - total_start_time}')

1fold 훈련 시간: 44.12687706947327 

2fold 훈련 시간: 45.03801417350769 

3fold 훈련 시간: 44.29997134208679 

4fold 훈련 시간: 44.54051399230957 

5fold 훈련 시간: 44.050636768341064 

총 훈련 시간: 222.0612986087799


In [113]:
# Stack the per-fold ground truth and the per-subset predictions, both ordered by 'idx'
# so that row i of one frame corresponds to row i of the other.
val_df = pd.concat(val_df_li).sort_values('idx').reset_index(drop = True)
pred_val_df = pd.concat(pred_val_df_li).sort_values('idx').reset_index(drop = True)
# get_labels is plain arithmetic on its arguments, so it is applied to whole columns
# at once instead of through a row-by-row apply.
pred_val_df['labels'] = get_labels(masks = pred_val_df['masks'],
                                   genders = pred_val_df['genders'],
                                   ages = pred_val_df['ages'])

In [114]:
# Plain lists for the metric helpers: row ids, ground-truth labels and the labels
# assembled from the three-stage pipeline.
idx_li = val_df['idx'].tolist()
real_labels = val_df['labels'].tolist()
pred_labels = pred_val_df['labels'].tolist()

In [115]:
# Score the combined labels of the full mask -> gender -> age pipeline.
train_acc = get_acc_score(y_true = real_labels, y_pred = pred_labels)
train_f1 = get_f1_score(y_true = real_labels, y_pred = pred_labels)

# Confusion matrix as a DataFrame (rows: true class, columns: predicted class).
cm_values = confusion_matrix(y_true = real_labels, y_pred = pred_labels)
train_confusion_matrix = pd.DataFrame(cm_values)

print('train confusion_matrix')
# axis = 1 shades every row independently of the others.
display(train_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 1))
print(f'train fi : {train_f1:.4f}, train acc: {train_acc:.4f} \n')

train confusion_matrix


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,2668,49,0,37,11,0,0,0,0,0,0,0,0,0,0,0,0,0
1,93,1764,115,5,60,3,0,5,0,0,0,0,0,0,0,0,0,0
2,0,228,181,0,4,1,0,1,0,0,0,0,0,0,0,0,0,0
3,37,1,0,3476,133,0,0,0,0,3,0,0,0,0,0,0,0,0
4,17,26,0,140,3796,109,0,0,0,0,2,0,0,0,0,0,0,0
5,0,9,2,0,257,277,0,0,0,0,0,0,0,0,0,0,0,0
6,4,0,0,0,0,0,522,12,0,12,3,0,0,0,0,0,0,0
7,0,2,0,0,0,0,20,360,12,0,14,1,0,0,0,0,0,0
8,0,0,1,0,0,0,0,57,25,0,0,0,0,0,0,0,0,0
9,0,0,0,3,0,0,10,1,0,686,30,0,0,0,0,0,0,0


train fi : 0.7824, train acc: 0.8986 



## 7-2. Mask

In [None]:
def get_submission_pred_li(model, data_loader):
    """Run inference over ``data_loader`` and collect the predictions.

    Args:
        model: Module whose forward pass returns class scores with one row per
            image (the softmax is taken over dim 1).
        data_loader: Iterable yielding batches of image tensors only
            (no targets).

    Returns:
        A tuple ``(label_pred_li, probs)``: ``label_pred_li`` is a flat list
        with the argmax class index of every sample, and ``probs`` is a NumPy
        array with the softmax outputs of all batches concatenated along
        axis 0.

    Raises:
        ValueError: If ``data_loader`` yields no batches, because
            ``np.concatenate`` cannot join an empty list.
    """
    model.eval()

    # Send inputs to wherever the model's weights live instead of depending on
    # a module-level ``device``; the global is only used as a fallback for a
    # model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    label_pred_li = []
    ensemble_pred_li = []

    with torch.no_grad():
        for images in data_loader:
            images = images.to(target_device)
            output = model(images)

            # Hard prediction per sample.
            label_pred_li.extend(output.argmax(dim=-1).cpu().numpy())

            # Class probabilities, kept so callers can average across folds.
            ensemble_pred_li.append(output.softmax(dim=1).cpu().numpy())

    return label_pred_li, np.concatenate(ensemble_pred_li)

In [116]:

# TODO: write code that splits the work per task and predicts them in order.
version = 'mask'

# NOTE(review): mode=False presumably selects the label-free inference path of
# CustomDataset — confirm against its definition.
submission_dataset = CustomDataset(df = submission,
                                   cfg = config,
                                   transform = transform['maskModel'],
                                   mode = False,
                                  )

# shuffle=False keeps the prediction rows in the same order as `submission`.
submission_loader = DataLoader(submission_dataset,
                                batch_size = config.maskModel.batch_size,
                                num_workers = config.maskModel.num_workers,
                                shuffle = False,
                             )

# Accumulator for the fold-averaged class probabilities, one row per sample.
mask_oof = np.zeros((submission.shape[0], config.maskModel.num_classes))

for fold_num in range(1, config.oof + 1):
    model = CreateModel(cfg = config.maskModel, pretrained = False).to(device)
    # map_location lets checkpoints saved on another device (e.g. a GPU that is
    # not available here) be restored onto the current `device`.
    model.load_state_dict(
        torch.load(
            os.path.join(config.maskModel.model_dir, f'{version}_{fold_num}fold_{config.maskModel.model_name}.pt'),
            map_location = device,
        )
    )
    pred_li, ensemble_pred_li = get_submission_pred_li(model = model, data_loader = submission_loader)

    # Each fold contributes 1 / config.oof of the final averaged probability.
    mask_oof += ensemble_pred_li / config.oof

In [123]:
# Final mask class per image: the class with the highest fold-averaged probability.
submission['masks'] = np.argmax(mask_oof, axis=1)

## 7-3. Gender

In [126]:
# (predicted mask class, checkpoint prefix) pairs: a separate set of gender
# checkpoints is loaded for each mask class.
masks_version_li = [[0, 'gender_mask0'], [1, 'gender_mask1'], [2, 'gender_mask2']]

# Fold-averaged probability accumulators, one per mask class, sized to the
# number of submission rows predicted as that class.
gender_mask0_oof = np.zeros((submission[submission['masks'] == 0].shape[0], config.genderModel.num_classes))
gender_mask1_oof = np.zeros((submission[submission['masks'] == 1].shape[0], config.genderModel.num_classes))
gender_mask2_oof = np.zeros((submission[submission['masks'] == 2].shape[0], config.genderModel.num_classes))

# Lookup used instead of an if/elif chain; the arrays are updated in place, so
# the named accumulators above see every addition.
gender_oof_by_version = {
    'gender_mask0': gender_mask0_oof,
    'gender_mask1': gender_mask1_oof,
    'gender_mask2': gender_mask2_oof,
}

for masks, version in masks_version_li:
    _submission = submission[submission['masks'] == masks].reset_index(drop = True)

    if _submission.empty:
        # No image was predicted as this mask class. Running inference on an
        # empty loader would make np.concatenate fail, and the (0-row)
        # accumulator is already the correct result.
        continue

    # The subset does not depend on the fold, so the dataset and loader are
    # built once per mask class and reused for every fold.
    submission_dataset = CustomDataset(df = _submission,
                                       cfg = config,
                                       transform = transform['genderModel'],
                                       mode = False,
                                      )

    submission_loader = DataLoader(submission_dataset,
                                    batch_size = config.genderModel.batch_size,
                                    num_workers = config.genderModel.num_workers,
                                    shuffle = False,
                                 )

    version_oof = gender_oof_by_version[version]

    for fold_num in range(1, config.oof + 1):
        model = CreateModel(cfg = config.genderModel, pretrained = False).to(device)
        # map_location restores checkpoints saved on another device onto `device`.
        model.load_state_dict(
            torch.load(
                os.path.join(config.genderModel.model_dir, f'{version}_{fold_num}fold_{config.genderModel.model_name}.pt'),
                map_location = device,
            )
        )
        pred_li, ensemble_pred_li = get_submission_pred_li(model = model, data_loader = submission_loader)

        # Each fold contributes 1 / config.oof of the averaged probability.
        version_oof += ensemble_pred_li / config.oof

In [127]:
# Per-mask-class views of the submission, in the same row order that was used
# to size the gender_mask*_oof accumulators.
gender_mask0_submission = submission[submission['masks'] == 0].reset_index(drop = True)
gender_mask1_submission = submission[submission['masks'] == 1].reset_index(drop = True)
gender_mask2_submission = submission[submission['masks'] == 2].reset_index(drop = True)

# Gender class = highest fold-averaged probability.
gender_mask0_submission['genders'] = gender_mask0_oof.argmax(1)
gender_mask1_submission['genders'] = gender_mask1_oof.argmax(1)
gender_mask2_submission['genders'] = gender_mask2_oof.argmax(1)

gender_submission = pd.concat([gender_mask0_submission, gender_mask1_submission, gender_mask2_submission]).sort_values('idx').reset_index(drop = True)

if len(gender_submission) != len(submission):
    raise ValueError('gender predictions do not cover every submission row')

# Write the predictions back through the same boolean masks that produced the
# subsets. This keeps every prediction on its own row even if `submission` is
# not ordered by 'idx' or 'idx' contains duplicates, which a positional
# assignment of the idx-sorted frame would silently get wrong.
genders_pred = np.empty(len(submission), dtype = np.int64)
for mask_class, subset in (
    (0, gender_mask0_submission),
    (1, gender_mask1_submission),
    (2, gender_mask2_submission),
):
    in_group = (submission['masks'] == mask_class).to_numpy()
    genders_pred[in_group] = subset['genders'].to_numpy()

submission['genders'] = genders_pred

## 7-4. Age

In [132]:
# (predicted mask class, predicted gender class, checkpoint prefix) triples:
# a separate set of age checkpoints is loaded for each mask/gender combination.
masks_genders_version_li = [[0, 0, 'age_mask0_male'], [1, 0, 'age_mask1_male'], [2, 0, 'age_mask2_male'],
                           [0, 1, 'age_mask0_female'], [1, 1, 'age_mask1_female'], [2, 1, 'age_mask2_female']]

# Fold-averaged probability accumulators, one per combination, sized to the
# number of submission rows that fall into that combination.
age_mask0_male_oof = np.zeros((submission[(submission['masks'] == 0) & (submission['genders'] == 0)].shape[0], config.ageModel.num_classes))
age_mask1_male_oof = np.zeros((submission[(submission['masks'] == 1) & (submission['genders'] == 0)].shape[0], config.ageModel.num_classes))
age_mask2_male_oof = np.zeros((submission[(submission['masks'] == 2) & (submission['genders'] == 0)].shape[0], config.ageModel.num_classes))
age_mask0_female_oof = np.zeros((submission[(submission['masks'] == 0) & (submission['genders'] == 1)].shape[0], config.ageModel.num_classes))
age_mask1_female_oof = np.zeros((submission[(submission['masks'] == 1) & (submission['genders'] == 1)].shape[0], config.ageModel.num_classes))
age_mask2_female_oof = np.zeros((submission[(submission['masks'] == 2) & (submission['genders'] == 1)].shape[0], config.ageModel.num_classes))

# Lookup used instead of an if/elif chain; the arrays are updated in place, so
# the named accumulators above see every addition.
age_oof_by_version = {
    'age_mask0_male': age_mask0_male_oof,
    'age_mask1_male': age_mask1_male_oof,
    'age_mask2_male': age_mask2_male_oof,
    'age_mask0_female': age_mask0_female_oof,
    'age_mask1_female': age_mask1_female_oof,
    'age_mask2_female': age_mask2_female_oof,
}

for masks, genders, version in masks_genders_version_li:
    _submission = submission[(submission['masks'] == masks) & (submission['genders'] == genders)].reset_index(drop = True)

    if _submission.empty:
        # No image fell into this mask/gender combination. Running inference
        # on an empty loader would make np.concatenate fail, and the (0-row)
        # accumulator is already the correct result.
        continue

    # The subset does not depend on the fold, so the dataset and loader are
    # built once per combination and reused for every fold.
    submission_dataset = CustomDataset(df = _submission,
                                       cfg = config,
                                       transform = transform['ageModel'],
                                       mode = False,
                                      )

    submission_loader = DataLoader(submission_dataset,
                                    batch_size = config.ageModel.batch_size,
                                    num_workers = config.ageModel.num_workers,
                                    shuffle = False,
                                 )

    version_oof = age_oof_by_version[version]

    for fold_num in range(1, config.oof + 1):
        model = CreateModel(cfg = config.ageModel, pretrained = False).to(device)
        # map_location restores checkpoints saved on another device onto `device`.
        model.load_state_dict(
            torch.load(
                os.path.join(config.ageModel.model_dir, f'{version}_{fold_num}fold_{config.ageModel.model_name}.pt'),
                map_location = device,
            )
        )
        pred_li, ensemble_pred_li = get_submission_pred_li(model = model, data_loader = submission_loader)

        # Each fold contributes 1 / config.oof of the averaged probability.
        version_oof += ensemble_pred_li / config.oof

In [133]:
# Per-combination views of the submission, in the same row order that was used
# to size the age_*_oof accumulators.
age_mask0_male_submission = submission[(submission['masks'] == 0) & (submission['genders'] == 0)].reset_index(drop = True)
age_mask1_male_submission = submission[(submission['masks'] == 1) & (submission['genders'] == 0)].reset_index(drop = True)
age_mask2_male_submission = submission[(submission['masks'] == 2) & (submission['genders'] == 0)].reset_index(drop = True)
age_mask0_female_submission = submission[(submission['masks'] == 0) & (submission['genders'] == 1)].reset_index(drop = True)
age_mask1_female_submission = submission[(submission['masks'] == 1) & (submission['genders'] == 1)].reset_index(drop = True)
age_mask2_female_submission = submission[(submission['masks'] == 2) & (submission['genders'] == 1)].reset_index(drop = True)

# Age class = highest fold-averaged probability.
age_mask0_male_submission['ages'] = age_mask0_male_oof.argmax(1)
age_mask1_male_submission['ages'] = age_mask1_male_oof.argmax(1)
age_mask2_male_submission['ages'] = age_mask2_male_oof.argmax(1)
age_mask0_female_submission['ages'] = age_mask0_female_oof.argmax(1)
age_mask1_female_submission['ages'] = age_mask1_female_oof.argmax(1)
age_mask2_female_submission['ages'] = age_mask2_female_oof.argmax(1)


age_submission = pd.concat([age_mask0_male_submission, age_mask1_male_submission, age_mask2_male_submission,
                              age_mask0_female_submission, age_mask1_female_submission, age_mask2_female_submission]).sort_values('idx').reset_index(drop = True)

if len(age_submission) != len(submission):
    raise ValueError('age predictions do not cover every submission row')

# Write the predictions back through the same boolean masks that produced the
# subsets. This keeps every prediction on its own row even if `submission` is
# not ordered by 'idx' or 'idx' contains duplicates, which a positional
# assignment of the idx-sorted frame would silently get wrong.
ages_pred = np.empty(len(submission), dtype = np.int64)
for mask_class, gender_class, subset in (
    (0, 0, age_mask0_male_submission),
    (1, 0, age_mask1_male_submission),
    (2, 0, age_mask2_male_submission),
    (0, 1, age_mask0_female_submission),
    (1, 1, age_mask1_female_submission),
    (2, 1, age_mask2_female_submission),
):
    in_group = ((submission['masks'] == mask_class) & (submission['genders'] == gender_class)).to_numpy()
    ages_pred[in_group] = subset['ages'].to_numpy()

submission['ages'] = ages_pred

## 7-5. 제출

In [137]:
# Combine the three per-task predictions into the single label per image.
submission['labels'] = submission.apply(
    lambda row: get_labels(masks=row['masks'], genders=row['genders'], ages=row['ages']),
    axis=1,
)

# Keep only the image id and the label, exposing the label as 'ans'.
submission_df = (
    submission[['ImageID', 'labels']]
    .reset_index(drop=True)
    .rename(columns={'labels': 'ans'})
)

# Write the file without the index column, then preview the first rows.
submission_df.to_csv(config.file_name, index=False)
submission_df.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,13
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,13
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12
