In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import random
import wandb
import copy
import time
from datetime import datetime
from pytz import timezone
import seaborn as sns

import cv2

from torchvision.transforms import ToTensor,Compose,ColorJitter,RandomRotation,RandomHorizontalFlip,Resize,CenterCrop,Normalize,GaussianBlur,RandomCrop

import timm

import torch
import torch.nn as nn
import torch.optim as optm
import torch.nn.functional as F
import torch.distributions as dist
from torch.utils.data import Dataset, DataLoader

import torchvision

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from matplotlib import gridspec

from torchinfo import summary
from tqdm.auto import tqdm

%matplotlib inline
device = 'cuda' if torch.cuda.is_available() else 'cpu'
cwd=os.path.dirname(os.getcwd())
KST=timezone('Asia/Seoul')
date=str(datetime.now().astimezone(KST))[:10]

In [2]:
#프로젝트 이름 설정과 저장경로
project_name='ageefficientnet_with_label_smoothing_split_gender'

In [3]:
#Randomness 제어
random_seed=2021
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [4]:
transform={'trans':Compose([
    Resize((256*512//384,256)),
    CenterCrop(224),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]),'regtrans':Compose([
    Resize((256*512//384,256)),
    GaussianBlur(3),
    ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    RandomCrop(224),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]),'uptrans':Compose([
    Resize((256*512//384,256)),
    RandomHorizontalFlip(),
    RandomRotation(20),
    CenterCrop(224),
    ToTensor(),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])}

def make_images(meta,img_dir,train):
    images=[]
    labels=[]
    ages=[]
    data=pd.DataFrame({'images':[],'labels':[]})
    if train:
        for idx in range(len(meta)):
            folder_path=os.path.join(img_dir, meta.path.iloc[idx])
            for img in os.listdir(folder_path):
                if '._' in img:
                    continue
                images.append(os.path.join(folder_path,img))
                labels.append((('incorrect' in img)+('normal' in img)*2)*6+(meta.gender.iloc[idx]=='female')*3+(30<=meta.age.iloc[idx])+(60<=meta.age.iloc[idx]))
                ages.append(meta.age.iloc[idx])
    else:
        for img_id in meta.ImageID:
            images.append(os.path.join(img_dir, img_id))
        labels=[0 for _ in range(len(images))]
    data['images']=images
    data['labels']=labels
    data['ages']=ages
    return data

class ImageDataset(Dataset):
    def __init__(self,transform=transform,mod='train'):
        self.mod=mod
        self.train=mod!='test'
        self.md=['info','train']
        self.path=[os.path.join(cwd,'input/data/eval'),os.path.join(cwd,'input/data/train')]
        self.meta=pd.read_csv(os.path.join(self.path[self.train], f'{self.md[self.train]}.csv'))
        self.img_dir=os.path.join(self.path[self.train],'images')
        self.classes=[('Wear','Incorrect','Not Wear'),('남','여'),('<30','>=30 and <60','>=60')]
        self.trans=transform
        self.criteria=1*(self.meta['age']>=30)+1*(self.meta['age']>=60)+3*(self.meta['gender']=='female')
        if self.train:
            self.train_data,self.val_data=train_test_split(self.meta,test_size=0.2,stratify=self.criteria)
            self.train_data=make_images(self.train_data,self.img_dir,self.train)
            self.val_data=make_images(self.val_data,self.img_dir,self.train)
        self.data=make_images(self.meta,self.img_dir,self.train)
    
    def __len__(self):
        return len(self.data)
        return len(self.train_data) if self.mod=='train' else len(self.val_data)
        
    
    def __getitem__(self, idx):
        if self.mod=='valid':
            images=self.val_data['images']
            labels=self.val_data['labels']
            ages=self.val_data['ages']
            age=ages.iloc[idx]
        elif self.mod=='train':
            images=self.train_data['images']
            labels=self.train_data['labels']
            ages=self.train_data['ages']
            age=ages.iloc[idx]
        else:
            images=self.data['images']
            labels=self.data['labels']
            ages=self.data['ages']
        age=ages.iloc[idx]
        label=labels.iloc[idx]
        image=Image.open(images.iloc[idx])
        
        if self.mod=='train':
            if True:
                image=self.trans['uptrans'](image)
                return image,label,age
            else:
                image=self.trans['regtrans'](image)
                return image,label,age
        else:
            image=self.trans['trans'](image)
            return image,label,age
        return image,label

def conf_mat(y_true,y_pred):
    cm=confusion_matrix(y_true,y_pred)
    norm_cm=cm/np.sum(cm, axis=1)[:,None]
    mindices=['wear,m,<30','wear,m,<>','wear,m,60','inc,m,<30','inc,m,<>','inc,m,60','nom,m,<30','nom,m,<>','nom,m,60']
    findices=['wear,f,<30','wear,f,<>','wear,f,60','inc,f,<30','inc,f,<>','inc,f,60','nom,f,<30','nom,f,<>','nom,f,60']
    indices=['wear,m,<30','wear,m,mask<>','wear,m,mask60','wear,f,<30','wear,f,<>','wear,f,60','inc,m,<30','inc,m,<>','inc,m,60','inc,f,<30','inc,f,<>','inc,f,60','nom,m,<30','nom,m,<>','nom,m,60','nom,f,<30','nom,f,<>','nom,f,60']
    cm=pd.DataFrame(norm_cm,index=indices,columns=indices)
    fig=plt.figure(figsize=(11,9))
    sns.heatmap(cm,annot=True)
    return fig


class SkewNormal(dist.normal.Normal):
    def __init__(self,mean=0,std=1,skew=0):
        super().__init__(loc=mean,scale=std)
        self.skew=skew
    def skewcdf(self,x):
        return 0.5 * (1 + torch.erf(self.skew*(x - self.loc) * self.scale.reciprocal() / np.sqrt(2)))

    def log_p(self,x):
        return self.log_prob(x)+torch.log(self.skewcdf(x))-torch.log(torch.tensor(2.))

def label_smoothing(label):
    label-=18
    label_0=SkewNormal(11.,7,-20).log_p
    label_11=SkewNormal(12.,7,20).log_p
    label_12=SkewNormal(41,7,-20).log_p
    label_2=dist.Normal(43.7,.65).log_prob
    return torch.exp(torch.vstack((label_0(label),torch.log(torch.exp(label_11(label))+torch.exp(label_12(label))),label_2(label))).T)

def label_smoothingNorm(label):
    label_0=dist.Normal(18.,5.5,-20).log_prob
    label_1=dist.Normal(45.,7.,20).log_prob
    label_2=dist.Normal(63,1.5).log_prob
    return torch.exp(torch.vstack((label_0(label)-label_0(torch.tensor(18)),label_1(label)-label_1(torch.tensor(45)),label_2(label)-label_2(torch.tensor(63)))).T)

In [None]:
label_smoothingNorm(torch.tensor([18,29,30,59,60]))

# 부분 모델 학습과 모델저장

In [6]:
#사용할 부분 모델 불러오기
net_age = timm.create_model('efficientnet_b3a', pretrained=True, num_classes=3)

In [7]:
#학습 설정하기
num_epochs=1000
batch_size=60
learning_rate=0.0001
models={'age':net_age}
weights={'mage': torch.tensor([1042/3/549,1042/3/410,1042/3/83]).to(device),'fage': torch.tensor([1658/3/732,1658/3/817,1658/3/109]).to(device),'mask':torch.tensor([7/15,7/3,7/3]).to(device),'gender':torch.tensor([2700/(2*1042),2700/(2*1658)]).to(device)}

In [None]:
#실행 파일 저장 경로

        
#부분 모델 사전 학습
for key in ['age']:
    
    now='Runned'+date+'_'.join(str(datetime.now().astimezone(KST))[10:16].split(':'))
    base_dir=os.path.join('.',f'saved/{project_name}/{now}_{key}')
    wandb_dir=base_dir
    checkpoint_dir=''
    model_dir=''
    result_dir=''
    dirs={'model':model_dir,'checkpoint':checkpoint_dir,'result':result_dir}
    for pth in ['model','checkpoint','result']:
        dirs[pth]=os.path.join(base_dir,pth)
        if not os.path.isdir(dirs[pth]):
            os.makedirs(dirs[pth])
    
    fweight=weights['fage']
    mweight=weights['mage']
    config={'epochs':num_epochs,'transform':'uptrans','batch_size':batch_size,'learning_rate':learning_rate,'gamma':0.75,'mweight':mweight,'fweight':fweight,'f1_avg':'weighted','norm1':[11,7,-20],'norm2':[(12,41.7),7,20],'norm3':[43.7,.65]}
    data=ImageDataset()
    wandb.init(project=project_name+'_'+key,config=config,dir=wandb_dir)
    print('training:',key)
    model=models['age']
    model.to(device)

    optim=optm.Adam(model.parameters())

    scheduler = optm.lr_scheduler.StepLR(moptim, step_size=1, gamma=config['gamma'])
    if key!='age':
        loss_fn_m=nn.CrossEntropyLoss
    else:
        loss_fn_m=nn.BCEWithLogitsLoss(pos_weight=mweight)
        loss_fn_f=nn.BCEWithLogitsLoss(pos_weight=fweight)
        
    best_f1=0.
    best_epoch=0
    stopper=0
    for e in range(num_epochs):   
        for mod in ['train']:
            print('mod:',mod)
            data.mod=mod

            if mod=='train':
                dataloader=DataLoader(data,batch_size=batch_size,shuffle=True,pin_memory=True,num_workers=2)
                #fmodel.train()
                mmodel.train()
            else:
                dataloader=DataLoader(data,shuffle=False)
                mmodel.eval()
                #fmodel.eval()

            with tqdm(dataloader) as pbar:
                running_loss=0.
                running_acc=0.
                prev_f1=0.
                tot_pred=torch.tensor([]).to(device)
                tot_label=torch.tensor([]).to(device)
                cm_pred=torch.tensor([]).to(device)
                cm_label=torch.tensor([]).to(device)
                
                for n,(image,label,age) in enumerate(pbar):
                    image=image.to(device)
                    label=label.to(device)
                    age=age.to(device)
                    gender=(label//3)%2

                    if False:
                        mlabel=torch.hstack([label[i] for i in range(len(gender)) if gender[i]==0])
                        flabel=torch.hstack([label[i] for i in range(len(gender)) if gender[i]==1])
                        mimage=torch.vstack([image[i:i+1] for i in range(len(gender)) if gender[i]==0])
                        fimage=torch.vstack([image[i:i+1] for i in range(len(gender)) if gender[i]==1])
                        mage=torch.hstack([age[i] for i in range(len(gender)) if gender[i]==0])
                        fage=torch.hstack([age[i] for i in range(len(gender)) if gender[i]==1])
                        logit_f=fmodel(fimage)
                        _,fpred=torch.max(logit_f,1)
                        logit_m=mmodel(mimage)
                        _,mpred=torch.max(logit_m,1)
                        mpred=mpred+6*(mlabel//6)
                        fpred=fpred+6*(flabel//6)+3
                        loss_f=loss_fn_f(logit_f,label_smoothing(fage))
                        loss_m=loss_fn_m(logit_m,label_smoothing(mage))
                        running_loss+=(loss_m.item()+loss_f.item())*image.size(0)
                        running_acc+=(torch.sum(mpred==mlabel)+torch.sum(fpred==flabel))/image.size(0)

                    elif True:
                        mlabel=label
                        mimage=image
                        mage=age
                        logit_m=mmodel(mimage)
                        _,mpred=torch.max(logit_m,1)
                        mpred=mpred+6*(mlabel//6)+3*gender
                        loss_m=loss_fn_m(logit_m,label_smoothing(mage))
                        running_loss+=(loss_m.item())*image.size(0)
                        running_acc+=(torch.sum(mpred==mlabel))/image.size(0)

                    else:
                        flabel=torch.hstack([label[i] for i in range(len(gender)) if gender[i]==1])
                        fimage=torch.vstack([image[i:i+1] for i in range(len(gender)) if gender[i]==1])
                        fage=torch.hstack([age[i] for i in range(len(gender)) if gender[i]==1])
                        logit_f=fmodel(fimage)
                        _,fpred=torch.max(logit_f,1)
                        fpred=fpred+6*(flabel//6)+3
                        loss_f=loss_fn_f(logit_f,label_smoothing(fage))
                        running_loss+=(loss_f.item())*image.size(0)
                        running_acc+=(torch.sum(fpred==flabel))/image.size(0)
                    


                        
                        
                    if True:
                        #foptim.zero_grad()
                        #loss_f.backward()
                        #foptim.step()
                        moptim.zero_grad()
                        loss_m.backward()
                        moptim.step()
                        tot_pred=torch.hstack((tot_pred,mpred))
                        tot_label=torch.hstack((tot_label,mlabel))
                    elif False:
                        tot_pred=torch.hstack((tot_pred,mpred))
                        tot_label=torch.hstack((tot_label,mlabel))
                    else:
                        tot_pred=torch.hstack((tot_pred,fpred))
                        tot_label=torch.hstack((tot_label,flabel))
                        
                    
                    if data.train:
                        pbar.set_postfix({'epoch' : e, 'loss' : running_loss/(n+1), 'accuracy' : float(running_acc)/(n+1),'F1 score':f1_score(tot_label.cpu(),tot_pred.cpu(),average='weighted')})
                        
                            
                if data.train:
                    epoch_loss=running_loss/len(dataloader.dataset)
                    epoch_acc=running_acc*image.size(0)/len(dataloader.dataset)
                    epoch_f1_w=f1_score(tot_label.cpu(),tot_pred.cpu(),average='weighted')
                    epoch_f1_m=f1_score(tot_label.cpu(),tot_pred.cpu(),average='macro')
                    print(f"현재 epoch-{e}의 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}, F1 score macro: {epoch_f1_m:.3f}, F1 score weight: {epoch_f1_m:.3f}")
                    if mod=='train':
                        #scheduler_f.step()
                        scheduler_m.step()
                        wandb.log({'training loss' : epoch_loss, 'training accuracy':epoch_acc,'training f1 score macro':epoch_f1_m,'training f1 score':epoch_f1_w})
                    
        if mod=='train':
            cm=conf_mat(tot_label.cpu(),tot_pred.cpu())
            cm.savefig(f'./saved/{project_name}/{now}_{key}/checkpoint/checkpoint_{key}_2{str(e).zfill(3)}.jpg')
            wandb.log({f'{str(e).zfill(3)}_confusion_matrix':wandb.Image(cm),'loss' : epoch_loss, 'accuracy':epoch_acc,'f1 score macro':epoch_f1_m,'f1 score':epoch_f1_w})
            plt.close()
            if (prev_f1<epoch_f1_w) and not (best_f1<epoch_f1_w):
                torch.save({'epoch':e,'loss_m':loss_m,'mmodel_state_dict':mmodel.state_dict(),'moptimizer_state_dict':moptim.state_dict()},f"{dirs['checkpoint']}/{'2'+str(e).zfill(3)}_checkpoint_{key}_{epoch_f1_w:.5f}_{epoch_acc:.3f}_{epoch_loss:.3f}.pt")
            if best_f1<epoch_f1_w:
                stopper=0
                best_epoch=e
                best_f1=epoch_f1_w
                torch.save({'epoch':e,'loss_m':loss_m,'mmodel_state_dict':mmodel.state_dict(),'moptimizer_state_dict':moptim.state_dict()},f"{dirs['checkpoint']}/{'2'+str(e).zfill(3)}_checkpoint_{key}_best_{epoch_f1_w:.5f}_{epoch_loss:.3f}_{epoch_acc:.3f}.pt")
            elif stopper==5:
                break
            else:
                stopper+=1
            prev_f1=epoch_f1_w
    print(best_epoch)
    wandb.finish()

VBox(children=(Label(value=' 0.06MB of 0.06MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
training loss,1.8011
training accuracy,0.43127
training f1 score macro,0.27198
training f1 score,0.38175
_runtime,102.0
_timestamp,1630376557.0
_step,1.0
loss,1.8011
accuracy,0.43127
f1 score macro,0.27198


0,1
training loss,▁
training accuracy,▁
training f1 score macro,▁
training f1 score,▁
_runtime,▁█
_timestamp,▁█
_step,▁█
loss,▁
accuracy,▁
f1 score macro,▁


[34m[1mwandb[0m: wandb version 0.12.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade

CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



training: age
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-0의 평균 Loss : 0.041, 평균 Accuracy : 0.786, F1 score macro: 0.706, F1 score weight: 0.706
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-1의 평균 Loss : 0.030, 평균 Accuracy : 0.938, F1 score macro: 0.899, F1 score weight: 0.899
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-2의 평균 Loss : 0.029, 평균 Accuracy : 0.970, F1 score macro: 0.958, F1 score weight: 0.958
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-3의 평균 Loss : 0.028, 평균 Accuracy : 0.986, F1 score macro: 0.982, F1 score weight: 0.982
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-4의 평균 Loss : 0.028, 평균 Accuracy : 0.990, F1 score macro: 0.986, F1 score weight: 0.986
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-5의 평균 Loss : 0.028, 평균 Accuracy : 0.993, F1 score macro: 0.993, F1 score weight: 0.993
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-6의 평균 Loss : 0.028, 평균 Accuracy : 0.995, F1 score macro: 0.995, F1 score weight: 0.995
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-7의 평균 Loss : 0.028, 평균 Accuracy : 0.996, F1 score macro: 0.996, F1 score weight: 0.996
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-8의 평균 Loss : 0.028, 평균 Accuracy : 0.997, F1 score macro: 0.997, F1 score weight: 0.997
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-9의 평균 Loss : 0.028, 평균 Accuracy : 0.998, F1 score macro: 0.997, F1 score weight: 0.997
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-10의 평균 Loss : 0.028, 평균 Accuracy : 0.998, F1 score macro: 0.998, F1 score weight: 0.998
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-11의 평균 Loss : 0.028, 평균 Accuracy : 0.998, F1 score macro: 0.998, F1 score weight: 0.998
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-12의 평균 Loss : 0.028, 평균 Accuracy : 0.999, F1 score macro: 0.998, F1 score weight: 0.998
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-13의 평균 Loss : 0.028, 평균 Accuracy : 0.998, F1 score macro: 0.999, F1 score weight: 0.999
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-14의 평균 Loss : 0.028, 평균 Accuracy : 0.998, F1 score macro: 0.998, F1 score weight: 0.998
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-15의 평균 Loss : 0.028, 평균 Accuracy : 0.999, F1 score macro: 0.999, F1 score weight: 0.999
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-16의 평균 Loss : 0.028, 평균 Accuracy : 0.999, F1 score macro: 0.999, F1 score weight: 0.999
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))


현재 epoch-17의 평균 Loss : 0.028, 평균 Accuracy : 0.999, F1 score macro: 0.998, F1 score weight: 0.998
mod: train


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=315.0), HTML(value='')))