In [None]:
# !conda install -c conda-forge opencv
# !pip install tqdm
# !pip install -U albumentations
# !pip install efficientnet_pytorch
# !pip install ipywidgets
# !jupyter nbextension enable --py widgetsnbextension

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader, RandomSampler
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import albumentations as A
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
import os, random, time, copy, tqdm, cv2, re
from tensorboardX import SummaryWriter
from torch.autograd import Variable
%matplotlib inline

In [None]:
!free -mh

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

In [None]:
# Load the training metadata CSV.
train = pd.read_csv('./input/data/train/train.csv')

In [None]:
# Show the loaded metadata table.
train

In [None]:
# Print the DataFrame summary produced by DataFrame.info().
train.info()

In [None]:
# Summary statistics for every column (include='all' also covers non-numeric columns).
train.describe(include='all')

In [None]:
### testing...

def age(x):
    '''
    Map a raw age to its class id.

    Returns 0 for ages below 30, 1 for ages from 30 up to 58,
    and 2 for ages of 59 and above.
    '''
    # Exclusive upper bounds of class 0 and class 1; everything else is class 2.
    for class_id, upper_bound in enumerate((30, 59)):
        if x < upper_bound:
            return class_id
    return 2

# age: <30 -> 0, 30..58 -> 1, >=59 -> 2 (boundaries come from age())
train['age'] = train['age'].apply(age)

# gender: female -> 0, male -> 1
train['gender'] = train['gender'].map({'female': 0, 'male': 1})

# The race column is not needed, so drop it.
train = train.drop(columns=['race'])

In [None]:
# Fix gender labels that are wrong for specific people.
# gender: female: 0, male: 1
# Column 3 is compared against person folder names; column 1 is the gender code.

gender_fixes = {
    # male => female
    '004432_male_Asian_43': 0,
    '001498-1_male_Asian_23': 0,
    # female => male
    '006359_female_Asian_18': 1,
    '006360_female_Asian_18': 1,
    '006361_female_Asian_18': 1,
    '006362_female_Asian_18': 1,
    '006363_female_Asian_18': 1,
    '006364_female_Asian_18': 1,
}

for i in range(len(train)):
    corrected = gender_fixes.get(train.iloc[i, 3])
    if corrected is not None:
        train.iloc[i, 1] = corrected
        print(train.iloc[i])

In [None]:
def prepare_mask_data(data_path):
    '''
    Create a new table with labeled path and mask.

    Visits every sub-folder of data_path and labels each image by its file
    name: contains 'normal' -> 0, contains 'incorrect' -> 1, anything else -> 2.
    Folders and files are visited in sorted order so the row order is identical
    on every run (os.listdir alone returns entries in arbitrary order).

    Args:
        data_path: directory that contains one sub-folder of images per person.

    Returns:
        pd.DataFrame with the columns 'path' (image path) and 'mask' (0, 1 or 2).
    '''
    data = {'path': [], 'mask': []}

    for person_dir in sorted(os.listdir(data_path)):
        if person_dir[:2] == '._': continue # skip trash
        sub = os.path.join(data_path, person_dir)
        if not os.path.isdir(sub): continue # a stray file is not a person folder

        # mask: normal: 0, incorrect: 1, mask: 2
        for img in sorted(os.listdir(sub)):
            if img[0] == '.': continue # skip trash
            if 'normal' in img:
                label = 0
            elif 'incorrect' in img:
                label = 1
            else:
                label = 2
            data['path'].append(os.path.join(sub, img)) # save path
            data['mask'].append(label) # save mask

    return pd.DataFrame(data)

In [None]:
def prepare_age_gender_data(data, people=None):
    '''
    Create a new table with labeled age and gender.

    Each image path in the first column of data is matched, through the name of
    its parent folder, against the folder names in column 3 of the people table.
    The lookup table is built once, so the cost is linear in the number of
    images instead of images x people.

    Args:
        data: table whose first column holds image paths.
        people: metadata table with gender in column 1, age in column 2 and the
            person folder name in column 3. Defaults to the module-level `train`.

    Returns:
        pd.DataFrame with the columns 'age' and 'gender', one row per image,
        in the same order as data.

    Raises:
        KeyError: if an image's folder is missing from the people table. Skipping
            such a row would shift every following label when the result is
            concatenated to data by position.
    '''
    if people is None:
        people = train

    # folder name -> (age, gender)
    lookup = {
        folder: (age_code, gender_code)
        for gender_code, age_code, folder in zip(
            people.iloc[:, 1], people.iloc[:, 2], people.iloc[:, 3]
        )
    }

    tmp = {'age': [], 'gender': []}

    for img_path in data.iloc[:, 0].values:
        # The parent folder of the image identifies the person.
        folder = os.path.basename(os.path.dirname(img_path))
        if folder not in lookup:
            raise KeyError(f'No age/gender entry for image folder: {folder}')
        age_code, gender_code = lookup[folder]
        tmp['age'].append(age_code)
        tmp['gender'].append(gender_code)

    return pd.DataFrame(tmp)

In [None]:
# Build the image table; label columns end up in the order [mask, age, gender].
data = prepare_mask_data('./input/data/train/images')
labels = prepare_age_gender_data(data)
data = pd.concat([data, labels[['age', 'gender']]], axis=1)

In [None]:
# Print the DataFrame summary of the assembled image table.
data.info()

In [None]:
# Show the assembled image table.
data

In [None]:
# Fix mask labels for people whose normal / incorrect_mask photos are swapped.
# mask: normal: 0, incorrect: 1, mask: 2
# gender: female: 0, male: 1

url = './input/data/train/images/'

swapped_people = ['000020_female_Asian_50',
                  '004418_male_Asian_20',
                  '005227_male_Asian_22']

# image path -> corrected mask label.
# The file name alone already yields 0 for normal.jpg and 1 for incorrect_mask.jpg,
# so the correction has to assign the opposite label to have any effect.
mask_fixes = {}
for person in swapped_people:
    mask_fixes[url + person + '/normal.jpg'] = 1          # normal => incorrect
    mask_fixes[url + person + '/incorrect_mask.jpg'] = 0  # incorrect => normal

for i in range(len(data)):
    corrected = mask_fixes.get(data.iloc[i, 0])
    if corrected is not None:
        data.iloc[i, 1] = corrected
        print(data.iloc[i, 0])
        print(data.iloc[i])

In [None]:
# mask, age, gender Bar Plot

mask_label = data['mask'].value_counts().sort_index()
age_label = data['age'].value_counts().sort_index()
gender_label = data['gender'].value_counts().sort_index()

# One figure with three panels (no separate empty figure is created).
fig = plt.figure(figsize=(20, 7))

ax_mask = fig.add_subplot(1, 3, 1)
ax_age = fig.add_subplot(1, 3, 2)
ax_gender = fig.add_subplot(1, 3, 3)


def _annotate_bars(ax, counts, divisor, offset):
    '''Write int(count / divisor) above each bar, using the class id as the x position.'''
    # Series.items() works on old and new pandas; iteritems() was removed in pandas 2.0.
    for index, val in counts.items():
        ax.text(x=index, y=val/divisor+offset, s=int(val/divisor),
                va='bottom', ha='center',
                fontsize=12, fontweight='bold'
               )


# Mask: raw image counts
ax_mask.bar(['Normal', 'Incorrect', 'Mask'], mask_label, zorder=10)
_annotate_bars(ax_mask, mask_label, divisor=1, offset=100)

# Age: image counts divided by 7 (get_index treats every person as owning 7 images)
ax_age.bar(['- 29', '30-58', '59 - '], age_label/7, zorder=10)
_annotate_bars(ax_age, age_label, divisor=7, offset=20)

# Gender: image counts divided by 7, as for age
ax_gender.bar(['Female', 'Male'], gender_label/7, color=['tomato', 'royalblue'], zorder=10)
_annotate_bars(ax_gender, gender_label, divisor=7, offset=20)

# Shared styling for the three panels
for ax, title in [(ax_mask, 'Mask'), (ax_age, 'Age'), (ax_gender, 'Gender')]:
    ax.margins(0.1, 0.1)
    ax.set_title(title, fontsize=15, fontweight='bold')
    ax.grid(zorder=0, axis='y')
    for s in ['top', 'right', 'left']:
        ax.spines[s].set_visible(False)


plt.show()

In [None]:
#################### CROP_NEW_IMG ####################

# Point every image path at the 'new_imgs' folder instead of 'images'.
for idx, i in enumerate(data.values):
    tmp = i[0].replace('images', 'new_imgs')
    data.iloc[idx, 0] = tmp

In [None]:
# rename to avoid confusion
# NOTE: label_df is a second name for the same DataFrame object as data, not a copy.
label_df = data
label_df

In [None]:
#################### MASK_AGE_GENDER_DATASETS ####################

class _label_dataset(Dataset):
    '''
    Shared implementation of the mask / age / gender datasets.

    Column 0 of the table holds the image path; the label is read from the
    column given by the subclass attribute label_column.

    Args:
        data: table with the image path in column 0 and the labels after it.
        index: row positions to keep; None keeps every row.
        transforms: optional callable used as transforms(image=...)['image'].
    '''
    # positional column of the label; set by each subclass
    label_column = None

    def __init__(self, data, index=None, transforms=None):
        self.index = index
        # index=None means "use the whole table" (iloc cannot take None)
        self.data = data if index is None else data.iloc[self.index]
        self.transforms = transforms

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label = int(self.data.iloc[index, self.label_column])

        img_path = self.data.iloc[index, 0]
        img = cv2.imread(img_path)
        # cv2.imread returns None instead of raising when the file cannot be read
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # opencv: BGR
        # matplotlib: RGB
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transforms:
            image = self.transforms(image=image)['image']
        # opencv cv2.imread(): [height, width, channels]
        # pytorch: [channels, height, width]
        # H, W, C => C, H, W
        image = image.transpose((2,0,1))
        sample = {'image': image, 'label': label}

        return sample


class mask_dataset(_label_dataset):
    '''Images labeled with the mask class (column 1).'''
    label_column = 1


class age_dataset(_label_dataset):
    '''Images labeled with the age class (column 2).'''
    label_column = 2


class gender_dataset(_label_dataset):
    '''Images labeled with the gender class (column 3).'''
    label_column = 3

In [None]:
#################### AUGMENTATION ####################
# https://brunokrinski.github.io/awesome-data-augmentation/
# https://hoya012.github.io/blog/albumentation_tutorial/
# https://albumentations-demo.herokuapp.com/

# Data Augmentation, Normalization, Resize
# Training: data augmentation, normalization and resizing
# Validation: normalization and resizing only

def _resize_normalize():
    '''Build the final stage shared by all pipelines: resize to 312x312, then A.Normalize().'''
    return A.Compose([
        A.Resize(312, 312),
        A.Normalize()
    ])


# mask transforms: one colour jitter and one geometric change, each applied half of the time
mask_transforms = A.Compose([
    A.OneOf(
        [
            A.RandomBrightness(p=1.0),
            A.HueSaturationValue(p=1.0),
            A.RandomContrast(p=1.0),
        ],
        p=0.5,
    ),
    A.OneOf(
        [
            A.Perspective(p=1.0),
            A.Rotate(p=0.5, limit=20, border_mode=1),
        ],
        p=0.5,
    ),
    _resize_normalize(),
])

# age transforms: grid shuffle or perspective, then noise and a small rotation
age_transforms = A.Compose([
    A.OneOf(
        [
            A.RandomGridShuffle(grid=(2, 2), p=1.0),
            A.Perspective(p=1.0),
        ],
        p=0.5,
    ),
    A.GaussNoise(p=0.5),
    A.Rotate(limit=20, p=0.5, border_mode=1),
    _resize_normalize(),
])

# gender transforms: perspective, then noise and a small rotation
gender_transforms = A.Compose([
    A.OneOf(
        [
            A.Perspective(p=1.0),
        ],
        p=0.5,
    ),
    A.GaussNoise(p=0.5),
    A.Rotate(limit=20, p=0.5, border_mode=1),
    _resize_normalize(),
])

# valid transforms: no augmentation, only resize and normalize
valid_transforms = _resize_normalize()

In [None]:
#################### RANDOMNESS ####################
# https://hoya012.github.io/blog/reproducible_pytorch/

def make_seed(seed):
    '''
    Seed every random number generator used here with the same value
    so that results can be reproduced.
    '''
    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current GPU, all GPUs
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags: deterministic mode on, benchmark mode off
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all generators once, before any sampling or model creation below.
seed = 777
make_seed(seed)

In [None]:
# for i in range(20):
#     print(f'{i:02d}: {data.iloc[i, 0]}')

In [None]:
#################### KFOLD ####################
### testing...

def get_index(n_people=2700, imgs_per_person=7, n_splits=5, random_state=777):
    '''
    KFold, Split indexes by people.

    People (not images) are split into folds, and every person index is then
    expanded to the row indexes of that person's images. This relies on the
    image table storing the imgs_per_person rows of person p contiguously at
    rows p * imgs_per_person ... p * imgs_per_person + imgs_per_person - 1.

    Args:
        n_people: number of people in the image table.
        imgs_per_person: number of consecutive rows owned by each person.
        n_splits: number of folds.
        random_state: seed of the shuffled KFold split.

    Returns:
        (train_num, valid_num): two lists with one entry per fold; each entry is
        the list of row indexes of that fold's training / validation images.
    '''
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    train_num, valid_num = list(), list()

    def _rows_of(people_idx):
        # Expand person indexes into the row indexes of all their images.
        return [first_row + offset
                for first_row in people_idx * imgs_per_person
                for offset in range(imgs_per_person)]

    for train_idx, valid_idx in kfold.split(list(range(n_people))):
        train_num.append(_rows_of(train_idx))
        valid_num.append(_rows_of(valid_idx))

    return train_num, valid_num


def get_index_label(n_samples=2700 * 7, n_splits=5, random_state=777):
    '''
    KFold, Split indexes by label.

    Rows are split individually, so images of the same person can end up in
    both the training and the validation part of a fold.

    Args:
        n_samples: number of rows in the image table.
        n_splits: number of folds.
        random_state: seed of the shuffled KFold split.

    Returns:
        (train_num, valid_num): two lists with one entry per fold; each entry is
        the array of row indexes of that fold's training / validation rows.
    '''
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    train_num, valid_num = list(), list()

    for train_idx, valid_idx in kfold.split(list(range(n_samples))):
        train_num.append(train_idx)
        valid_num.append(valid_idx)

    return train_num, valid_num

In [None]:
# a, b = get_index()
# c, d = get_index_label()
# print(f'a: {a}')
# print(f'b: {b}')
# print(f'c: {c}')
# print(f'd: {d}')

In [None]:
#################### WEIGHT ####################
# https://discuss.pytorch.org/t/weights-in-weighted-loss-nn-crossentropyloss/69514/2

### testing...

def normal_weights(data):
    '''
    Turn per-class counts into loss weights.

    Each class receives 1 - count / total, so larger classes get smaller
    weights. The result is a float tensor moved to `device`.
    '''
    total = sum(data)
    weights = [1 - (count / total) for count in data]
    return torch.FloatTensor(weights).to(device)

# Per-class counts (or ratios) -> loss weights for each task
weighted_mask = normal_weights([1, 1, 5])
# weighted_age = normal_weights([1281,1227,192])
weighted_age = normal_weights([1281,1142,277])
weighted_gender = normal_weights([1654,1046])

for weights in (weighted_mask, weighted_age, weighted_gender):
    print(weights)

In [None]:
#################### FOCALLOSS ####################
# https://nuguziii.github.io/dev/dev-002/
# https://ropiens.tistory.com/83
# https://amaarora.github.io/2020/06/29/FocalLoss.html
# https://wordbe.tistory.com/entry/ML-Cross-entropyCategorical-Binary%EC%9D%98-%EC%9D%B4%ED%95%B4
# https://headbreakz.tistory.com/entry/%EC%9D%B8%EA%B3%B5%EC%A7%80%EB%8A%A5-Batch-Mini-Batch
# https://anweh.tistory.com/21
# https://wikidocs.net/60572
# https://velog.io/@skyhelper/CrossEntorpyLoss-NLLLoss-%EB%AC%B4%EC%97%87%EC%9D%B4-%EB%8B%A4%EB%A5%B8%EA%B0%80
# http://www.gisdeveloper.co.kr/?p=8668
# https://pytorch.org/docs/1.9.0/generated/torch.nn.functional.nll_loss.html

class FocalLoss(nn.Module):
    '''
    Focal loss on raw class scores.

    The log-probability of every class is scaled by (1 - p) ** gamma before the
    negative log-likelihood is taken, which down-weights well-classified samples.

    Args:
        weight: optional per-class weight tensor passed to F.nll_loss.
        gamma: focusing exponent.
        reduction: reduction mode passed to F.nll_loss.
    '''
    def __init__(self, weight=None,
                 gamma=2., reduction='mean'):
        super().__init__()
        self.reduction = reduction
        self.gamma = gamma
        self.weight = weight

    def forward(self, input_tensor, target_tensor):
        log_prob = F.log_softmax(input_tensor, dim=-1)
        # modulating factor (1 - p) ** gamma, one value per class score
        focal_factor = (1 - log_prob.exp()).pow(self.gamma)
        scaled_log_prob = focal_factor * log_prob
        return F.nll_loss(
            scaled_log_prob,
            target_tensor,
            weight=self.weight,
            reduction=self.reduction
        )

In [None]:
#################### TRAIN_MASK ####################

### EfficientNet ###
# https://github.com/lukemelas/EfficientNet-PyTorch
# https://hoya012.github.io/blog/EfficientNet-review/
# https://keep-steady.tistory.com/35

### Learning rate Scheduler ###
# https://sanghyu.tistory.com/113
# https://sungwookkang.com/1415
# https://zsunn.tistory.com/entry/AI-%EB%94%A5%EB%9F%AC%EB%8B%9D-%EA%B0%9C%EC%9A%94-%EC%88%9C%EC%A0%84%ED%8C%8C-%EC%97%AD%EC%A0%84%ED%8C%8C-%EA%B7%B8%EB%A6%AC%EA%B3%A0-%ED%99%9C%EC%84%B1%ED%99%94-%ED%95%A8%EC%88%98
# https://wikidocs.net/37406
# https://deep-deep-deep.tistory.com/56

### F1-Score ###
# https://datascienceschool.net/03%20machine%20learning/09.04%20%EB%B6%84%EB%A5%98%20%EC%84%B1%EB%8A%A5%ED%8F%89%EA%B0%80.html
# https://brunch.co.kr/@chris-song/54
# https://eunsukimme.github.io/ml/2019/10/21/Accuracy-Recall-Precision-F1-score/

### transfer learning & fine tuning ###
# https://hyjykelly.tistory.com/50
# https://velog.io/@yookyungkho/%EB%94%A5%EB%9F%AC%EB%8B%9D%EC%9D%98-%EA%B3%A0%EC%A7%88%EB%B3%91-Overfitting%EA%B3%BC%EC%A0%81%ED%95%A9-%ED%95%B4%EA%B2%B0-%ED%8C%81
# https://inhovation97.tistory.com/31
# https://jungnamgyu.tistory.com/34
# https://inhovation97.tistory.com/32


def train_mask(num_epochs=10, batch_size=16, learnig_rate=3e-4, ef_net='efficientnet-b4', nums=0):
    '''
    Fine-tune a pretrained EfficientNet on the 3-class mask label.

    Loss, accuracy and F1 of every phase are logged to TensorBoard, and the
    weights with the lowest validation loss are saved to best_model_mask{nums}.pth.

    Args:
        num_epochs: number of train + valid passes.
        batch_size: batch size of both data loaders.
        learnig_rate: Adam learning rate (the spelling is part of the signature).
        ef_net: model name passed to EfficientNet.from_pretrained.
        nums: run id used in the TensorBoard log dir and the checkpoint file name.
            Defaults to 0 rather than to whatever a global loop variable holds
            at definition time.

    Returns:
        state_dict of the model after the last epoch (not the best checkpoint).
    '''
    writer = SummaryWriter(log_dir=f'./results/b3_mask_64_1e_2-{nums}')

    # Start at +inf so the first validation epoch always writes a checkpoint.
    best_loss = float('inf')

    criterion = nn.CrossEntropyLoss(weight=weighted_mask).to(device)

    # from_pretrained loads trained weights (from_name would build the structure only);
    # num_classes is the number of output classes.
    model_mask = EfficientNet.from_pretrained(ef_net, num_classes=3).to(device)
    optimizer_mask = optim.Adam(model_mask.parameters(), lr=learnig_rate)
    # No LR scheduler is stepped, so the learning rate stays at learnig_rate.

    _, valid_ids = get_index_label()

    # NOTE(review): the training set covers every row, including the rows of the
    # validation fold, so validation metrics are optimistic. Kept because it looks
    # deliberate — confirm.
    train_data = mask_dataset(label_df, index=list(range(2700*7)), transforms=mask_transforms)
    valid_data = mask_dataset(label_df, index=valid_ids[4], transforms=valid_transforms)

    # RandomSampler draws the elements in random order; num_workers loads in parallel.
    loaders = {
        'train': DataLoader(
            train_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(train_data), num_workers=4
        ),
        'valid': DataLoader(
            valid_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(valid_data), num_workers=4
        ),
    }

    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 20)

            # Each epoch has a training phase followed by a validation phase.
            for phase in ['train', 'valid']:
                is_train = phase == 'train'
                model_mask.train(is_train)  # training mode / evaluation mode

                running_loss, running_corrects = 0.0, 0
                num_cnt, num_batches, f1_scr = 0, 0, 0.0

                # Each phase reads its own loader: validation must not reuse the training data.
                for sample in tqdm.tqdm(loaders[phase]):
                    inputs = sample['image'].to(device)
                    label = sample['label'].to(device)

                    # forward pass; gradients are tracked only while training
                    with torch.set_grad_enabled(is_train):
                        outputs = model_mask(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, label)
                        if is_train:
                            optimizer_mask.zero_grad()
                            loss.backward()        # back propagation
                            optimizer_mask.step()  # optimize

                    # statistics; count the real batch size so a short last batch is exact
                    num_cnt += label.size(0)
                    num_batches += 1
                    running_loss += loss.item()
                    running_corrects += torch.sum(preds == label).item()
                    f1_scr += f1_score(label.detach().cpu(), preds.detach().cpu(), average='macro')

                epoch_loss = float(running_loss)                   # sum of the per-batch losses
                epoch_acc = running_corrects / max(num_cnt, 1) * 100
                f1 = f1_scr / max(num_batches, 1)                  # mean of the per-batch macro F1

                # record in tensorboard
                writer.add_scalar(f'Loss/{phase}', epoch_loss, epoch)
                writer.add_scalar(f'Accuracy/{phase}', epoch_acc, epoch)
                writer.add_scalar(f'F1-Score/{phase}', f1, epoch)

                print(
                    f'{phase} loss_mask: {epoch_loss:.4f} Acc_mask: {epoch_acc:.4f} F1_score: {f1:.4f}'
                )

                # keep the weights with the lowest validation loss
                if phase == 'valid' and epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_mask = copy.deepcopy(model_mask.state_dict())
                    torch.save(best_model_mask, f'best_model_mask{nums}.pth')
                    print('best_model_mask save!')
            print('-' * 20)
    finally:
        # flush and release the TensorBoard event file even if training fails
        writer.close()

    return model_mask.state_dict()

# Settings to tune:
#   num_epochs: 1 - 20
#   batch_size: 32, 64
#   learnig_rate: 3e-4, 1e-4
#   ef_net: 'efficientnet-b4', 'efficientnet-b3'
# Train 5 runs; nums=i gives each run its own log dir and checkpoint file.
for i in range(5):
    print('=' * 20)
    print(f'=== {i+1}/5 counts ===')
    model_mask_current = train_mask(num_epochs=2, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=i)

# model_mask_current = train_mask(num_epochs=10, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=0)

In [None]:
#################### TRAIN_AGE ####################

def train_age(num_epochs=10, batch_size=16, learnig_rate=3e-4, ef_net='efficientnet-b4', nums=0):
    '''
    Fine-tune a pretrained EfficientNet on the 3-class age label with focal loss.

    Loss, accuracy and F1 of every phase are logged to TensorBoard, and the
    weights with the lowest validation loss are saved to best_model_age{nums}.pth.

    Args:
        num_epochs: number of train + valid passes.
        batch_size: batch size of both data loaders.
        learnig_rate: Adam learning rate (the spelling is part of the signature).
        ef_net: model name passed to EfficientNet.from_pretrained.
        nums: run id used in the TensorBoard log dir and the checkpoint file name.
            Defaults to 0 rather than to whatever a global loop variable holds
            at definition time.

    Returns:
        state_dict of the model after the last epoch (not the best checkpoint).
    '''
    writer = SummaryWriter(log_dir=f'./results/b3_age_64_1e_4-{nums}')

    # Start at +inf so the first validation epoch always writes a checkpoint.
    best_loss = float('inf')

    # Class-weighted focal loss instead of plain cross entropy.
    criterion = FocalLoss(weight=weighted_age).to(device)

    # from_pretrained loads trained weights (from_name would build the structure only);
    # num_classes is the number of output classes.
    model_age = EfficientNet.from_pretrained(ef_net, num_classes=3).to(device)
    optimizer_age = optim.Adam(model_age.parameters(), lr=learnig_rate)
    # No LR scheduler is stepped, so the learning rate stays at learnig_rate.

    _, valid_ids = get_index_label()

    # NOTE(review): the training set covers every row, including the rows of the
    # validation fold, so validation metrics are optimistic. Kept because it looks
    # deliberate — confirm.
    train_data = age_dataset(label_df, index=list(range(2700*7)), transforms=age_transforms)
    valid_data = age_dataset(label_df, index=valid_ids[4], transforms=valid_transforms)

    # RandomSampler draws the elements in random order; num_workers loads in parallel.
    loaders = {
        'train': DataLoader(
            train_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(train_data), num_workers=4
        ),
        'valid': DataLoader(
            valid_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(valid_data), num_workers=4
        ),
    }

    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 20)

            # Each epoch has a training phase followed by a validation phase.
            for phase in ['train', 'valid']:
                is_train = phase == 'train'
                model_age.train(is_train)  # training mode / evaluation mode

                running_loss, running_corrects = 0.0, 0
                num_cnt, num_batches, f1_scr = 0, 0, 0.0

                # Each phase reads its own loader: validation must not reuse the training data.
                for sample in tqdm.tqdm(loaders[phase]):
                    inputs = sample['image'].to(device)
                    label = sample['label'].to(device)

                    # forward pass; gradients are tracked only while training
                    with torch.set_grad_enabled(is_train):
                        outputs = model_age(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, label)
                        if is_train:
                            optimizer_age.zero_grad()
                            loss.backward()       # back propagation
                            optimizer_age.step()  # optimize

                    # statistics; count the real batch size so a short last batch is exact
                    num_cnt += label.size(0)
                    num_batches += 1
                    running_loss += loss.item()
                    running_corrects += torch.sum(preds == label).item()
                    f1_scr += f1_score(label.detach().cpu(), preds.detach().cpu(), average='macro')

                epoch_loss = float(running_loss)                   # sum of the per-batch losses
                epoch_acc = running_corrects / max(num_cnt, 1) * 100
                f1 = f1_scr / max(num_batches, 1)                  # mean of the per-batch macro F1

                # record in tensorboard
                writer.add_scalar(f'Loss/{phase}', epoch_loss, epoch)
                writer.add_scalar(f'Accuracy/{phase}', epoch_acc, epoch)
                writer.add_scalar(f'F1-Score/{phase}', f1, epoch)

                print(
                    f'{phase} loss_age: {epoch_loss:.4f} Acc_age: {epoch_acc:.4f} F1_score: {f1:.4f}'
                )

                # keep the weights with the lowest validation loss
                if phase == 'valid' and epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_age = copy.deepcopy(model_age.state_dict())
                    torch.save(best_model_age, f'best_model_age{nums}.pth')
                    print('best_model_age save!')
            print('-' * 20)
    finally:
        # flush and release the TensorBoard event file even if training fails
        writer.close()

    return model_age.state_dict()

# Settings to tune:
#   num_epochs: 1 - 20
#   batch_size: 32, 64
#   learnig_rate: 3e-4, 1e-4
#   ef_net: 'efficientnet-b4', 'efficientnet-b3'
# Train 5 runs; nums=i gives each run its own log dir and checkpoint file.
for i in range(5):
    print('=' * 20)
    print(f'=== {i+1}/5 counts ===')
    model_age_current = train_age(num_epochs=4, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=i)

# model_age_current = train_age(num_epochs=10, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=0)

In [None]:
# import gc
# gc.collect()
# torch.cuda.empty_cache() # GPU 캐시 데이터 삭제
!free -mh

In [None]:
#################### TRAIN_GENDER ####################

def train_gender(num_epochs=10, batch_size=16, learnig_rate=3e-4, ef_net='efficientnet-b4', nums=0):
    '''
    Fine-tune a pretrained EfficientNet on the 2-class gender label.

    Loss, accuracy and F1 of every phase are logged to TensorBoard, and the
    weights with the lowest validation loss are saved to best_model_gender{nums}.pth.

    Args:
        num_epochs: number of train + valid passes.
        batch_size: batch size of both data loaders.
        learnig_rate: Adam learning rate (the spelling is part of the signature).
        ef_net: model name passed to EfficientNet.from_pretrained.
        nums: run id used in the TensorBoard log dir and the checkpoint file name.
            Defaults to 0 rather than to whatever a global loop variable holds
            at definition time.

    Returns:
        state_dict of the model after the last epoch (not the best checkpoint).
    '''
    writer = SummaryWriter(log_dir=f'./results/b3_gender_64_1e_4-{nums}')

    # Start at +inf so the first validation epoch always writes a checkpoint.
    best_loss = float('inf')

    criterion = nn.CrossEntropyLoss(weight=weighted_gender).to(device)

    # from_pretrained loads trained weights (from_name would build the structure only);
    # num_classes is the number of output classes.
    model_gender = EfficientNet.from_pretrained(ef_net, num_classes=2).to(device)
    optimizer_gender = optim.Adam(model_gender.parameters(), lr=learnig_rate)
    # No LR scheduler is stepped, so the learning rate stays at learnig_rate.

    _, valid_ids = get_index_label()

    # NOTE(review): the training set covers every row, including the rows of the
    # validation fold, so validation metrics are optimistic. Kept because it looks
    # deliberate — confirm.
    train_data = gender_dataset(label_df, index=list(range(2700*7)), transforms=gender_transforms)
    valid_data = gender_dataset(label_df, index=valid_ids[4], transforms=valid_transforms)

    # RandomSampler draws the elements in random order; num_workers loads in parallel.
    loaders = {
        'train': DataLoader(
            train_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(train_data), num_workers=4
        ),
        'valid': DataLoader(
            valid_data, batch_size=batch_size,
            shuffle=False, sampler=RandomSampler(valid_data), num_workers=4
        ),
    }

    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 20)

            # Each epoch has a training phase followed by a validation phase.
            for phase in ['train', 'valid']:
                is_train = phase == 'train'
                model_gender.train(is_train)  # training mode / evaluation mode

                running_loss, running_corrects = 0.0, 0
                num_cnt, num_batches, f1_scr = 0, 0, 0.0

                # Each phase reads its own loader: validation must not reuse the training data.
                for sample in tqdm.tqdm(loaders[phase]):
                    inputs = sample['image'].to(device)
                    label = sample['label'].to(device)

                    # forward pass; gradients are tracked only while training
                    with torch.set_grad_enabled(is_train):
                        outputs = model_gender(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, label)
                        if is_train:
                            optimizer_gender.zero_grad()
                            loss.backward()          # back propagation
                            optimizer_gender.step()  # optimize

                    # statistics; count the real batch size so a short last batch is exact
                    num_cnt += label.size(0)
                    num_batches += 1
                    running_loss += loss.item()
                    running_corrects += torch.sum(preds == label).item()
                    f1_scr += f1_score(label.detach().cpu(), preds.detach().cpu(), average='macro')

                epoch_loss = float(running_loss)                   # sum of the per-batch losses
                epoch_acc = running_corrects / max(num_cnt, 1) * 100
                f1 = f1_scr / max(num_batches, 1)                  # mean of the per-batch macro F1

                # record in tensorboard
                writer.add_scalar(f'Loss/{phase}', epoch_loss, epoch)
                writer.add_scalar(f'Accuracy/{phase}', epoch_acc, epoch)
                writer.add_scalar(f'F1-Score/{phase}', f1, epoch)

                print(
                    f'{phase} loss_gender: {epoch_loss:.4f} Acc_gender: {epoch_acc:.4f} F1_score: {f1:.4f}'
                )

                # keep the weights with the lowest validation loss
                if phase == 'valid' and epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_gender = copy.deepcopy(model_gender.state_dict())
                    torch.save(best_model_gender, f'best_model_gender{nums}.pth')
                    print('best_model_gender save!')
            print('-' * 20)
    finally:
        # flush and release the TensorBoard event file even if training fails
        writer.close()

    return model_gender.state_dict()

# Settings to tune:
#   num_epochs: 1 - 20
#   batch_size: 32, 64
#   learnig_rate: 3e-4, 1e-4
#   ef_net: 'efficientnet-b4', 'efficientnet-b3'
# Train 5 runs; nums=i gives each run its own log dir and checkpoint file.
for i in range(5):
    print('=' * 20)
    print(f'=== {i+1}/5 counts ===')
    model_gender_current = train_gender(num_epochs=4, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=i)
    
# model_gender_current = train_gender(num_epochs=10, batch_size=64, learnig_rate=1e-4, ef_net='efficientnet-b3', nums=0)

In [None]:
# order in [mask, age, gender]
# mask: wear: 2, incorrect: 1, normal: 0
# age: - 29: 0, 30 - 59: 1, 60 - : 2
# gender: female: 0, male: 1

# Key = '<mask><age><gender>' digits, value = combined class id 0..17.
# Ids are numbered with mask going 2, 1, 0 (6 ids each), then gender 1, 0
# (3 ids each), then age 0, 1, 2.
classes = {
    f'{m}{a}{g}': (2 - m) * 6 + (1 - g) * 3 + a
    for m in (2, 1, 0)
    for g in (1, 0)
    for a in (0, 1, 2)
}

In [None]:
#################### TEST_DATASETS ####################

class dataset_test(Dataset):
    """Unlabelled evaluation images used for inference.

    Args:
        data: DataFrame whose first column (position 0) holds the image file
            name, resolved relative to ``path``.
        transforms: optional callable invoked as ``transforms(image=...)`` and
            expected to return a mapping with an ``'image'`` entry.
        path: directory that contains the evaluation images.

    Each item is a dict ``{'image': array}`` with the channel axis moved first.
    """

    def __init__(self, data, transforms=None, path='./input/data/eval/new_imgs'):
        self.data = data
        self.transforms = transforms
        self.path = path

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Load, colour-convert and (optionally) transform the image at ``idx``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        full_path = os.path.join(self.path, self.data.iloc[idx, 0])
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread does not raise on a missing/corrupt file; it returns
            # None, which would otherwise surface as an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {full_path}')
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms:
            image = self.transforms(image=image)['image']
        # Move the last axis (channels) to the front: HWC -> CHW.
        # Assumes the transform keeps the array in HWC layout - TODO confirm.
        image = image.transpose((2, 0, 1))

        return {'image': image}

In [None]:
#################### TEST_PREPARE ####################
# Build one ensemble per task (mask / age / gender) from the saved
# ./best_model_<task><i>.pth checkpoints.
# NOTE (original author): ensembling does improve the score, but it is unclear
# whether doing it this way is legitimate.

N_ENSEMBLE = 5
ENSEMBLE_BACKBONE = 'efficientnet-b3'


def _load_ensemble(task, num_classes, n_models=N_ENSEMBLE):
    """Create ``n_models`` EfficientNets and load ``./best_model_{task}{i}.pth`` into each.

    Args:
        task: checkpoint name fragment ('mask', 'age' or 'gender').
        num_classes: size of the classification head.
        n_models: number of checkpoints to load, indexed 0 .. n_models - 1.

    Returns:
        List of models on ``device``, already switched to eval mode.
    """
    models = []
    for idx in range(n_models):
        model = EfficientNet.from_name(ENSEMBLE_BACKBONE, num_classes=num_classes).to(device)
        # map_location lets checkpoints that were saved from a GPU be loaded
        # when the current `device` is the CPU.
        state_dict = torch.load(f'./best_model_{task}{idx}.pth', map_location=device)
        model.load_state_dict(state_dict)
        # These models are only used for inference; set eval mode once here so
        # every later cell sees them in a consistent state.
        model.eval()
        models.append(model)
    return models


models_mask = _load_ensemble('mask', num_classes=3)
models_age = _load_ensemble('age', num_classes=3)
models_gender = _load_ensemble('gender', num_classes=2)

# Keep the individual names that other cells refer to (e.g. model_mask0).
# These aliases assume N_ENSEMBLE == 5.
model_mask0, model_mask1, model_mask2, model_mask3, model_mask4 = models_mask
model_age0, model_age1, model_age2, model_age3, model_age4 = models_age
model_gender0, model_gender1, model_gender2, model_gender3, model_gender4 = models_gender

In [None]:
#################### TEST ####################
# Run the three ensembles over the evaluation images, map each
# (mask, age, gender) prediction to its combined class id and write the
# submission CSV.

test_data = pd.read_csv('./input/data/eval/info.csv')
submission = test_data.copy()

test_dataset = dataset_test(test_data, transforms=valid_transforms)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
total_result = list()

# Switching to eval mode does not depend on the batch, so do it once up front
# instead of for every model on every batch.
for model in (*models_mask, *models_age, *models_gender):
    model.eval()

with torch.no_grad():
    for sample in tqdm.tqdm(test_dataloader):
        inputs = sample['image'].to(device)

        # Ensemble each task by summing the raw outputs of its models.
        output_mask = sum(model(inputs) for model in models_mask)
        output_age = sum(model(inputs) for model in models_age)
        output_gender = sum(model(inputs) for model in models_gender)

        _, preds_mask = torch.max(output_mask, 1)
        _, preds_age = torch.max(output_age, 1)
        _, preds_gender = torch.max(output_gender, 1)

        # One .tolist() per task moves the whole batch to Python ints at once,
        # instead of a device -> host round trip for every single prediction.
        for mask, age, gender in zip(
            preds_mask.tolist(), preds_age.tolist(), preds_gender.tolist()
        ):
            # Keys of `classes` are the three digits in [mask, age, gender] order.
            total_result.append(classes[f'{mask}{age}{gender}'])

submission['ans'] = total_result
submission.to_csv('./submission10.csv', index=False)
print('Done!')

In [None]:
# Show the submission DataFrame (including the predicted 'ans' column) as the cell output.
submission

In [None]:
#################### VALID_DATASETS ####################

class dataset_valid(Dataset):
    """Labelled images for checking the three classifiers offline.

    Args:
        data: DataFrame read positionally - column 0 is the image path,
            columns 1, 2 and 3 are the mask, age and gender labels.
        transforms: optional callable invoked as ``transforms(image=...)`` and
            expected to return a mapping with an ``'image'`` entry.

    Each item is a dict with the channel-first image under ``'image'`` and the
    integer labels under ``'label_mask'``, ``'label_age'`` and ``'label_gender'``.
    """

    def __init__(self, data, transforms=None):
        self.data = data
        self.transforms = transforms

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Load the image at ``idx`` together with its three labels.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = self.data.iloc[idx, 0]
        label_mask = int(self.data.iloc[idx, 1])
        label_age = int(self.data.iloc[idx, 2])
        label_gender = int(self.data.iloc[idx, 3])

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None rather than raising for a bad path.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # Bind `image` unconditionally so the transpose below also works when
        # no transforms were supplied (previously an UnboundLocalError).
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms:
            image = self.transforms(image=image)['image']
        # Move the last axis (channels) to the front: HWC -> CHW.
        # Assumes the transform keeps the array in HWC layout - TODO confirm.
        image = image.transpose((2, 0, 1))

        return {
            'image': image,
            'label_mask': label_mask,
            'label_age': label_age,
            'label_gender': label_gender,
        }

In [None]:
#################### F1_Score ####################
# Macro F1 of the combined class id on the last 20% of `data`, using the first
# model of each task (model_mask0 / model_age0 / model_gender0).
#
# The score is computed once over all collected predictions. Calling f1_score
# per sample on one-hot vectors (as this cell did before) yields 1.0 for a
# correct sample and roughly 0.47 for a wrong one, i.e. an inflated accuracy
# rather than a macro F1.

valid_test_data = dataset_valid(data.iloc[int(len(data)*0.8):, :], transforms=valid_transforms)
valid_dataloader = DataLoader(valid_test_data, batch_size=16, shuffle=False, num_workers=4)

# Make the result independent of which cells ran earlier: put the evaluated
# models in eval mode here.
model_mask0.eval()
model_age0.eval()
model_gender0.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for sample in tqdm.tqdm(valid_dataloader):
        inputs = sample['image'].to(device)

        # To score the full ensembles instead, sum the outputs over
        # models_mask / models_age / models_gender here.
        output_mask = model_mask0(inputs)
        output_age = model_age0(inputs)
        output_gender = model_gender0(inputs)

        _, preds_mask = torch.max(output_mask, 1)
        _, preds_age = torch.max(output_age, 1)
        _, preds_gender = torch.max(output_gender, 1)

        # Keys of `classes` are the three digits in [mask, age, gender] order.
        for mask, age, gender in zip(
            preds_mask.tolist(), preds_age.tolist(), preds_gender.tolist()
        ):
            y_pred.append(classes[f'{mask}{age}{gender}'])

        for label_mask, label_age, label_gender in zip(
            sample['label_mask'].tolist(),
            sample['label_age'].tolist(),
            sample['label_gender'].tolist(),
        ):
            y_true.append(classes[f'{label_mask}{label_age}{label_gender}'])

# Macro average over the class ids that occur in y_true or y_pred.
f1_sc = f1_score(y_true, y_pred, average='macro')
print(f1_sc)