## Multi Label(Head, Branch) Classifier
- 하나의 Convolution 모델에서 3개의 FC Layer 브랜치를 만들어보자

In [1]:
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt 

import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


cuda


In [2]:
# Custom Model Template
class Res50(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        
        """
        1. 위와 같이 생성자의 parameter 에 num_claases 를 포함해주세요.
        2. 나만의 모델 아키텍쳐를 디자인 해봅니다.
        3. 모델의 output_dimension 은 num_classes 로 설정해주세요.
        """
        self.pretrain_model = torchvision.models.resnext50_32x4d(pretrained=True)
        self.pretrain_model.fc = torch.nn.Linear(in_features=2048, out_features=num_classes, bias=True) # resnet18.fc의 in_features의 크기는?
        # torch.nn.init.xavier_uniform_(pretrain_model.fc.weight)
        # stdv = 1.0/np.sqrt(512) # fully connected layer의 bias를 resnet18.fc in_feature의 크기의 1/root(n) 크기의 uniform 분산 값 중 하나로 설정해주세요! - Why? https://stackoverflow.com/questions/49433936/how-to-initialize-weights-in-pytorch
        # pretrain_model.fc.bias.data.uniform_(-stdv, stdv)

    def forward(self, x):
        """
        1. 위에서 정의한 모델 아키텍쳐를 forward propagation 을 진행해주세요
        2. 결과로 나온 output 을 return 해주세요
        """
        x = self.pretrain_model.forward(x)
        return x


# Custom Model Template
class MultiRes50(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        
        """
        1. 위와 같이 생성자의 parameter 에 num_claases 를 포함해주세요.
        2. 나만의 모델 아키텍쳐를 디자인 해봅니다.
        3. 모델의 output_dimension 은 num_classes 로 설정해주세요.
        """
        self.res50 = Res50(num_classes)
        self.res50.load_state_dict(torch.load("/opt/ml/workspace/code/model/res50CusDS3/best.pth"))
        self.res50.to("cpu")
        
        self.res50 = nn.Sequential(*list(self.res50.pretrain_model.children())[:-1])

        self.mask = nn.Linear(2048, 3, bias=True)
        self.mask.load_state_dict(torch.load("/opt/ml/workspace/code/resnext50_32x4dfc3ways_maskv2.pt"))

        self.age = nn.Linear(2060, 3, bias=True)
        self.gender = nn.Linear(2060, 3, bias=True)

        
    def forward(self, x):
        """
        1. 위에서 정의한 모델 아키텍쳐를 forward propagation 을 진행해주세요
        2. 결과로 나온 output 을 return 해주세요
        """
        x = self.res50.forward(x)
        x = torch.flatten(x, start_dim=1)
        m = self.mask(x)

        pred_mask = torch.argmax(m, dim=-1).cpu().numpy()[0]

        base = torch.ones(x.shape[0],12)
        if pred_mask == 0: 
            base = base * 0
            x = torch.cat([x, base.to(device)], dim=1)
        elif pred_mask == 1:
            base = base * 10
            x = torch.cat([x, base.to(device)], dim=1)
        elif pred_mask == 2:
            base = base * -10
            x = torch.cat([x, base.to(device)], dim=1)
        a = self.age(x)
        g = self.gender(x)
        
        return {"mask":m, "age":a, "gender":g}

In [3]:
def dfs_freeze(model):
    for name, child in model.named_children():
        for param in child.parameters():
            #print(param)
            param.requires_grad = False
            #print(param)
        dfs_freeze(child)

In [4]:
mask_model = MultiRes50(18)

In [5]:
dfs_freeze(mask_model.res50)
dfs_freeze(mask_model.mask)

In [6]:
# from torchsummary import summary
# summary(mask_model.to(device),input_size=(3,320,320))

- 기존 train data 1가지
- mask, gender, age 각각 labeling을 한 데이터프레임 3가지
- 총 4가지의 데이터프레임을 Dataset모듈에 넣을 것임

In [7]:
# # 기본 train 데이터 셋
df = pd.read_csv('train_label.csv')
# df.head(5)

In [8]:
def split_data(df):
    # # wear, incorrect, normal 3가지 클래스로 변경
    # # 0~5, 6~11, 12~17끼리 묶는다
    df_mask = df.copy()

    def mask_label(x):
        if x in [0,1,2,3,4,5]:
            return 0    # wear
        elif x in [6,7,8,9,10,11]:
            return 1    # incorrect
        else:
            return 2    # not wear

    df_mask['label'] = df_mask['label'].apply(mask_label)

    # # 남성, 여성 2가지 클래스로 변경
    # # [0,1,2,6,7,8,12,13,14], [3,4,5,9,10,11,15,16,17]
    df_gender = df.copy()

    def gender_label(x):
        if x in [0,1,2,6,7,8,12,13,14]:
            return 0    # male
        else:
            return 1    # female

    df_gender['label'] = df_gender['label'].apply(gender_label)

    # # 청년, 중년, 장년 3가지 클래스로 변경
    # # [0,3,6,9,12,15], [1,4,7,10,13,16], [2,5,8,11,14,17]
    df_age = df.copy()

    def age_label(x):
        if x in [0,3,6,9,12,15]:
            return 0    # young
        elif x in [1,4,7,10,13,16]:
            return 1    # middle
        else:
            return 2    # old

    df_age['label'] = df_age['label'].apply(age_label)

    return df_mask['label'],df_age['label'],df_gender['label']


### Dataset  정의
- mask, gender, age 별로 label 나눔

In [9]:
class MultiBranchDataset(Dataset):
    def __init__(self, df, transforms):
        self.df = df
        self.image_data = self.df['path']   # x data, 이미지
        self.image_label = self.df['label'] # y data, 레이블


        self.mask_label, self.age_label, self.gender_label = split_data(df)
        self.transform = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):

        img_path = self.df['path'].iloc[idx]
        img = Image.open(img_path)

        if self.transform:
            img = self.transform(img)
        
        dict_label = {
            'class' : self.image_label.iloc[idx],
            'mask' : self.mask_label.iloc[idx],
            'gender' : self.gender_label.iloc[idx],
            'age' : self.age_label.iloc[idx]
        }

        return img, dict_label

class CusDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.image_data = self.df['path']   # x data, 이미지
        self.image_label = self.df['label'] # y data, 레이블

        self.transform = transform

    def __getitem__(self, idx):
        image = Image.open(self.image_data.iloc[idx])
        label = self.image_label.iloc[idx]

        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label)

    def __len__(self):
        return len(self.image_label)


#### Transform & Dataset
- 모든 데이터를 한방에 학습할 때와
- train과 valid를 split할 때의 코드가 다름 (train, valid를 쪼갠거와 동일한 mask, gender, age 레이블링이 필요함)

In [10]:
# Transform Compose
data_transform = torchvision.transforms.Compose([
    # transforms.CenterCrop(320),
    torchvision.transforms.CenterCrop(320),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.2,0.2,0.2)),
])


from sklearn.model_selection import train_test_split
train, valid = train_test_split(df, test_size = 0.25, shuffle=True, stratify=df['label'], random_state=1234)
print(train.shape, valid.shape)

# train_dataset = CusDataset(train, data_transform)
# test_dataset = CusDataset(valid, data_transform)

# train data 한방에 학습시키는 경우
train_dataset = MultiBranchDataset(train,data_transform)
test_dataset = MultiBranchDataset(valid,data_transform)

(14175, 3) (4725, 3)


### DataLoader
- 여기서 3개로 나눌 필요가 없음
- 모델 train에서 loss를 따로 나눌 것임

In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=50, 
        num_workers=2,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,)

test_dataloader = DataLoader(test_dataset, batch_size=50, 
        num_workers=2,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,)

In [12]:
dataloaders = {
    "train" : train_dataloader,
    "test" : test_dataloader
}

데이터로더의 label 부분을 보면 mask, gender, age별로 배치사이즈에 맞게 레이블 정보가 담아짐

In [13]:
next(iter(train_dataloader))

[tensor([[[[ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           [ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           [ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           ...,
           [ 1.1863,  1.1863,  1.2059,  ..., -2.3627, -2.4412, -2.4608],
           [ 1.0882,  1.0882,  1.1275,  ..., -2.3824, -2.4608, -2.4020],
           [ 1.0490,  1.0490,  1.0686,  ..., -2.4412, -2.4804, -2.3824]],
 
          [[ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           [ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           [ 1.6569,  1.6569,  1.6569,  ...,  1.5588,  1.5588,  1.5588],
           ...,
           [ 1.4216,  1.4216,  1.4216,  ..., -0.9510, -1.0490, -1.0686],
           [ 1.4608,  1.4608,  1.4608,  ..., -0.9902, -1.0686, -1.0098],
           [ 1.5000,  1.5000,  1.4804,  ..., -1.0490, -1.0882, -0.9902]],
 
          [[ 1.6961,  1.6961,  1.6961,  ...,  1.5980,  1.5980,  1.5980],
           [ 

#### Conv layer 빠져나오고 브랜치 만들기

### Loss 함수 정의
- 3가지 task 각각 loss 구하고 합침

## Train

In [14]:
from tqdm.notebook import tqdm 

In [15]:
leraning_book = ["age","gender"]

In [16]:
target_model = mask_model.to(device)

LEARNING_RATE = 0.0001
NUM_EPOCH = 3

optimizer = {
        "mask":torch.optim.Adam(mask_model.mask.parameters(), lr=LEARNING_RATE),
        "age":torch.optim.Adam(mask_model.age.parameters(), lr=LEARNING_RATE),
        "gender":torch.optim.Adam(mask_model.gender.parameters(), lr=LEARNING_RATE)
    }
#optimizer = torch.optim.Adam(mask_model.parameters(), lr=LEARNING_RATE)

mask_loss_fn = torch.nn.CrossEntropyLoss()
age_loss_fn = torch.nn.CrossEntropyLoss()
gender_loss_fn = torch.nn.CrossEntropyLoss()
loss_fn = {"mask":mask_loss_fn,"age":age_loss_fn,"gender":gender_loss_fn}

In [17]:
best_test_accuracy = {i:0.for i in leraning_book}
best_test_loss = {i:9999.for i in leraning_book}

for epoch in range(NUM_EPOCH):
    for phase in ["train", "test"]:
        running_loss = {i:0.for i in leraning_book}
        running_acc = {i:0.for i in leraning_book}
        if phase == "train":
            target_model.train() # 네트워크 모델을 train 모드로 두어 gradient을 계산하고, 여러 sub module (배치 정규화, 드롭아웃 등)이 train mode로 작동할 수 있도록 함
        elif phase == "test":
            target_model.eval()

        for ind, (images, labels) in enumerate(tqdm(dataloaders[phase])):
            images = images.to(device)
            labels = labels#.to(device) 

            for k in leraning_book:
                optimizer[k].zero_grad()
            loss = {}
            preds = {}
            with torch.set_grad_enabled(phase == "train"): # train 모드일 시에는 gradient를 계산하고, 아닐 때는 gradient를 계산하지 않아 연산량 최소화
                logits = target_model(images)

                for k,v in logits.items():
                    if k in leraning_book:
                        _, preds[k] = torch.max(v, 1)
                        loss[k] = loss_fn[k](v, labels[k].to(device))

                if phase == "train":
                    for k,v in loss.items():
                        if k in leraning_book:
                            loss[k].backward()
                            optimizer[k].step() # 계산된 gradient를 가지고 모델 업데이트

            for k,v in loss.items():
                if k in leraning_book:
                    running_loss[k] += loss[k].item() * images.size(0) # 한 Batch에서의 loss 값 저장
                    running_acc[k] += torch.sum(preds[k] == labels[k].data.to(device)) # 한 Batch에서의 Accuracy 값 저장

        epoch_loss = {}
        epoch_acc = {}
        # 한 epoch이 모두 종료되었을 때,
        for k in leraning_book:
            epoch_loss[k] = running_loss[k] / len(dataloaders[phase].dataset)
            epoch_acc[k] = running_acc[k] / len(dataloaders[phase].dataset)
            print(f"현재 epoch-{epoch}의 {phase}-데이터 셋에서 평균 Loss : {epoch_loss[k]:.3f}, 평균 Accuracy : {epoch_acc[k]:.3f}")
            if phase == "test" and best_test_accuracy[k] < epoch_acc[k]: # phase가 test일 때, best accuracy 계산
                best_test_accuracy[k] = epoch_acc[k]
            if phase == "test" and best_test_loss[k] > epoch_loss[k]: # phase가 test일 때, best loss 계산
                best_test_loss[k] = epoch_loss[k]
        # if i % 50 == 0:
        #     print('Epoch: {}, i: {},Loss: {:.6f}'.format(epoch, i, running_loss))
        #     print(f'{i}번 배치: {running_acc}/{(i+1)*64}, 정확도: {running_acc/((i+1)*64)}')
for k in leraning_book: 
    print("학습 종료!")
    print(f"최고 accuracy : {best_test_accuracy[k]}, 최고 낮은 loss : {best_test_loss[k]}")


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=283.0), HTML(value='')))


현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.222, 평균 Accuracy : 0.953
현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.140, 평균 Accuracy : 0.977


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=94.0), HTML(value='')))


현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.069, 평균 Accuracy : 0.982
현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.029, 평균 Accuracy : 0.991


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=283.0), HTML(value='')))


현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.078, 평균 Accuracy : 0.980
현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.028, 평균 Accuracy : 0.994


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=94.0), HTML(value='')))


현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.055, 평균 Accuracy : 0.982
현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.019, 평균 Accuracy : 0.991


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=283.0), HTML(value='')))


현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.070, 평균 Accuracy : 0.981
현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.022, 평균 Accuracy : 0.995


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=94.0), HTML(value='')))


현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.052, 평균 Accuracy : 0.982
현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.016, 평균 Accuracy : 0.992
학습 종료!
최고 accuracy : 0.9822222590446472, 최고 낮은 loss : 0.05219957252185811
학습 종료!
최고 accuracy : 0.991534411907196, 최고 낮은 loss : 0.015796472365282987


### 테스트(Evaluation) 중, mask, gender, age별 output 뽑아내기
- 최종 클래스 예측(18개)으로 변환
- mask : 0, 1, 2 (wear, incorrect, not wear)
- gender : 0, 1  (male, female)
- age : 0, 1, 2  (young, middle, old)
- class : (mask * 6) + (gender * 3) + (age)

In [18]:
output = mask_model(image)
pred_mask = torch.argmax(output['mask'], dim=-1)
pred_gender = torch.argmax(output['gender'], dim=-1)
pred_age = torch.argmax(output['age'], dim=-1)

pred_class = (pred_mask * 6) + (pred_gender * 3) + (pred_age)

NameError: name 'image' is not defined

## Testing

In [25]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)

        return image

    def __len__(self):
        return len(self.img_paths)

In [26]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]

dataset = TestDataset(image_paths, data_transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
test_model = target_model.to(device)
test_model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        output = test_model(images)

        pred_mask = torch.argmax(output['mask'], dim=-1)
        pred_gender = torch.argmax(output['gender'], dim=-1)
        pred_age = torch.argmax(output['age'], dim=-1)
        pred_class = (pred_mask * 6) + (pred_gender * 3) + (pred_age)

        all_predictions.extend(pred_class.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submissionM2ways.csv'), index=False)
print('test inference is done!')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12600.0), HTML(value='')))


test inference is done!


### Save

In [323]:
torch.save(target_model.state_dict(), "./resnext50_32x4dfc3waysv2.pt")
torch.save(target_model.mask.state_dict(), "./resnext50_32x4dfc3ways_maskv2.pt")

### Performance

In [19]:
train_dir = '/opt/ml/input/data/train'
valid = pd.read_csv(os.path.join(train_dir, 'train_label.csv'))


In [20]:
class CusDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.image_data = self.df['path']   # x data, 이미지
        self.image_label = self.df['label'] # y data, 레이블

        self.transform = transform

    def __getitem__(self, idx):
        image = Image.open(self.image_data.iloc[idx])
        label = self.image_label.iloc[idx]

        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label)

    def __len__(self):
        return len(self.image_label)


In [21]:
def check_eval(raw_data, dataloader, model, device):
    all_predictions = []
    with torch.no_grad():
        for i , (X,y) in enumerate(tqdm(dataloader)):
            model_pred = model.forward(X.to(device))

            pred_mask = torch.argmax(model_pred['mask'], dim=-1)
            pred_gender = torch.argmax(model_pred['gender'], dim=-1)
            pred_age = torch.argmax(model_pred['age'], dim=-1)
            pred_class = (pred_mask * 6) + (pred_gender * 3) + (pred_age)

            all_predictions.extend([[valid.iloc[i]['path'], pred_class.cpu().numpy()[0],y.cpu().numpy()[0]]])
    #print(all_predictions)
    result = pd.DataFrame(all_predictions, columns=['path', 'pred', 'target'])
    return result

In [22]:
valid_dataset = CusDataset(valid, data_transform)
valid_dataloader = DataLoader(valid_dataset, batch_size=1, 
        num_workers=8,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,
    )

check_eval_df = check_eval(valid, valid_dataloader, target_model, device)
check_eval_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=18900.0), HTML(value='')))




Unnamed: 0,path,pred,target
0,../input/data/train/images/000001_female_Asian...,0,0
1,../input/data/train/images/000001_female_Asian...,0,0
2,../input/data/train/images/000001_female_Asian...,6,6
3,../input/data/train/images/000001_female_Asian...,4,4
4,../input/data/train/images/000001_female_Asian...,16,16
...,...,...,...
18895,../input/data/train/images/006959_male_Asian_1...,0,0
18896,../input/data/train/images/006959_male_Asian_1...,3,3
18897,../input/data/train/images/006959_male_Asian_1...,9,9
18898,../input/data/train/images/006959_male_Asian_1...,7,7


In [23]:
wrong_df = check_eval_df[check_eval_df['pred'] != check_eval_df['target']]
wrong_df = wrong_df.reset_index(drop=True)
# wrong_df.head()
print(len(wrong_df))

331


In [24]:
from sklearn.metrics import f1_score
f1_score(check_eval_df['target'], check_eval_df['pred'], average='macro')

0.9652317371733111

In [106]:
check_eval_df

Unnamed: 0,path,pred,target
0,../input/data/train/images/000001_female_Asian...,0,0
1,../input/data/train/images/000001_female_Asian...,2,0
2,../input/data/train/images/000001_female_Asian...,2,0
3,../input/data/train/images/000001_female_Asian...,0,0
4,../input/data/train/images/000001_female_Asian...,0,0
...,...,...,...
18895,../input/data/train/images/006959_male_Asian_1...,0,0
18896,../input/data/train/images/006959_male_Asian_1...,0,0
18897,../input/data/train/images/006959_male_Asian_1...,0,0
18898,../input/data/train/images/006959_male_Asian_1...,0,0


In [147]:
m = check_eval_df.copy()
a = check_eval_df.copy()
g = check_eval_df.copy()

In [178]:
def mask_label(x):
    if x in [0,1,2,3,4,5]:
        return 0    # wear
    elif x in [6,7,8,9,10,11]:
        return 1    # incorrect
    else:
        return 2    # not wear
def gender_label(x):
    if x in [0,1,2,6,7,8,12,13,14]:
        return 0    # male
    else:
        return 1    # female
def age_label(x):
    if x in [0,3,6,9,12,15]:
        return 0    # young
    elif x in [1,4,7,10,13,16]:
        return 1    # middle
    else:
        return 2    # old

In [149]:
m['target'] = m['target'].apply(mask_label)
m['pred'] = m['pred'].apply(mask_label)

a['target'] = a['target'].apply(gender_label)
a['pred'] = a['pred'].apply(gender_label)

g['target'] = g['target'].apply(age_label)
g['pred'] = g['pred'].apply(age_label)

In [154]:
mdf = m[m['pred'] != m['target']]
adf = a[a['pred'] != a['target']]
gdf = g[g['pred'] != g['target']]
len(mdf)

12

In [139]:
f1_score(m['target'], m['pred'], average='macro')

0.9990126473410653

In [140]:
f1_score(a['target'], a['pred'], average='macro')

0.9968760874589255

In [141]:
f1_score(g['target'], g['pred'], average='macro')

0.97213729438098

In [151]:
g.groupby(g.target).size()

target
0    8967
1    8589
2    1344
dtype: int64

In [160]:
gdf.groupby([gdf.target]).size()

target
0     17
1     92
2    129
dtype: int64

---

### 나이 모델 개선

In [255]:
from collections import defaultdict
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

In [234]:
temp = label_group.copy()

In [235]:
p = defaultdict(int)
for k,v in temp.iterrows():
    #print(k,v[0])
    p[age_label(k)] += v[0]

In [233]:
temp = pd.DataFrame(p,columns=["count"])

In [236]:
p

defaultdict(int, {0: 8967, 1: 8589, 2: 1344})

In [166]:
#라벨 그룹별 수 구하기
label_group = pd.DataFrame(df.groupby("label").size().to_list())
label_group.columns = ['count']
label_group = label_group.reset_index().drop(["index"],axis=1)

#평균보다 적은 라벨 리스트 구하기
# mid_count = label_group.mean()
# augmetaion_list = label_group[label_group < int(label_group.mean())].dropna()
# augmetaion_list = augmetaion_list.sort_values(by="count")

#라벨명만 가져오기
augmetaion_label = label_group.index.to_list()
augmetaion_label


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]

In [None]:
def age_label(x):
    if x in [0,3,6,9,12,15]:
        return 0    # young
    elif x in [1,4,7,10,13,16]:
        return 1    # middle
    elif x in [2,5,8,11,14,17]:
        return 2    # old

In [237]:
many_error = [2,5,8,11,14,17]

In [238]:
add_df = df[df.label.isin(many_error)]
path_add_df = []
for i in many_error:
    path_add_df.append(add_df[add_df.label == i])

In [242]:
origin = df.sample(frac=0.5)

In [244]:
augmetaion_data = pd.concat([path_add_df[i] for i in range(len(many_error))] + [origin])

In [306]:
transform = A.Compose([
        #A.RandomRotate90(),
        #A.Flip(),
        #A.Transpose(),
        A.OneOf([
            #A.IAAAdditiveGaussianNoise(),
            A.GaussNoise(),
        ], p=0.2),
        A.OneOf([
            A.MotionBlur(p=.2),
            A.MedianBlur(blur_limit=3, p=0.1),
            A.Blur(blur_limit=3, p=0.1),
        ], p=0.2),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=25, p=0.4),
        A.OneOf([
            A.OpticalDistortion(p=0.3),
            A.GridDistortion(p=.1),
            #A.IAAPiecewiseAffine(p=0.3),
        ], p=0.2),
        A.OneOf([
            A.CLAHE(clip_limit=2),
            #A.IAASharpen(),
            #A.IAAEmboss(),
            A.RandomBrightnessContrast(),            
        ], p=0.3),
        A.HueSaturationValue(p=0.3),
        A.CenterCrop(320,320),
        A.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.2, 0.2, 0.2],
        ),
        ToTensorV2()
    ])

In [307]:
class MultiBranchDatasetAlbum(Dataset):
    def __init__(self, df, transforms):
        self.df = df
        self.image_data = self.df['path']   # x data, 이미지
        self.image_label = self.df['label'] # y data, 레이블


        self.mask_label, self.age_label, self.gender_label = split_data(df)
        self.transform = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):

        img_path = self.df['path'].iloc[idx]
        image = cv2.imread(self.image_data.iloc[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)["image"]
        
        dict_label = {
            'class' : self.image_label.iloc[idx],
            'mask' : self.mask_label.iloc[idx],
            'gender' : self.gender_label.iloc[idx],
            'age' : self.age_label.iloc[idx]
        }

        return image, dict_label

In [308]:
augmetaion_data_train = augmetaion_data.sample(frac=0.8)
augmetaion_data_test = augmetaion_data.drop(augmetaion_data_train.index)

In [309]:
train_dataset = MultiBranchDatasetAlbum(augmetaion_data_train, transform)
train_loader = DataLoader(
    train_dataset,
    batch_size = 64,
    shuffle=True,
    num_workers=4
)

test_dataset = MultiBranchDatasetAlbum(augmetaion_data_test, transform)
test_loader = DataLoader(
    test_dataset,
    batch_size = 64,
    shuffle=False,
    num_workers=4
)

dataloaders = {
    "train" : train_loader,
    "test" : test_loader
}

In [310]:
dfs_freeze(mask_model.mask)