In [1]:
import os
import pandas as pd
from PIL import Image
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision.transforms import Resize, ToTensor, Normalize, transforms
from torchvision.datasets import ImageFolder
import shutil
import torch.optim as optim
from zlib import crc32
import time
import copy
 

In [8]:
original_dataset_dir = './input/data/train/images'
base_dir = './each_classes'
mask_classes = ["{0:02d}".format(a) for a in range(3)]
gender_classes = ["{0:02d}".format(a) for a in range(2)]
age_classes = ["{0:02d}".format(a) for a in range(3)]

if not os.path.isdir(base_dir): 
    os.makedirs(base_dir)

mask_dir = os.path.join(base_dir, 'mask')
if not os.path.isdir(mask_dir): 
    os.makedirs(mask_dir)
mask_train_dir = os.path.join(mask_dir, 'train')
if not os.path.isdir(mask_train_dir):
    os.makedirs(mask_train_dir)
mask_test_dir = os.path.join(mask_dir, 'test')
if not os.path.isdir(mask_test_dir):
    os.makedirs(mask_test_dir)

gender_dir = os.path.join(base_dir, 'gender')
if not os.path.isdir(gender_dir):
    os.makedirs(gender_dir)
gender_train_dir = os.path.join(gender_dir, 'train')
if not os.path.isdir(gender_train_dir):
    os.makedirs(gender_train_dir)
gender_test_dir = os.path.join(gender_dir, 'test')
if not os.path.isdir(gender_test_dir):
    os.makedirs(gender_test_dir)

age_dir = os.path.join(base_dir, "age")
if not os.path.isdir(age_dir): 
    os.makedirs(age_dir)
age_train_dir = os.path.join(age_dir, 'train')
if not os.path.isdir(age_train_dir):
    os.makedirs(age_train_dir)
age_test_dir = os.path.join(age_dir, 'test')
if not os.path.isdir(age_test_dir):
    os.makedirs(age_test_dir)

## mask
for classes in mask_classes:
    train_split = os.path.join(mask_train_dir, str(classes))
    if not os.path.isdir(train_split):
        os.mkdir(train_split)
    test_split = os.path.join(mask_test_dir, str(classes))
    if not os.path.isdir(test_split):
        os.mkdir(test_split)

# gender
for classes in gender_classes:
    train_split = os.path.join(gender_train_dir, str(classes))
    if not os.path.isdir(train_split):
        os.mkdir(train_split)
    test_split = os.path.join(gender_test_dir, str(classes))
    if not os.path.isdir(test_split):
        os.mkdir(test_split)
    
# age
for classes in age_classes:
    train_split = os.path.join(age_train_dir, str(classes))
    if not os.path.isdir(train_split):
        os.mkdir(train_split)
    test_split = os.path.join(age_test_dir, str(classes))
    if not os.path.isdir(test_split):
        os.mkdir(test_split)

In [9]:
def what_mask_this(data, file):
    # 마스크 여부 분류
    if 'incorrect' in file:
        return 1
    elif 'mask' in file:
        return 0
    else: # not wear
        return 2

def what_gender_this(data, file):
    # 성별 분류
    if data['gender'] == 'male':
        return 0
    else: # female
        return 1

def what_age_this(data, file):
    # 나이 분류
    if data['age'] < 30:
        return 0
    elif data['age'] >= 60:
        return 2
    else: # 30 ~ 60
        return 1
    


In [10]:
def ratio_splitter(identifier, test_ratio):
    return crc32(np.string_(identifier)) & 0xffffffff < test_ratio * 2 ** 32

In [11]:
data_infos = pd.read_csv("./input/data/train/train.csv")
train_counter = {}
test_counter = {}

for i in range(3):
    train_counter[str(i)] = 0
    test_counter[str(i)] = 0

train_dir = os.path.join(mask_dir, 'train')
test_dir = os.path.join(mask_dir, 'test')

for row in data_infos.iterrows():
    data_num, data = row[0], row[1]

    # 목표 디렉토리 설정
    path = os.path.join(original_dataset_dir, data['path'])
    # 디렉토리 리스트 가져오기
    fnames = os.listdir(path)
    # 데이터 분활하기
    test_set_check = ratio_splitter(path, 0.15)
    
    
    for file in fnames:
        if file[0] == '.':
            continue

        target_class = what_mask_this(data, file)
        target_path = "{0:02d}".format(target_class)
        
        src = os.path.join(path, file)

        if test_set_check:
            dst = os.path.join(os.path.join(test_dir, target_path), file)
        else: # train_set
            dst = os.path.join(os.path.join(train_dir, target_path), file)

        shutil.copyfile(src, dst)

        # 파일 확장자를 추출하기 위해서
        name, extension = file.split(".") 

        if test_set_check:
            change_name = os.path.join(os.path.join(test_dir, target_path), str(test_counter[str(target_class)]) + "." + extension)
            test_counter[str(target_class)] += 1 
        else: # train_set
            change_name = os.path.join(os.path.join(train_dir, target_path), str(train_counter[str(target_class)]) + "." + extension)
            train_counter[str(target_class)] += 1 
            
        shutil.move(dst, change_name)



In [12]:
data_infos = pd.read_csv("./input/data/train/train.csv")
train_counter = {}
test_counter = {}

for i in range(2):
    train_counter[str(i)] = 0
    test_counter[str(i)] = 0

train_dir = os.path.join(gender_dir, 'train')
test_dir = os.path.join(gender_dir, 'test')

for row in data_infos.iterrows():
    data_num, data = row[0], row[1]

    # 목표 디렉토리 설정
    path = os.path.join(original_dataset_dir, data['path'])
    # 디렉토리 리스트 가져오기
    fnames = os.listdir(path)
    # 데이터 분활하기
    test_set_check = ratio_splitter(path, 0.15)
    
    
    for file in fnames:
        if file[0] == '.':
            continue

        target_class = what_gender_this(data, file)
        target_path = "{0:02d}".format(target_class)
        
        src = os.path.join(path, file)

        if test_set_check:
            dst = os.path.join(os.path.join(test_dir, target_path), file)
        else: # train_set
            dst = os.path.join(os.path.join(train_dir, target_path), file)

        shutil.copyfile(src, dst)

        # 파일 확장자를 추출하기 위해서
        name, extension = file.split(".") 

        if test_set_check:
            change_name = os.path.join(os.path.join(test_dir, target_path), str(test_counter[str(target_class)]) + "." + extension)
            test_counter[str(target_class)] += 1 
        else: # train_set
            change_name = os.path.join(os.path.join(train_dir, target_path), str(train_counter[str(target_class)]) + "." + extension)
            train_counter[str(target_class)] += 1 
            
        shutil.move(dst, change_name)



In [13]:
data_infos = pd.read_csv("./input/data/train/train.csv")
train_counter = {}
test_counter = {}

for i in range(3):
    train_counter[str(i)] = 0
    test_counter[str(i)] = 0

train_dir = os.path.join(age_dir, 'train')
test_dir = os.path.join(age_dir, 'test')

for row in data_infos.iterrows():
    data_num, data = row[0], row[1]

    # 목표 디렉토리 설정
    path = os.path.join(original_dataset_dir, data['path'])
    # 디렉토리 리스트 가져오기
    fnames = os.listdir(path)
    # 데이터 분활하기
    test_set_check = ratio_splitter(path, 0.15)
    
    
    for file in fnames:
        if file[0] == '.':
            continue

        target_class = what_age_this(data, file)
        target_path = "{0:02d}".format(target_class)
        
        src = os.path.join(path, file)

        if test_set_check:
            dst = os.path.join(os.path.join(test_dir, target_path), file)
        else: # train_set
            dst = os.path.join(os.path.join(train_dir, target_path), file)

        shutil.copyfile(src, dst)

        # 파일 확장자를 추출하기 위해서
        name, extension = file.split(".") 

        if test_set_check:
            change_name = os.path.join(os.path.join(test_dir, target_path), str(test_counter[str(target_class)]) + "." + extension)
            test_counter[str(target_class)] += 1 
        else: # train_set
            change_name = os.path.join(os.path.join(train_dir, target_path), str(train_counter[str(target_class)]) + "." + extension)
            train_counter[str(target_class)] += 1 
            
        shutil.move(dst, change_name)



In [14]:
import cv2
import albumentations as A

class Color:

    HueSaturationValue_isChecked = False
    RGBShift_isChecked = True

    #입력 이미지의 색조, 채도 및 값을 임의로 변경합니다.
    def HueSaturationValue (image, hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, always_apply=False, p=1):
        """
       hue_shift_limit	[int, int] or int	
        색조 변경 범위. hue_shift_limit가 단일 int이면 범위는 (-hue_shift_limit, hue_shift_limit)입니다. 기본값 : (-20, 20).

        sat_shift_limit	[int, int] or int	
        채도 변경 범위. sat_shift_limit가 단일 정수이면 범위는 (-sat_shift_limit, sat_shift_limit)입니다. 기본값 : (-30, 30).

        val_shift_limit	[int, int] or int	
        값 변경 범위. val_shift_limit가 단일 정수이면 범위는 (-val_shift_limit, val_shift_limit)입니다. 기본값 : (-20, 20).

        p	float	
        변환을 적용 할 확률. 기본값 : 1
        """
        transform = A.Compose([
            A.HueSaturationValue(hue_shift_limit, sat_shift_limit, val_shift_limit, always_apply, p)
        ],p=1)
        img = transform(image=image)['image']
        return img

In [15]:
def image_save(directory, image_data, image_name, excution):
    cv2.imwrite(os.path.join(directory, str(image_name) + "." + excution), image_data)

In [16]:
over_sixty_list = ["02"]
not_mask_list = ["01", "02"]
mans_list = ["00"]

over_sixty_ratio = 6
not_mask_ratio = 5
mans_ratio = 0.6

In [17]:
for dir in over_sixty_list:
    target_class = int(dir)
    train_target = os.path.join(age_train_dir, dir)
    test_target = os.path.join(age_test_dir, dir)

    # 디렉토리 리스트 가져오기
    train_fnames = os.listdir(train_target)
    test_fnames = os.listdir(test_target)
    
    for train_file in train_fnames:
        for _ in range(over_sixty_ratio):           
            aug_img = cv2.imread(os.path.join(train_target, train_file))
            aug_img = Color.HueSaturationValue(aug_img)
            _, extension = train_file.split(".") 
            image_save(train_target, aug_img, train_counter[str(target_class)], extension)
            train_counter[str(target_class)] += 1

        

In [18]:
for dir in not_mask_list:
    target_class = int(dir)
    train_target = os.path.join(mask_train_dir, dir)
    test_target = os.path.join(mask_test_dir, dir)

    # 디렉토리 리스트 가져오기
    train_fnames = os.listdir(train_target)
    test_fnames = os.listdir(test_target)
    
    for train_file in train_fnames:
        for _ in range(not_mask_ratio):           
            aug_img = cv2.imread(os.path.join(train_target, train_file))
            aug_img = Color.HueSaturationValue(aug_img)
            _, extension = train_file.split(".") 
            image_save(train_target, aug_img, train_counter[str(target_class)], extension)
            train_counter[str(target_class)] += 1


In [19]:
for dir in mans_list:
    target_class = int(dir)
    train_target = os.path.join(gender_train_dir, dir)
    test_target = os.path.join(gender_test_dir, dir)

    # 디렉토리 리스트 가져오기
    train_fnames = os.listdir(train_target)
    test_fnames = os.listdir(test_target)
    
    for train_file in train_fnames:
        target_image_path = os.path.join(train_target, train_file)
        if ratio_splitter(target_image_path, mans_ratio):      
            aug_img = cv2.imread(target_image_path)
            aug_img = Color.HueSaturationValue(aug_img)
            _, extension = train_file.split(".") 
            image_save(train_target, aug_img, train_counter[str(target_class)], extension)
            train_counter[str(target_class)] += 1


In [20]:
USE_CUDA = torch.cuda.is_available
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')
print(DEVICE)
BATCH_SIZE = 100
EPOCH = 24

cuda


In [21]:
data_transforms = {
    'train': transforms.Compose([transforms.Resize([256, 172]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    ]),
    'test': transforms.Compose([transforms.Resize([256, 172]),
    transforms.ToTensor(),
    ])
}

In [22]:
# mask
data_dir = './each_classes/mask' 
mask_datasets = {x: ImageFolder(root=os.path.join(data_dir, x), transform=data_transforms[x]) for x in ['train', 'test']} 
mask_dataloader = {x: torch.utils.data.DataLoader(mask_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'test']} 
mask_dataset_sizes = {x: len(mask_datasets[x]) for x in ['train', 'test']}


In [23]:
# gender
data_dir = './each_classes/gender' 
gender_datasets = {x: ImageFolder(root=os.path.join(data_dir, x), transform=data_transforms[x]) for x in ['train', 'test']} 
gender_dataloader = {x: torch.utils.data.DataLoader(gender_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'test']} 
gender_dataset_sizes = {x: len(gender_datasets[x]) for x in ['train', 'test']}


In [24]:
# age
data_dir = './each_classes/age' 
age_datasets = {x: ImageFolder(root=os.path.join(data_dir, x), transform=data_transforms[x]) for x in ['train', 'test']} 
age_dataloader = {x: torch.utils.data.DataLoader(age_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'test']} 
age_dataset_sizes = {x: len(age_datasets[x]) for x in ['train', 'test']}


In [44]:
mask_datasets['train'].class_to_idx

{'00': 0, '01': 1, '02': 2}

In [45]:
gender_datasets['train'].class_to_idx

{'00': 0, '01': 1}

In [25]:
from torchvision import models

In [46]:
mask_resnet = models.resnet50(pretrained=True)  
num_ftrs = mask_resnet.fc.in_features   
mask_resnet.fc = nn.Linear(num_ftrs, 3) 
mask_resnet = mask_resnet.to(DEVICE)
 
criterion = nn.CrossEntropyLoss() 
optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, mask_resnet.parameters()), lr=0.001)
 
from torch.optim import lr_scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1) 

ct = 0 
for child in mask_resnet.children():  
    ct += 1  
    if ct < 6: 
        for param in child.parameters():
            param.requires_grad = False

In [47]:
gender_resnet = models.resnet50(pretrained=True)  
num_ftrs = gender_resnet.fc.in_features   
gender_resnet.fc = nn.Linear(num_ftrs, 2) 
gender_resnet = gender_resnet.to(DEVICE)
 
criterion = nn.CrossEntropyLoss() 
optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, gender_resnet.parameters()), lr=0.001)
 
from torch.optim import lr_scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1) 

ct = 0 
for child in gender_resnet.children():  
    ct += 1  
    if ct < 6: 
        for param in child.parameters():
            param.requires_grad = False

In [40]:
from defaults import _C as cfg
from model import get_model

cfg.freeze()
 # create model
print("=> creating model '{}'".format(cfg.MODEL.ARCH))
age_model = get_model(model_name=cfg.MODEL.ARCH, pretrained=None)

resume_path = "./epoch044_0.02343_3.9984.pth"
print("=> loading checkpoint '{}'".format(resume_path))
checkpoint = torch.load(resume_path, map_location="cpu")
age_model.load_state_dict(checkpoint['state_dict'])
print("=> loaded checkpoint '{}'".format(resume_path))



=> creating model 'se_resnext50_32x4d'
=> loading checkpoint './epoch044_0.02343_3.9984.pth'
=> loaded checkpoint './epoch044_0.02343_3.9984.pth'


In [48]:
def train_resnet(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, num_epochs=25):

    best_model_wts = copy.deepcopy(model.state_dict())  
    best_acc = 0.0  
    
    for epoch in range(num_epochs):
        print('-------------- epoch {} ----------------'.format(epoch+1)) 
        since = time.time()                                     
        for phase in ['train', 'test']: 
            if phase == 'train': 
                model.train() 
            else:
                model.eval()     
 
            running_loss = 0.0  
            running_corrects = 0  
 
            
            for inputs, labels in dataloaders[phase]: 
                inputs = inputs.to(DEVICE)  
                labels = labels.to(DEVICE)  
                
                optimizer.zero_grad() 
                
                with torch.set_grad_enabled(phase == 'train'):  
                    outputs = model(inputs)  
                    _, preds = torch.max(outputs, 1) 
                    loss = criterion(outputs, labels)  
    
                    if phase == 'train':   
                        loss.backward()
                        optimizer.step()
 
                running_loss += loss.item() * inputs.size(0)  
                running_corrects += torch.sum(preds == labels.data)  
            if phase == 'train':  
                scheduler.step()
 
            epoch_loss = running_loss/dataset_sizes[phase]  
            epoch_acc = running_corrects.double()/dataset_sizes[phase]  
 
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) 
 
          
            if phase == 'test' and epoch_acc > best_acc: 
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
 
        time_elapsed = time.time() - since  
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
 
    model.load_state_dict(best_model_wts) 

    return model

In [49]:
mask_resnet50 = train_resnet(mask_resnet, criterion, optimizer_ft, exp_lr_scheduler, mask_dataloader, mask_dataset_sizes, num_epochs=EPOCH)
torch.save(mask_resnet50, 'mask_resnet50.pt')

-------------- epoch 1 ----------------
train Loss: 1.1246 Acc: 0.3444
test Loss: 1.2115 Acc: 0.1623
Completed in 1m 52s
-------------- epoch 2 ----------------
train Loss: 1.1241 Acc: 0.3430
test Loss: 1.2133 Acc: 0.1595
Completed in 1m 51s
-------------- epoch 3 ----------------
train Loss: 1.1241 Acc: 0.3444
test Loss: 1.2156 Acc: 0.1598
Completed in 1m 51s
-------------- epoch 4 ----------------
train Loss: 1.1248 Acc: 0.3438
test Loss: 1.2099 Acc: 0.1613
Completed in 1m 52s
-------------- epoch 5 ----------------


KeyboardInterrupt: 

In [None]:
gender_resnet50 = train_resnet(gender_resnet, criterion, optimizer_ft, exp_lr_scheduler, gender_dataloader, gender_dataset_sizes, num_epochs=EPOCH)