## 0. Libarary 불러오기

In [52]:
!pip install torch_optimizer

Collecting torch_optimizer
  Downloading torch_optimizer-0.1.0-py3-none-any.whl (72 kB)
[K     |████████████████████████████████| 72 kB 724 kB/s eta 0:00:011
[?25hCollecting pytorch-ranger>=0.1.1
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Installing collected packages: pytorch-ranger, torch-optimizer
Successfully installed pytorch-ranger-0.1.1 torch-optimizer-0.1.0


In [53]:
import os, glob
import time
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
from torchvision.models import resnet50

from torchsummary import summary
import torch_optimizer as optim

from sklearn.model_selection import train_test_split

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cuda:0


## 1. 데이터셋 정의

### (1) Train Dataset

In [3]:
# 학습 데이터셋 폴더 경로를 지정해주세요.
train_dir = '/opt/ml/input/data/train'
trainimage_dir = os.path.join(train_dir, 'images')

In [4]:
# meta 데이터와 이미지 경로를 불러옵니다.
train_df = pd.read_csv(os.path.join(train_dir, 'train.csv'))
train_df

Unnamed: 0,id,gender,race,age,path
0,000001,female,Asian,45,000001_female_Asian_45
1,000002,female,Asian,52,000002_female_Asian_52
2,000004,male,Asian,54,000004_male_Asian_54
3,000005,female,Asian,58,000005_female_Asian_58
4,000006,female,Asian,59,000006_female_Asian_59
...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19
2696,006955,male,Asian,19,006955_male_Asian_19
2697,006956,male,Asian,19,006956_male_Asian_19
2698,006957,male,Asian,20,006957_male_Asian_20


In [5]:
masks = ['mask1', 'mask2', 'mask3', 'mask4', 'mask5', 'incorrect_mask', 'normal']
wears = ['Wear', 'Wear', 'Wear', 'Wear', 'Wear', 'Incorrect', 'Not Wear']
mask_df = pd.DataFrame()
for person in train_df.values:
    for mask, wear in zip(masks, wears):
        mask_df = mask_df.append(pd.Series(np.append(person, (mask, wear))), ignore_index=True)
mask_df.columns = np.append(train_df.columns.values, ('mask', 'wear'))
mask_df

Unnamed: 0,id,gender,race,age,path,mask
0,000001,female,Asian,45.0,000001_female_Asian_45,mask1
1,000001,female,Asian,45.0,000001_female_Asian_45,mask2
2,000001,female,Asian,45.0,000001_female_Asian_45,mask3
3,000001,female,Asian,45.0,000001_female_Asian_45,mask4
4,000001,female,Asian,45.0,000001_female_Asian_45,mask5
...,...,...,...,...,...,...
18895,006959,male,Asian,19.0,006959_male_Asian_19,mask3
18896,006959,male,Asian,19.0,006959_male_Asian_19,mask4
18897,006959,male,Asian,19.0,006959_male_Asian_19,mask5
18898,006959,male,Asian,19.0,006959_male_Asian_19,incorrect_mask


In [6]:
mask_df = mask_df.sample(frac=1).reset_index(drop=True)
mask_df

Unnamed: 0,id,gender,race,age,path,mask
0,001840-1,male,Asian,22.0,001840-1_male_Asian_22,mask5
1,001309,male,Asian,23.0,001309_male_Asian_23,mask5
2,005404,female,Asian,25.0,005404_female_Asian_25,mask3
3,001762,male,Asian,40.0,001762_male_Asian_40,mask2
4,003188,female,Asian,19.0,003188_female_Asian_19,normal
...,...,...,...,...,...,...
18895,001555,female,Asian,59.0,001555_female_Asian_59,mask5
18896,004390,female,Asian,51.0,004390_female_Asian_51,mask1
18897,005255,male,Asian,19.0,005255_male_Asian_19,mask4
18898,001100,female,Asian,43.0,001100_female_Asian_43,normal


In [7]:
train, valid = train_test_split(mask_df, test_size=0.2, stratify=mask_df['wear'])
print(f'Train Set dim : (%d, %d)' % (train.shape))
print(f'Valid Set dim : (%d, %d)' % (valid.shape))

Train Set dim : (15120, 6)
Valid Set dim : (3780, 6)


In [22]:
transform = transforms.Compose([
    Resize((224, 224), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])

In [23]:
class GenderDataset(Dataset):
    def __init__(self, path, mask_df, transform):
        super(GenderDataset).__init__()
        self.path = path
        self.mask_df = mask_df
        self.transform = transform
        
    def __getitem__(self, idx):
        full_path = os.path.join(self.path, self.mask_df.iloc[idx]['path'])
        img_list = glob.glob(full_path + '/*')
        file_name = self.mask_df.iloc[idx]['mask']
        for img_name in img_list:
            if img_name.startswith(file_name):
                break
        image = Image.open(os.path.join(full_path, img_name))
        if self.transform:
            image = self.transform(image)
        
        label = self.mask_df.iloc[idx]['gender']
        label = 0 if label=='male' else 1
        return image, label
    
    def __len__(self):
        return len(self.mask_df)

In [24]:
gender_train_data = GenderDataset(trainimage_dir, train, transform)
gender_valid_data = GenderDataset(trainimage_dir, valid, transform)

In [25]:
gender_train = DataLoader(gender_train_data, batch_size=32, shuffle=True, num_workers=2)
gender_valid = DataLoader(gender_valid_data, batch_size=32, shuffle=True, num_workers=2)

In [26]:
class AgeDataset(Dataset):
    def __init__(self, path, mask_df, transform):
        super(AgeDataset).__init__()
        self.path = path
        self.mask_df = mask_df
        self.transform = transform
        
    def __getitem__(self, idx):
        full_path = os.path.join(self.path, self.mask_df.iloc[idx]['path'])
        img_list = glob.glob(full_path + '/*')
        file_name = self.mask_df.iloc[idx]['mask']
        for img_name in img_list:
            if img_name.startswith(file_name):
                break
        image = Image.open(os.path.join(full_path, img_name))
        if self.transform:
            image = self.transform(image)
        
        label = self.mask_df.iloc[idx]['age']
        if label >= 60.0:
            label = 2
        elif label >= 30.0:
            label = 1
        else:
            label = 0
        return image, label
    
    def __len__(self):
        return len(self.mask_df)

In [27]:
age_train_data = AgeDataset(trainimage_dir, train, transform)
age_valid_data = AgeDataset(trainimage_dir, valid, transform)

In [28]:
age_train = DataLoader(age_train_data, batch_size=32, shuffle=True, num_workers=2)
age_valid = DataLoader(age_valid_data, batch_size=32, shuffle=True, num_workers=2)

In [29]:
class MaskDataset(Dataset):
    def __init__(self, path, mask_df, transform):
        super(MaskDataset).__init__()
        self.path = path
        self.mask_df = mask_df
        self.transform = transform
        
    def __getitem__(self, idx):
        full_path = os.path.join(self.path, self.mask_df.iloc[idx]['path'])
        img_list = glob.glob(full_path + '/*')
        file_name = self.mask_df.iloc[idx]['mask']
        for img_name in img_list:
            if img_name.startswith(file_name):
                break
        image = Image.open(os.path.join(full_path, img_name))
        if self.transform:
            image = self.transform(image)
        
        label = self.mask_df.iloc[idx]['mask']
        if label.startswith('mask'):
            label = 0
        elif label.startswith('incorrect'):
            label = 1
        else:
            label = 2
        return image, label
    
    def __len__(self):
        return len(self.mask_df)

In [30]:
mask_train_data = MaskDataset(trainimage_dir, train, transform)
mask_valid_data = MaskDataset(trainimage_dir, valid, transform)

In [31]:
mask_train = DataLoader(mask_train_data, batch_size=32, shuffle=True)
mask_valid = DataLoader(mask_valid_data, batch_size=32, shuffle=True)

### (2) Test Dataset

In [32]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        super(TestDataset).__init__()
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [33]:
# 테스트 데이터셋 폴더 경로를 지정해주세요.
test_dir = '/opt/ml/input/data/eval'

In [34]:
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
testimage_dir = os.path.join(test_dir, 'images')

In [35]:
# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(testimage_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((224, 224), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
test_dataset = TestDataset(image_paths, transform)

test_loader = DataLoader(
    test_dataset,
    shuffle=False
)

## 2. Model 정의

In [40]:
model = resnet50(pretrained=True, progress=False)

In [41]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [47]:
class MyModel(nn.Module):
    def __init__(self, num_classes: int = 1000):
        super(MyModel, self).__init__()
        self.model = resnet50(pretrained=True, progress=False)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.model(x)
        return x

### 마스크 착용 여부 분류

In [60]:
mask_model = MyModel(num_classes=3)
for param in mask_model.parameters():
    param.requires_grad = False # frozen
for param in mask_model.model.fc.parameters():
    param.requires_grad = True # 마지막 레이어 살리기
mask_model.to(device)

MyModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0

### 나이 분류

### 성별 분류

### 성별 분류

In [57]:
gender_model = MyModel(num_classes=2)
for param in gender_model.parameters():
    param.requires_grad = False # frozen
for param in gender_model.model.fc.parameters():
    param.requires_grad = True # 마지막 레이어 살리기
gender_model.to(device)

MyModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0

In [56]:
age_model = MyModel(num_classes=3)
for param in age_model.parameters():
    param.requires_grad = False # frozen
for param in age_model.model.fc.parameters():
    param.requires_grad = True # 마지막 레이어 살리기
age_model.to(device)

MyModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0

## 3. Training

In [58]:
learning_rate = 0.0015
betas = (0.9, 0.999)
weight_decay = 1e-4
T_max = 50

### 마스크 착용 여부 분류

In [61]:
optimizer = optim.RAdam(mask_model.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=T_max)

num_epochs = 30

valid_early_stop = 0
valid_best_loss = float('inf')
EARLY_STOPPING_EPOCH = 5
since = time.time()

final_train_loss = []
final_train_acc = []
final_valid_loss = []
final_valid_acc = []

for e in range(num_epochs) :
    print(f' ====================== epoch %d ======================' % (e+1) )
    train_loss_list = []
    train_acc_list = []

    # train
    mask_model.train()
    for i, (images, targets) in enumerate(mask_train) : 
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        scores = mask_model(images)
        _, preds = scores.max(dim=1)

        loss = F.cross_entropy(scores, targets)
        loss.backward()
        optimizer.step()

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        train_loss_list.append(loss)
        train_acc_list.append(acc)

        if i % 50 == 0 :
            print(f'Iteration %3.d | Train Loss  %.4f | Classifier Accuracy %2.2f' % (i, loss, acc))

    train_mean_loss = np.mean(train_loss_list, dtype="float64")
    train_mean_acc = np.mean(train_acc_list, dtype="float64")

    final_train_loss.append(train_mean_loss)
    final_train_acc.append(train_mean_acc)

    epoch_time = time.time() - since
    since = time.time()

    print('')
    print(f'[Summary] Elapsed time : %.0f m %.0f s' % (epoch_time // 60, epoch_time % 60))
    print(f'Train Loss Mean %.4f | Accuracy %2.2f ' % (train_mean_loss, train_mean_acc) )

    # validation 
    mask_model.eval()
    valid_loss_list = []
    valid_acc_list = []
    for i, (images, targets) in enumerate(mask_valid) : 
        optimizer.zero_grad()
        images = images.to(device=device)
        targets = targets.to(device=device)

        with torch.no_grad():
            scores = mask_model(images)
            loss = F.cross_entropy(scores, targets)
            _, preds = scores.max(dim=1)

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        valid_loss_list.append(loss)
        valid_acc_list.append(acc)

    val_mean_loss = np.mean(valid_loss_list, dtype="float64")
    val_mean_acc = np.mean(valid_acc_list, dtype="float64")

    final_valid_loss.append(val_mean_loss)
    final_valid_acc.append(val_mean_acc)

    print(f'Valid Loss Mean %.4f | Accuracy %2.2f ' % (val_mean_loss, val_mean_acc) )
    print('')

    if val_mean_loss < valid_best_loss:
        valid_best_loss = val_mean_loss
        valid_early_stop = 0
        # new best model save (valid 기준)
        mask_best_model = mask_model
        path = './mask_model/'
        torch.save(mask_best_model.state_dict(), f'{path}model{val_mean_acc:2.2f}_epoch_{e}.pth')
    else:
        # early stopping    
        valid_early_stop += 1
        if valid_early_stop >= EARLY_STOPPING_EPOCH:
            print("EARLY STOPPING!!")
            break

    lr_sched.step()

Iteration   0 | Train Loss  1.0621 | Classifier Accuracy 50.00
Iteration  50 | Train Loss  1.1816 | Classifier Accuracy 50.00
Iteration 100 | Train Loss  1.1725 | Classifier Accuracy 56.25
Iteration 150 | Train Loss  0.7348 | Classifier Accuracy 78.12
Iteration 200 | Train Loss  1.0063 | Classifier Accuracy 59.38
Iteration 250 | Train Loss  1.1035 | Classifier Accuracy 56.25
Iteration 300 | Train Loss  0.7957 | Classifier Accuracy 81.25
Iteration 350 | Train Loss  1.0444 | Classifier Accuracy 65.62
Iteration 400 | Train Loss  0.7279 | Classifier Accuracy 78.12
Iteration 450 | Train Loss  0.6462 | Classifier Accuracy 84.38

[Summary] Elapsed time : 3 m 21 s
Train Loss Mean 0.8234 | Accuracy 71.29 
Valid Loss Mean 0.8213 | Accuracy 70.90 

Iteration   0 | Train Loss  0.7477 | Classifier Accuracy 75.00
Iteration  50 | Train Loss  0.9708 | Classifier Accuracy 62.50
Iteration 100 | Train Loss  0.7798 | Classifier Accuracy 75.00
Iteration 150 | Train Loss  0.8133 | Classifier Accuracy 75.00


### 성별 분류

In [62]:
optimizer = optim.RAdam(gender_model.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=T_max)

num_epochs = 30

valid_early_stop = 0
valid_best_loss = float('inf')
EARLY_STOPPING_EPOCH = 5
since = time.time()

final_train_loss = []
final_train_acc = []
final_valid_loss = []
final_valid_acc = []

for e in range(num_epochs) :
    print(f' ====================== epoch %d ======================' % (e+1) )
    train_loss_list = []
    train_acc_list = []

    # train
    gender_model.train()
    for i, (images, targets) in enumerate(gender_train) : 
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        scores = gender_model(images)
        _, preds = scores.max(dim=1)

        loss = F.cross_entropy(scores, targets)
        loss.backward()
        optimizer.step()

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        train_loss_list.append(loss)
        train_acc_list.append(acc)

        if i % 50 == 0 :
            print(f'Iteration %3.d | Train Loss  %.4f | Classifier Accuracy %2.2f' % (i, loss, acc))

    train_mean_loss = np.mean(train_loss_list, dtype="float64")
    train_mean_acc = np.mean(train_acc_list, dtype="float64")

    final_train_loss.append(train_mean_loss)
    final_train_acc.append(train_mean_acc)

    epoch_time = time.time() - since
    since = time.time()

    print('')
    print(f'[Summary] Elapsed time : %.0f m %.0f s' % (epoch_time // 60, epoch_time % 60))
    print(f'Train Loss Mean %.4f | Accuracy %2.2f ' % (train_mean_loss, train_mean_acc) )

    # validation 
    gender_model.eval()
    valid_loss_list = []
    valid_acc_list = []
    for i, (images, targets) in enumerate(gender_valid) : 
        optimizer.zero_grad()
        images = images.to(device=device)
        targets = targets.to(device=device)

        with torch.no_grad():
            scores = gender_model(images)
            loss = F.cross_entropy(scores, targets)
            _, preds = scores.max(dim=1)

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        valid_loss_list.append(loss)
        valid_acc_list.append(acc)

    val_mean_loss = np.mean(valid_loss_list, dtype="float64")
    val_mean_acc = np.mean(valid_acc_list, dtype="float64")

    final_valid_loss.append(val_mean_loss)
    final_valid_acc.append(val_mean_acc)

    print(f'Valid Loss Mean %.4f | Accuracy %2.2f ' % (val_mean_loss, val_mean_acc) )
    print('')

    if val_mean_loss < valid_best_loss:
        valid_best_loss = val_mean_loss
        valid_early_stop = 0
        # new best model save (valid 기준)
        gender_best_model = gender_model
        path = './gender_model/'
        torch.save(gender_best_model.state_dict(), f'{path}model{val_mean_acc:2.2f}_epoch_{e}.pth')
    else:
        # early stopping    
        valid_early_stop += 1
        if valid_early_stop >= EARLY_STOPPING_EPOCH:
            print("EARLY STOPPING!!")
            break

    lr_sched.step()

Iteration   0 | Train Loss  0.7551 | Classifier Accuracy 40.62
Iteration  50 | Train Loss  0.5906 | Classifier Accuracy 62.50
Iteration 100 | Train Loss  0.4163 | Classifier Accuracy 87.50
Iteration 150 | Train Loss  0.2997 | Classifier Accuracy 90.62
Iteration 200 | Train Loss  0.2705 | Classifier Accuracy 90.62
Iteration 250 | Train Loss  0.1658 | Classifier Accuracy 96.88
Iteration 300 | Train Loss  0.3838 | Classifier Accuracy 84.38
Iteration 350 | Train Loss  0.2241 | Classifier Accuracy 90.62
Iteration 400 | Train Loss  0.2420 | Classifier Accuracy 90.62
Iteration 450 | Train Loss  0.2654 | Classifier Accuracy 90.62

[Summary] Elapsed time : 0 m 54 s
Train Loss Mean 0.3526 | Accuracy 85.19 
Valid Loss Mean 0.2117 | Accuracy 91.57 

Iteration   0 | Train Loss  0.2019 | Classifier Accuracy 90.62
Iteration  50 | Train Loss  0.2633 | Classifier Accuracy 90.62
Iteration 100 | Train Loss  0.1898 | Classifier Accuracy 93.75
Iteration 150 | Train Loss  0.2081 | Classifier Accuracy 90.62


Exception ignored in: <function tqdm.__del__ at 0x7fab6fc264c0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/tqdm/std.py", line 1128, in __del__
    self.close()
  File "/opt/conda/lib/python3.8/site-packages/tqdm/notebook.py", line 261, in close
    self.sp(bar_style='danger')
AttributeError: 'tqdm' object has no attribute 'sp'
Exception ignored in: <function tqdm.__del__ at 0x7fab6fc264c0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/tqdm/std.py", line 1128, in __del__
    self.close()
  File "/opt/conda/lib/python3.8/site-packages/tqdm/notebook.py", line 261, in close
    self.sp(bar_style='danger')
AttributeError: 'tqdm' object has no attribute 'sp'


Iteration 300 | Train Loss  0.1162 | Classifier Accuracy 93.75
Iteration 350 | Train Loss  0.1497 | Classifier Accuracy 93.75
Iteration 400 | Train Loss  0.1619 | Classifier Accuracy 90.62
Iteration 450 | Train Loss  0.1533 | Classifier Accuracy 93.75

[Summary] Elapsed time : 1 m 10 s
Train Loss Mean 0.1409 | Accuracy 94.18 
Valid Loss Mean 0.1263 | Accuracy 94.17 

Iteration   0 | Train Loss  0.1489 | Classifier Accuracy 96.88
Iteration  50 | Train Loss  0.1455 | Classifier Accuracy 90.62
Iteration 100 | Train Loss  0.0879 | Classifier Accuracy 96.88
Iteration 150 | Train Loss  0.0705 | Classifier Accuracy 93.75
Iteration 200 | Train Loss  0.0175 | Classifier Accuracy 100.00
Iteration 250 | Train Loss  0.1181 | Classifier Accuracy 93.75
Iteration 300 | Train Loss  0.2891 | Classifier Accuracy 87.50
Iteration 350 | Train Loss  0.0437 | Classifier Accuracy 100.00
Iteration 400 | Train Loss  0.1214 | Classifier Accuracy 93.75
Iteration 450 | Train Loss  0.0429 | Classifier Accuracy 96.8

### 나이 분류

In [64]:
optimizer = optim.RAdam(age_model.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=T_max)

num_epochs = 30

valid_early_stop = 0
valid_best_loss = float('inf')
EARLY_STOPPING_EPOCH = 5
since = time.time()

final_train_loss = []
final_train_acc = []
final_valid_loss = []
final_valid_acc = []

for e in range(num_epochs) :
    print(f' ====================== epoch %d ======================' % (e+1) )
    train_loss_list = []
    train_acc_list = []

    # train
    age_model.train()
    for i, (images, targets) in enumerate(age_train) : 
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        scores = age_model(images)
        _, preds = scores.max(dim=1)

        loss = F.cross_entropy(scores, targets)
        loss.backward()
        optimizer.step()

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        train_loss_list.append(loss)
        train_acc_list.append(acc)

        if i % 50 == 0 :
            print(f'Iteration %3.d | Train Loss  %.4f | Classifier Accuracy %2.2f' % (i, loss, acc))

    train_mean_loss = np.mean(train_loss_list, dtype="float64")
    train_mean_acc = np.mean(train_acc_list, dtype="float64")

    final_train_loss.append(train_mean_loss)
    final_train_acc.append(train_mean_acc)

    epoch_time = time.time() - since
    since = time.time()

    print('')
    print(f'[Summary] Elapsed time : %.0f m %.0f s' % (epoch_time // 60, epoch_time % 60))
    print(f'Train Loss Mean %.4f | Accuracy %2.2f ' % (train_mean_loss, train_mean_acc) )

    # validation 
    age_model.eval()
    valid_loss_list = []
    valid_acc_list = []
    for i, (images, targets) in enumerate(age_valid) : 
        optimizer.zero_grad()
        images = images.to(device=device)
        targets = targets.to(device=device)

        with torch.no_grad():
            scores = age_model(images)
            loss = F.cross_entropy(scores, targets)
            _, preds = scores.max(dim=1)

        correct = sum(targets == preds).cpu()
        acc=(correct/32 * 100)

        valid_loss_list.append(loss)
        valid_acc_list.append(acc)

    val_mean_loss = np.mean(valid_loss_list, dtype="float64")
    val_mean_acc = np.mean(valid_acc_list, dtype="float64")

    final_valid_loss.append(val_mean_loss)
    final_valid_acc.append(val_mean_acc)

    print(f'Valid Loss Mean %.4f | Accuracy %2.2f ' % (val_mean_loss, val_mean_acc) )
    print('')

    if val_mean_loss < valid_best_loss:
        valid_best_loss = val_mean_loss
        valid_early_stop = 0
        # new best model save (valid 기준)
        age_best_model = age_model
        path = './age_model/'
        torch.save(age_best_model.state_dict(), f'{path}model{val_mean_acc:2.2f}_epoch_{e}.pth')
    else:
        # early stopping    
        valid_early_stop += 1
        if valid_early_stop >= EARLY_STOPPING_EPOCH:
            print("EARLY STOPPING!!")
            break

    lr_sched.step()

Iteration   0 | Train Loss  1.3526 | Classifier Accuracy 12.50
Iteration  50 | Train Loss  0.6562 | Classifier Accuracy 81.25
Iteration 100 | Train Loss  0.5637 | Classifier Accuracy 81.25
Iteration 150 | Train Loss  0.3235 | Classifier Accuracy 96.88
Iteration 200 | Train Loss  0.4422 | Classifier Accuracy 81.25
Iteration 250 | Train Loss  0.3524 | Classifier Accuracy 84.38
Iteration 300 | Train Loss  0.4074 | Classifier Accuracy 87.50
Iteration 350 | Train Loss  0.4893 | Classifier Accuracy 78.12
Iteration 400 | Train Loss  0.4263 | Classifier Accuracy 78.12
Iteration 450 | Train Loss  0.3068 | Classifier Accuracy 90.62

[Summary] Elapsed time : 0 m 52 s
Train Loss Mean 0.5144 | Accuracy 80.71 
Valid Loss Mean 0.3538 | Accuracy 86.87 

Iteration   0 | Train Loss  0.3588 | Classifier Accuracy 81.25
Iteration  50 | Train Loss  0.3165 | Classifier Accuracy 87.50
Iteration 100 | Train Loss  0.3980 | Classifier Accuracy 87.50
Iteration 150 | Train Loss  0.3715 | Classifier Accuracy 84.38


## 4. Inference

### 마스크 착용 여부 분류

In [65]:
mask_best_model.eval()
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
mask_predictions = []
for images in test_loader:
    with torch.no_grad():
        images = images.to(device)
        scores = mask_best_model(images)
        preds = scores.argmax(dim=-1)
        mask_predictions.extend(preds.cpu().numpy())

In [66]:
len(mask_predictions)

12600

In [67]:
from collections import Counter
Counter(mask_predictions)

Counter({0: 12599, 1: 1})

* 마스크는 아직도 문제다..문제..

### 성별 분류

In [68]:
gender_best_model.eval()
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
gender_predictions = []
for images in test_loader:
    with torch.no_grad():
        images = images.to(device)
        scores = gender_best_model(images)
        preds = scores.argmax(dim=-1)
        gender_predictions.extend(preds.cpu().numpy())

In [69]:
len(gender_predictions)

12600

In [70]:
from collections import Counter
Counter(gender_predictions)

Counter({0: 6210, 1: 6390})

* 성별 비율이 비슷해졌다!

### 나이 분류

In [71]:
age_best_model.eval()
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
age_predictions = []
for images in test_loader:
    with torch.no_grad():
        images = images.to(device)
        scores = age_best_model(images)
        preds = scores.argmax(dim=-1)
        age_predictions.extend(preds.cpu().numpy())

In [72]:
len(age_predictions)

12600

In [73]:
from collections import Counter
Counter(age_predictions)

Counter({0: 7843, 2: 1856, 1: 2901})

* 나이도 60대 이상 예측이 아직 적긴하지만 나름 분포와 비슷하게 나온 것 같다!

### 최종 클래스 선정

In [74]:
all_predictions = []
size = len(submission)
class_map = np.array([[[0, 1, 2],
                       [3, 4, 5]],
                      [[6, 7, 8],
                       [9, 10, 11]],
                      [[12, 13, 14],
                       [15, 16, 17]]])
for idx in range(size):
    i = mask_predictions[idx]
    j = gender_predictions[idx]
    k = age_predictions[idx]
    all_predictions.append(class_map[i][j][k])

In [75]:
len(all_predictions)

12600

In [76]:
from collections import Counter
Counter(all_predictions)

Counter({0: 3743, 2: 1122, 3: 4100, 1: 1344, 4: 1557, 5: 733, 8: 1})

In [77]:
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission_baseline_pretrained.csv'), index=False)
print('test inference is done!')

test inference is done!
