In [1]:
# !pip install GPUtil

In [21]:
import os
import datetime as dt
from pytz import timezone

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize, PILToTensor
from torchvision.transforms import RandomCrop, RandomRotation, RandomHorizontalFlip

from GPUtil import showUtilization as gpu_usage

from tqdm import tqdm

In [12]:

# Flexible integration for any Python script
import wandb

# 1. Start a W&B run
wandb.init(project='seunghun', entity='13ai')

# 2. Save model inputs and hyperparameters

# Model training here

# 3. Log metrics over time to visualize performance

    

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
average accuracy,80.61691
_runtime,91.0
_timestamp,1629955294.0
_step,1.0
average loss,80.61691


0,1
average accuracy,▁
_runtime,▁▁
_timestamp,▁▁
_step,▁█
average loss,▁



CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



In [15]:
### 마스크 여부, 성별, 나이를 mapping할 클래스를 생성합니다.

class MaskLabels:
    mask = 0
    incorrect = 1
    normal = 2

class GenderLabels:
    male = 0
    female = 1

class AgeGroup:
    map_label = lambda x: 0 if int(x) < 30 else 1 if int(x) < 60 else 2

    
### Define Dataset
class MaskBaseDataset(Dataset):
    """
    1. label 이 (0, 1, 0) 과 같은 형식으로 나옴.
    >>> MaskBaseDataset(img_dir, transform=transform, split_labels=True)
    2. label 이 3 과 같은 형식으로 나옴.
    >>> MaskBaseDataset(img_dir, transform=transform)
    """
    num_classes = 3 * 2 * 3

    _file_names = {
        "mask1.jpg": MaskLabels.mask,
        "mask2.jpg": MaskLabels.mask,
        "mask3.jpg": MaskLabels.mask,
        "mask4.jpg": MaskLabels.mask,
        "mask5.jpg": MaskLabels.mask,
        "incorrect_mask.jpg": MaskLabels.incorrect,
        "normal.jpg": MaskLabels.normal
    }

    image_paths = []
    mask_labels = []
    gender_labels = []
    age_labels = []

    def __init__(self, img_dir, transform=None, split_labels:bool=False):
        """
        MaskBaseDataset을 initialize 합니다.

        Args:
            img_dir: 학습 이미지 폴더의 root directory 입니다.
            transform: Augmentation을 하는 함수입니다.
        """
        self.img_dir = img_dir
#         self.mean = mean
#         self.std = std
        self.mean = 0.5
        self.std = 0.2
        self.transform = transform
        self.split_labels = split_labels

        self.setup()

    def set_transform(self, transform):
        """
        transform 함수를 설정하는 함수입니다.
         - train_dataset, validation_dataset, test_dataset 이 정해지면 그 때 따로 설정한다.
        """
        self.transform = transform
        
    def setup(self):
        """
        image의 경로와 각 이미지들의 label을 계산하여 저장해두는 함수입니다.
        """
        profiles = os.listdir(self.img_dir)
        for profile in profiles:
            for file_name, mask_label in self._file_names.items():
                img_path = os.path.join(self.img_dir, profile, file_name)  # (resized_data, 000004_male_Asian_54, mask1.jpg)
                if os.path.exists(img_path):
                    self.image_paths.append(img_path)
                    self.mask_labels.append(mask_label)

                    id, gender, race, age = profile.split("_")
                    gender_label = getattr(GenderLabels, gender)
                    age_label = AgeGroup.map_label(age)

                    self.gender_labels.append(gender_label)
                    self.age_labels.append(age_label)

    def __getitem__(self, index):
        """
        데이터를 불러오는 함수입니다. 
        데이터셋 class에 데이터 정보가 저장되어 있고, index를 통해 해당 위치에 있는 데이터 정보를 불러옵니다.
        
        Args:
            index: 불러올 데이터의 인덱스값입니다.
        """
        # 이미지를 불러옵니다.
        image_path = self.image_paths[index]
        image = Image.open(image_path)
        
        # 이미지를 Augmentation 시킵니다.
        image_transform = self.transform(image)
        
        # 레이블을 불러옵니다.
        mask_label = self.mask_labels[index]
        gender_label = self.gender_labels[index]
        age_label = self.age_labels[index]

        
        if not self.split_labels:
            multi_class_label = mask_label * 6 + gender_label * 3 + age_label
            return image_transform, multi_class_label
        else:
            return image_transform, (mask_label, gender_label, age_label)

    def __len__(self):
        return len(self.image_paths)

In [16]:
## DataLoader

RAW_ROW_SIZE = 512
RAW_COL_SIZE = 384
PROPORTION = 0.7

ROW_SIZE = 128
COL_SIZE = 128


# random rotation & resize 적용한 train dataset 2
train_transform = transforms.Compose([
    RandomCrop(size=(int(RAW_ROW_SIZE*PROPORTION), 
                     int(RAW_COL_SIZE*PROPORTION))),
    RandomHorizontalFlip(p=0.5),
    Resize((ROW_SIZE, COL_SIZE), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), 
              std=(0.2, 0.2, 0.2))
])

val_transform = transforms.Compose([
    Resize((ROW_SIZE, COL_SIZE), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), 
              std=(0.2, 0.2, 0.2))
])

train_dir = '/opt/ml/input/data/train'
img_dir = os.path.join(train_dir, "images")

dataset = MaskBaseDataset(img_dir=img_dir,
                          split_labels=False)

n_val = int(len(dataset) * 0.2)
n_train = len(dataset) - n_val

train_dataset, val_dataset = torch.utils.data.random_split(dataset, 
                                                           [n_train, n_val])

train_dataset.dataset.set_transform(train_transform)
val_dataset.dataset.set_transform(val_transform)

train_data_loader = DataLoader(train_dataset,
                                batch_size=64,
                                shuffle=True)
val_data_loader = DataLoader(val_dataset,
                                batch_size=64,
                                shuffle=True)

In [15]:
a_data = next(iter(train_data_loader))
print(a_data[0].size())
print(a_data[1].size())

torch.Size([64, 3, 128, 128])
torch.Size([64])


---

# 모델 트레이닝

In [16]:
# ## Example

# class MyModel(nn.Module):
#     def __init__(self, num_classes: int = 1000):
#         super(MyModel, self).__init__()
#         self.features = nn.Sequential(
#             nn.Conv2d(in_channels=3, out_channels=64, 
#                       kernel_size=11, stride=4, padding=2),
#             nn.BatchNorm2d(64),
#             nn.ReLU(inplace=True),
#         )
#         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
#         self.classifier = nn.Sequential(
#             nn.Dropout(),
#             nn.Linear(64, 32),
#             nn.ReLU(inplace=True),
#             nn.Linear(32, num_classes),
#         )

#     def forward(self, x: torch.Tensor) -> torch.Tensor:
#         x = self.features(x)
#         x = self.avgpool(x)
#         x = torch.flatten(x, 1)
#         x = self.classifier(x)
#         return x 

In [17]:
# class MyModel(nn.Module):
#     def __init__(self, num_classes: int = 18):
#         super(MyModel, self).__init__()
#         self.layer1 = nn.Sequential(
#             nn.Conv2d(in_channels=3, out_channels=48, 
#                       kernel_size=11, stride=4, padding=2),
#             nn.BatchNorm2d(48),
#             nn.ReLU(inplace=True),
#             nn.maxpool((5, 5))
#         )
        
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(in_channels=48, out_channels=96, 
#                       kernel_size=11, stride=4, padding=2),
#             nn.BatchNorm2d(96),
#             nn.ReLU(inplace=True),
#             nn.maxpool((3, 3))
#         )
        
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(in_channels=96, out_channels=48, 
#                       kernel_size=11, stride=4, padding=2),
#             nn.BatchNorm2d(48),
#             nn.ReLU(inplace=True),
#             nn.maxpool((3, 3))
#         )
        
# #         self.fc = nn.Sequential(
# #             nn.
# #         )
        

In [18]:
def f1_loss(y_true:torch.Tensor, y_pred:torch.Tensor, is_training=False) -> torch.Tensor:
    '''Calculate F1 score. Can work with gpu tensors
    
    The original implmentation is written by Michal Haltuf on Kaggle.
    
    Returns
    -------
    torch.Tensor
        `ndim` == 1. 0 <= val <= 1
    
    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    
    '''
    assert y_true.ndim == 1
    assert y_pred.ndim == 1 or y_pred.ndim == 2
    
    if y_pred.ndim == 2:
        y_pred = y_pred.argmax(dim=1)
        
    
    tp = (y_true * y_pred).sum().to(torch.float32)
    tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
    fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
    fn = (y_true * (1 - y_pred)).sum().to(torch.float32)
    
    epsilon = 1e-7
    
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    
    f1 = 2 * (precision*recall) / (precision + recall + epsilon)
    f1.requires_grad = is_training
    return f1

In [None]:
# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
# model = MyModel(num_classes=18).to(device)
# model = CNN().to(device)

# model = torch.load("./alexnet_before_fine_tune_210825_18h").to(device)
model = torch.load("./resnet18_before_fine_tune").to(device)
dtype = torch.float
ltype = torch.long # entropy

# model.train()

optimizer = optim.Adam(model.parameters(), lr=1e-3)
lr_sched = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

max_validation_acc = -1
min_validation_loss = 1e+10
max_f1_score = -1

num_epochs = 1000

for e in range(num_epochs) :
    ## Train
    model.train()
    print(f' ====================== epoch %d ======================' % (e+1) )
    train_loss_list = []
    train_acc_list = []

    avg_acc = 0.0
    avg_loss = 0.0
    
#     for i, (images, targets) in tqdm(enumerate(train_data_loader)):
    for i, (images, targets) in enumerate(train_data_loader): 
        optimizer.zero_grad()

        images = images.to(device, dtype)
        targets = targets.to(device, ltype)
        scores = model(images)
        _, preds = scores.max(dim=1)

        loss = F.cross_entropy(scores, targets)
        avg_loss += loss
        loss.backward()
        optimizer.step()

        correct = sum(targets == preds).cpu()
        acc = (correct / len(targets) * 100)
        avg_acc += acc
        
        if (i+1) % 20 == 0 :
            print(f'Iteration %3.d | Train Loss  %.4f | Class Acc(train) %2.2f' 
                  % (i+1, loss, acc))
        
        del images, targets
    
    torch.cuda.empty_cache()
        
    avg_acc = avg_acc / (i+1)
    avg_loss = avg_loss / (i+1)

    wandb.log({"average accuracy" : avg_acc})
    wandb.log({"average loss" : avg_acc})
    
    # train 이후 gpu memory 확인
    gpu_usage()
    print()
    
    ## Validation    
    avg_validaion_acc = 0.0
    avg_validaion_loss = 0.0
    avg_valid_f1_score = 0.0
    
    with torch.no_grad():
        model.eval()
#         for i, (images, targets) in tqdm(enumerate(val_data_loader)): 
        for i, (images, targets) in enumerate(val_data_loader):
            images = images.to(device, dtype)
            targets = targets.to(device, ltype)
            scores = model(images)
            _, preds = scores.max(dim=1)

            loss = F.cross_entropy(scores, targets)
            avg_validaion_loss += loss

            correct = sum(targets == preds).cpu()
            acc = (correct / len(targets) * 100)
            avg_validaion_acc += acc
            
            valid_f1_score = f1_loss(y_true=targets, y_pred=preds) / len(targets)
            avg_valid_f1_score += valid_f1_score
            
            if (i+1) % 20 == 0 :
                print(f'Iteration %3.d | validation Loss  %.4f | Class Acc(validation) %2.2f | f1 score %2.2f' 
                      % (i+1, loss, acc, valid_f1_score))

            del images, targets

        torch.cuda.empty_cache()
        
    # valid 이후 gpu memory 확인
    gpu_usage()
    print()

    avg_validaion_loss = avg_validaion_loss / (i+1)
    avg_validaion_acc = avg_validaion_acc / (i+1)
    avg_valid_f1_score = avg_valid_f1_score / (i+1)
    
    print(f" >>> average validation accuracy : {avg_validaion_acc:5.2f}")
    print(f" >>>     average validation loss : {avg_validaion_loss:5.2f}")
    print(f" >>> average validation f1 score : {avg_valid_f1_score:5.2f}")

    # 성능 향상으로 보고 모델 저장
    if avg_validaion_acc > max_validation_acc and avg_validaion_loss < min_validation_loss and avg_valid_f1_score > max_f1_score:
        year = dt.datetime.now().astimezone(timezone("Asia/Seoul")).year
        month = dt.datetime.now().astimezone(timezone("Asia/Seoul")).month
        day = dt.datetime.now().astimezone(timezone("Asia/Seoul")).day
        hour = dt.datetime.now().astimezone(timezone("Asia/Seoul")).hour
        minute = dt.datetime.now().astimezone(timezone("Asia/Seoul")).minute
        second = dt.datetime.now().astimezone(timezone("Asia/Seoul")).second
        
        # 모델 세이브 파일 이름 컨벤션 논의!
        print("\n !!! Model Saved !!!")
        print(f"     >>> accuracy : from {max_validation_acc:.2f} to {avg_validaion_acc:.2f}")
        print(f"     >>>     loss : from {min_validation_loss:.2f} to {avg_validaion_loss:.2f}")
        print(f"     >>> f1 score : from {max_f1_score:.2f} to {avg_valid_f1_score:.2f}\n")
        torch.save(model, f"./model_{e}_{int(avg_validaion_acc)}_{year}{month}{day}_{hour}h{minute}m{second}s")
        max_validation_acc, min_validation_loss, max_f1_score = avg_validaion_acc, avg_validaion_loss, avg_valid_f1_score
        
    wandb.log({"average validation accuracy" : avg_validaion_acc})
    wandb.log({"average validation loss" : avg_validaion_loss})
    wandb.log({"average validation f1 score" : avg_valid_f1_score})
    
    # early stop 기능
    lr_sched.step()
    
#     torch.save(model, "./model_backup")
# torch.save(model, f"./model_{year}{month}{day}_{hour}h{minute}m{second}s")

Iteration  20 | Train Loss  1.2391 | Class Acc(train) 65.62
Iteration  40 | Train Loss  0.5432 | Class Acc(train) 78.12
Iteration  60 | Train Loss  0.4129 | Class Acc(train) 84.38
Iteration  80 | Train Loss  0.6504 | Class Acc(train) 71.88
Iteration 100 | Train Loss  0.5043 | Class Acc(train) 84.38
Iteration 120 | Train Loss  0.3235 | Class Acc(train) 90.62
Iteration 140 | Train Loss  0.4268 | Class Acc(train) 84.38
Iteration 160 | Train Loss  0.4416 | Class Acc(train) 78.12
Iteration 180 | Train Loss  0.5444 | Class Acc(train) 84.38
Iteration 200 | Train Loss  0.3394 | Class Acc(train) 84.38
Iteration 220 | Train Loss  0.3057 | Class Acc(train) 85.94
| ID | GPU | MEM |
------------------
|  0 | 33% |  5% |

Iteration  20 | validation Loss  0.5975 | Class Acc(validation) 79.69 | f1 score 0.15
Iteration  40 | validation Loss  0.7070 | Class Acc(validation) 76.56 | f1 score 0.13
| ID | GPU | MEM |
------------------
|  0 | 19% |  5% |

 >>> average validation accuracy : 83.50
 >>>     av

In [None]:
# # epoch 아래에 넣으면 early stop
# EARLY_STOPPING_EPOCH = 5

# if val_mean_loss < valid_best_loss:
#     valid_best_loss = val_mean_loss
#     valid_early_stop = 0
#     # new best model save (valid 기준)
#     best_model = model
#     path = './model/'
#     torch.save(best_model.state_dict(), f'{path}model{val_mean_acc:2.2f}_epoch_{e}.pth')
# else:
#     # early stopping    
#     valid_early_stop += 1
#     if valid_early_stop >= EARLY_STOPPING_EPOCH:
#         print("EARLY STOPPING!!")
#         break

# lr_sched.step()


In [12]:
iter(range(10000))

<range_iterator at 0x7f65383430c0>

In [23]:
# for data in tqdm(train_data_loader):
#     print(data)

In [17]:
type(train_data_loader)

torch.utils.data.dataloader.DataLoader