In [95]:
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import Dataset, DataLoader, sampler
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as T
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop, Normalize, ToPILImage, RandomHorizontalFlip
from torchvision.transforms.functional import crop
import copy
from tqdm import tqdm

from sklearn.metrics import f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [96]:
torch.cuda.empty_cache()

In [97]:
train_dir = '/opt/ml/input/data/train'
train_image_dir = os.path.join(train_dir, 'images')
train_info = pd.read_csv(os.path.join(train_dir, 'train_labeled.csv'))
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
test_image_dir = os.path.join(test_dir, 'images')
test_image_paths = [os.path.join(test_image_dir, img_id) for img_id in submission.ImageID]

In [98]:
idx_num = train_info[train_info['stem']=='.ipynb_checkpoints'].index
train_info = train_info.drop(idx_num)

train_image = list(train_info['img_path'])
train_label = list(train_info['label'])

In [99]:
# image = Image.open(train_image[2554])
# image = T.functional.crop(image, 128,96,256,192)
# display(image)

In [100]:
#Gender 클래스와 age 클래스 수치화
# gender_list = []
# for each_gender in train_info['gender']:
#     if each_gender=='male':
#         gender_list.append(0)
#     else:
#         gender_list.append(1)
# train_info['gender']=pd.Series(gender_list)
# age_list = []
# for each_age in train_info['age']:
#     if each_age<30:
#         age_list.append(0)
#     elif 30<=each_age<60:
#         age_list.append(1)
#     else:
#         age_list.append(2)
# train_info['age']=pd.Series(age_list)

In [101]:
# #train_image 리스트에는 각 사진들의 path가 담기고, train_label에는 각 사람의 클래스 정보가 담김(0~17)
# train_image = []
# train_label = []
# for idx in range(len(train_info['id'])):
#     id, gender, race, age, path = train_info.iloc[idx]
#     root = os.path.join(train_image_dir, path)
#     dirpath, dirnames, filenames = next(os.walk(root))
#     for name in filenames:
#         if name[0]=='i':
#             label = 0
#         elif name[0]=='m':
#             label = 1
#         elif name[0]=='n':
#             label = 2
#         else:
#             continue
#         train_image.append(os.path.join(root, name))
#         train_label.append((label, gender, age))
# for idx in range(len(train_label)):
#     if train_label[idx]==(1,0,0):
#         train_label[idx]=0
#     elif train_label[idx]==(1,0,1):
#         train_label[idx]=1
#     elif train_label[idx]==(1,0,2):
#         train_label[idx]=2
#     elif train_label[idx]==(1,1,0):
#         train_label[idx]=3
#     elif train_label[idx]==(1,1,1):
#         train_label[idx]=4
#     elif train_label[idx]==(1,1,2):
#         train_label[idx]=5
#     elif train_label[idx]==(0,0,0):
#         train_label[idx]=6
#     elif train_label[idx]==(0,0,1):
#         train_label[idx]=7
#     elif train_label[idx]==(0,0,2):
#         train_label[idx]=8
#     elif train_label[idx]==(0,1,0):
#         train_label[idx]=9
#     elif train_label[idx]==(0,1,1):
#         train_label[idx]=10
#     elif train_label[idx]==(0,1,2):
#         train_label[idx]=11
#     elif train_label[idx]==(2,0,0):
#         train_label[idx]=12
#     elif train_label[idx]==(2,0,1):
#         train_label[idx]=13
#     elif train_label[idx]==(2,0,2):
#         train_label[idx]=14
#     elif train_label[idx]==(2,1,0):
#         train_label[idx]=15
#     elif train_label[idx]==(2,1,1):
#         train_label[idx]=16
#     elif train_label[idx]==(2,1,2):
#         train_label[idx]=17

In [102]:
transform = T.Compose([T.CenterCrop(224), 
                       T.ToTensor(), 
                       T.Normalize(mean=(0.55800916,0.51224077,0.47767341), std=(0.21817792,0.23804603,0.25183411))
                      ])
transform_weak = T.Compose([T.CenterCrop(224),  
                            T.RandomHorizontalFlip(p=0.5), 
                            T.RandomRotation(degrees=(-30, 30)),  
                            T.ToTensor(), 
                            T.Normalize(mean=(0.55800916,0.51224077,0.47767341), std=(0.21817792,0.23804603,0.25183411))
                           ])

In [103]:
class TrainDataset(Dataset):
    def __init__(self, img_paths, label, transform, transform_weak, weak_flag = True):
        self.img_paths = img_paths
        self.transform = transform
        self.transform_weak = transform_weak
        self.label = label
        self.classes = pd.Series(self.label).unique()
        self.weak_flag = weak_flag

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])
        y = self.label[index]
        
        if self.transform:
            if self.weak_flag:
                if y in [2,5,6,7,8,9,10,11,12,13,14,15,16,17]:
                    image = self.transform_weak(image)
                else:
                    image = self.transform(image)
            else:
                image = self.transform(image)
        return image, torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.img_paths)

In [104]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])
        #image = T.functional.crop(image, 128,96,256,192)
        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [105]:
from sklearn.model_selection import train_test_split
import random

random_seed = 42

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

image_train, image_valid, label_train, label_valid = train_test_split(train_image, train_label, train_size=0.8, shuffle=True, random_state=random_seed, stratify=train_label)
Dataset_Train = TrainDataset(img_paths = image_train, label = label_train, transform = transform, transform_weak = transform_weak, weak_flag = True)
Dataset_Valid = TrainDataset(img_paths = image_valid, label = label_valid, transform = transform, transform_weak = transform_weak, weak_flag = False)
Dataset_Test = TestDataset(img_paths = test_image_paths, transform = transform)
# Dataset_Train = TrainDataset(img_paths = train_image, label = train_label, transform = transform)
# Dataset_Test = TestDataset(img_paths = test_image_paths, transform = transform)

In [106]:
# class SmoothCrossEntropy(nn.Module):
#     def __init__(self, alpha=0.1):
#         super(SmoothCrossEntropy, self).__init__()
#         self.alpha = alpha

#     def forward(self, logits, labels):
#         num_classes = logits.shape[-1]
#         alpha_div_k = self.alpha / num_classes
#         target_probs = F.one_hot(labels, num_classes=num_classes).float() * \
#             (1. - self.alpha) + alpha_div_k
#         loss = -(target_probs * torch.log_softmax(logits, dim=-1)).sum(dim=-1)
#         return loss.mean()

In [107]:
import timm
model_name = 'vit_tiny_patch16_224'

model = timm.create_model(model_name, pretrained=True, num_classes=18).to(device)
#model.head = nn.Sequential(nn.Linear(in_features=192, out_features=18, bias=True), nn.Dropout(0.1))
#model.to(device)
best_model_state = None
early_stop = 8
learning_rate = 1e-4
momentum = 0.9

if best_model_state is not None:
    model.load_state_dict(best_model_state)

num_epochs = 50

class_num = [2745, 2050, 415, 3660, 4085, 545, 549, 410, 83, 732, 817, 109, 549, 410, 83, 732, 817, 109]
class_weight = torch.tensor(np.max(class_num) / class_num).to(device=device, dtype=torch.float)
criterion = nn.CrossEntropyLoss(weight=class_weight)

feature_extractor = [m for n, m in model.named_parameters() if "head" not in n]
classifier = [p for p in model.head.parameters()]
params = [
    {"params": feature_extractor, "lr": learning_rate * 0.2},
    {"params": classifier, "lr": learning_rate}
]
optimizer = optim.AdamW(params, lr=learning_rate)

# for param in model.parameters():
#     param.requires_grad = False
# for param in model.head.parameters():
#     param.requires_grad = True
    
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

batch_size = 64
# dataloaders_train = DataLoader(dataset=Dataset_Train, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(0, len(Dataset_Train) * 4//5)), num_workers = 2)
# dataloaders_valid = DataLoader(dataset=Dataset_Train, batch_size=batch_size, sampler=sampler.SubsetRandomSampler(range(len(Dataset_Train) * 4//5, len(Dataset_Train))), num_workers = 2)
dataloaders_train = DataLoader(dataset=Dataset_Train, batch_size=batch_size, shuffle=True, num_workers = 2)
dataloaders_valid = DataLoader(dataset=Dataset_Valid, batch_size=batch_size, shuffle=True, num_workers = 2)
dataloaders_test = DataLoader(Dataset_Test, shuffle=False)

In [108]:
model

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=768, out_features=192, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): 

In [109]:
from tqdm import tqdm
best_f1 = 0
early_stop_count = 0

for epoch in range(num_epochs):
    print('*** Epoch {} ***'.format(epoch))
    
    iter_train_loss = []
    iter_valid_loss = []
    iter_train_acc = []
    iter_valid_acc = []
    iter_valid_f1 = []

    # Training
    model.train()  
        
    for idx, (inputs, labels) in tqdm(enumerate(dataloaders_train)):
      inputs = inputs.to(device)
      labels = labels.to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward
      with torch.set_grad_enabled(True):
        outputs= model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # statistics
        iter_train_loss.append(loss.cpu().item())
        train_pred_c = outputs.argmax(dim=-1)
        iter_train_acc.extend((train_pred_c == labels).cpu().tolist())

    # Validation
    model.eval()  
    
    for idx, (inputs, labels) in tqdm(enumerate(dataloaders_valid)):
      inputs = inputs.to(device)
      labels = labels.to(device)

      with torch.set_grad_enabled(False):
        outputs = model(inputs)
        # statistics
        valid_loss = criterion(outputs, labels)
        iter_valid_loss.append(valid_loss.cpu().item())
        valid_pred_c = outputs.argmax(dim=-1)
        iter_valid_acc.extend((valid_pred_c == labels).cpu().tolist())
        iter_f1_score = f1_score(y_true=labels.cpu().numpy(), y_pred=valid_pred_c.cpu().numpy(), average="macro")
        iter_valid_f1.append(iter_f1_score)

    # statistics
    epoch_train_loss = np.mean(iter_train_loss)
    epoch_valid_loss = np.mean(iter_valid_loss)
    epoch_train_acc = np.mean(iter_train_acc) * 100
    epoch_valid_acc = np.mean(iter_valid_acc) * 100
    epoch_valid_f1_score = np.mean(iter_valid_f1)
    
    scheduler.step()
    
    print(
            f"[Epoch {epoch}] "
            f"train loss : {epoch_train_loss:.4f} | train acc : {epoch_train_acc:.2f}% "
            f"valid loss : {epoch_valid_loss:.4f} | valid acc : {epoch_valid_acc:.2f}% | valid f1 score : {epoch_valid_f1_score:.4f}"
        )
    
    if epoch_valid_f1_score > best_f1:
        best_f1 = epoch_valid_f1_score
        best_model_state = model.state_dict()
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count == early_stop:
        print("early stoped." + " " * 30)
        break

*** Epoch 0 ***


237it [01:00,  3.92it/s]
60it [00:11,  5.12it/s]

[Epoch 0] train loss : 0.9958 | train acc : 72.67% valid loss : 0.8284 | valid acc : 79.95% | valid f1 score : 0.6444
*** Epoch 1 ***



237it [00:59,  4.00it/s]
60it [00:11,  5.15it/s]

[Epoch 1] train loss : 0.4724 | train acc : 88.23% valid loss : 1.0402 | valid acc : 84.07% | valid f1 score : 0.6779
*** Epoch 2 ***



237it [01:00,  3.95it/s]
60it [00:11,  5.15it/s]

[Epoch 2] train loss : 0.3109 | train acc : 92.28% valid loss : 1.3780 | valid acc : 80.82% | valid f1 score : 0.6061
*** Epoch 3 ***



237it [01:00,  3.95it/s]
60it [00:11,  5.15it/s]

[Epoch 3] train loss : 0.2582 | train acc : 94.13% valid loss : 1.4975 | valid acc : 77.88% | valid f1 score : 0.5700
*** Epoch 4 ***



237it [00:59,  3.97it/s]
60it [00:11,  5.12it/s]

[Epoch 4] train loss : 0.1697 | train acc : 96.06% valid loss : 1.2477 | valid acc : 83.54% | valid f1 score : 0.6506
*** Epoch 5 ***



237it [01:00,  3.94it/s]
60it [00:11,  5.15it/s]

[Epoch 5] train loss : 0.1333 | train acc : 97.11% valid loss : 1.0782 | valid acc : 84.58% | valid f1 score : 0.6552
*** Epoch 6 ***



237it [00:59,  3.97it/s]
60it [00:11,  5.14it/s]

[Epoch 6] train loss : 0.0872 | train acc : 98.25% valid loss : 0.7351 | valid acc : 87.22% | valid f1 score : 0.7161
*** Epoch 7 ***



237it [00:59,  3.97it/s]
60it [00:11,  5.15it/s]

[Epoch 7] train loss : 0.0564 | train acc : 99.13% valid loss : 1.2477 | valid acc : 86.14% | valid f1 score : 0.6891
*** Epoch 8 ***



237it [01:00,  3.92it/s]
60it [00:11,  5.17it/s]

[Epoch 8] train loss : 0.0394 | train acc : 99.37% valid loss : 1.1015 | valid acc : 87.33% | valid f1 score : 0.7044
*** Epoch 9 ***



237it [00:59,  4.00it/s]
60it [00:11,  5.14it/s]

[Epoch 9] train loss : 0.0388 | train acc : 99.55% valid loss : 1.0878 | valid acc : 87.62% | valid f1 score : 0.7102
*** Epoch 10 ***



237it [00:59,  4.00it/s]
60it [00:11,  5.17it/s]

[Epoch 10] train loss : 0.0328 | train acc : 99.56% valid loss : 0.9798 | valid acc : 87.62% | valid f1 score : 0.7184
*** Epoch 11 ***



237it [00:59,  4.01it/s]
60it [00:11,  5.16it/s]

[Epoch 11] train loss : 0.0326 | train acc : 99.54% valid loss : 1.0961 | valid acc : 87.57% | valid f1 score : 0.7194
*** Epoch 12 ***



237it [00:59,  3.97it/s]
60it [00:11,  5.14it/s]

[Epoch 12] train loss : 0.0303 | train acc : 99.66% valid loss : 1.1016 | valid acc : 86.61% | valid f1 score : 0.6946
*** Epoch 13 ***



237it [00:59,  4.00it/s]
60it [00:11,  5.12it/s]

[Epoch 13] train loss : 0.0432 | train acc : 99.37% valid loss : 0.9927 | valid acc : 88.23% | valid f1 score : 0.7449
*** Epoch 14 ***



237it [01:00,  3.95it/s]
60it [00:11,  5.11it/s]

[Epoch 14] train loss : 0.0726 | train acc : 98.51% valid loss : 0.9944 | valid acc : 84.76% | valid f1 score : 0.6862
*** Epoch 15 ***



237it [01:00,  3.94it/s]
60it [00:11,  5.15it/s]

[Epoch 15] train loss : 0.1285 | train acc : 97.10% valid loss : 1.5957 | valid acc : 82.94% | valid f1 score : 0.6486
*** Epoch 16 ***



237it [01:00,  3.94it/s]
60it [00:11,  5.19it/s]

[Epoch 16] train loss : 0.1397 | train acc : 96.69% valid loss : 1.8311 | valid acc : 82.96% | valid f1 score : 0.6378
*** Epoch 17 ***



237it [00:59,  3.95it/s]
60it [00:11,  5.11it/s]

[Epoch 17] train loss : 0.1678 | train acc : 95.91% valid loss : 0.7651 | valid acc : 86.88% | valid f1 score : 0.7063
*** Epoch 18 ***



237it [00:59,  3.97it/s]
60it [00:11,  5.11it/s]

[Epoch 18] train loss : 0.1208 | train acc : 96.96% valid loss : 1.5816 | valid acc : 82.51% | valid f1 score : 0.6355
*** Epoch 19 ***



17it [00:04,  3.52it/s]


KeyboardInterrupt: 

In [64]:
model.load_state_dict(best_model_state)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in tqdm(dataloaders_test):
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        _, output = torch.max(pred, 1)
        all_predictions.extend(output.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)
print('test inference is done!')

100%|██████████| 12600/12600 [03:09<00:00, 66.64it/s]


test inference is done!


In [95]:
from torchsummary import summary
summary(model, (3, 384, 384))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 24, 24]         147,648
          Identity-2             [-1, 576, 192]               0
        PatchEmbed-3             [-1, 576, 192]               0
           Dropout-4             [-1, 577, 192]               0
         LayerNorm-5             [-1, 577, 192]             384
            Linear-6             [-1, 577, 576]         111,168
           Dropout-7          [-1, 3, 577, 577]               0
            Linear-8             [-1, 577, 192]          37,056
           Dropout-9             [-1, 577, 192]               0
        Attention-10             [-1, 577, 192]               0
         Identity-11             [-1, 577, 192]               0
        LayerNorm-12             [-1, 577, 192]             384
           Linear-13             [-1, 577, 768]         148,224
             GELU-14             [-1, 5