## 0. Libarary 불러오기

In [9]:
import os, glob
import time
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
from torchvision.models import resnet50, resnext50_32x4d, resnet18, resnext101_32x8d
import timm
import albumentations as A

from torchsummary import summary
import torch_optimizer as optim
from torchsampler import ImbalancedDatasetSampler
from catalyst.data import BalanceClassSampler

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, KFold

In [2]:
%matplotlib inline 

In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cuda:0


## 1. 데이터셋 정의

### (1) Train Dataset

In [4]:
# 학습 데이터셋 폴더 경로를 지정해주세요.
train_dir = '/opt/ml/input/data/train'
trainimage_dir = os.path.join(train_dir, 'images')

In [5]:
# meta 데이터와 이미지 경로를 불러옵니다.
train_df = pd.read_csv(os.path.join(train_dir, 'train.csv'))
train_df

Unnamed: 0,id,gender,race,age,path
0,000001,female,Asian,45,000001_female_Asian_45
1,000002,female,Asian,52,000002_female_Asian_52
2,000004,male,Asian,54,000004_male_Asian_54
3,000005,female,Asian,58,000005_female_Asian_58
4,000006,female,Asian,59,000006_female_Asian_59
...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19
2696,006955,male,Asian,19,006955_male_Asian_19
2697,006956,male,Asian,19,006956_male_Asian_19
2698,006957,male,Asian,20,006957_male_Asian_20


In [6]:
masks = ['mask1', 'mask2', 'mask3', 'mask4', 'mask5', 'incorrect_mask', 'normal']
wears = ['Wear', 'Wear', 'Wear', 'Wear', 'Wear', 'Incorrect', 'Not Wear']
mask_df = pd.DataFrame()
for person in train_df.values:
    for mask, wear in zip(masks, wears):
        mask_df = mask_df.append(pd.Series(np.append(person, (mask, wear))), ignore_index=True)
mask_df.columns = np.append(train_df.columns.values, ('mask', 'wear'))
mask_df

Unnamed: 0,id,gender,race,age,path,mask,wear
0,000001,female,Asian,45.0,000001_female_Asian_45,mask1,Wear
1,000001,female,Asian,45.0,000001_female_Asian_45,mask2,Wear
2,000001,female,Asian,45.0,000001_female_Asian_45,mask3,Wear
3,000001,female,Asian,45.0,000001_female_Asian_45,mask4,Wear
4,000001,female,Asian,45.0,000001_female_Asian_45,mask5,Wear
...,...,...,...,...,...,...,...
18895,006959,male,Asian,19.0,006959_male_Asian_19,mask3,Wear
18896,006959,male,Asian,19.0,006959_male_Asian_19,mask4,Wear
18897,006959,male,Asian,19.0,006959_male_Asian_19,mask5,Wear
18898,006959,male,Asian,19.0,006959_male_Asian_19,incorrect_mask,Incorrect


In [7]:
#mask_df = mask_df.sample(frac=1).reset_index(drop=True)
#mask_df

In [8]:
labeled_df = pd.DataFrame()
for idx, person in mask_df.iterrows():
    gender = person['gender']
    gender = 0 if gender=='male' else 1

    age = person['age']
    if age >= 60.0:
        age = 2
    elif age >= 30.0:
        age = 1
    else:
        age = 0

    mask = person['wear']
    if mask == 'Wear':
        mask = 0
    elif mask == 'Incorrect':
        mask = 1
    else:
        mask = 2

    label = 6*mask + 3*gender + age
    labeled_df = labeled_df.append(pd.Series(np.append(person, label)), ignore_index=True)
labeled_df.columns = np.append(mask_df.columns.values, 'label')
labeled_df = labeled_df.astype({'label': int})
labeled_df

Unnamed: 0,id,gender,race,age,path,mask,wear,label
0,000001,female,Asian,45.0,000001_female_Asian_45,mask1,Wear,4
1,000001,female,Asian,45.0,000001_female_Asian_45,mask2,Wear,4
2,000001,female,Asian,45.0,000001_female_Asian_45,mask3,Wear,4
3,000001,female,Asian,45.0,000001_female_Asian_45,mask4,Wear,4
4,000001,female,Asian,45.0,000001_female_Asian_45,mask5,Wear,4
...,...,...,...,...,...,...,...,...
18895,006959,male,Asian,19.0,006959_male_Asian_19,mask3,Wear,0
18896,006959,male,Asian,19.0,006959_male_Asian_19,mask4,Wear,0
18897,006959,male,Asian,19.0,006959_male_Asian_19,mask5,Wear,0
18898,006959,male,Asian,19.0,006959_male_Asian_19,incorrect_mask,Incorrect,6


In [10]:
transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
class TrainDataset(Dataset):
    def __init__(self, path, labeled_df, transform):
        super(TrainDataset).__init__()
        self.path = path
        self.labeled_df = labeled_df
        self.transform = transform
        
    def __getitem__(self, idx):
        full_path = os.path.join(self.path, self.labeled_df.iloc[idx]['path'])
        img_list = glob.glob(full_path + '/*')
        file_name = self.labeled_df.iloc[idx]['mask']
        try:
            image = Image.open(os.path.join(full_path, file_name+'.jpg'))
        except:
            try:
                image = Image.open(os.path.join(full_path, file_name+'.png'))
            except:
                image = Image.open(os.path.join(full_path, file_name+'.jpeg'))
        if self.transform:
            image = self.transform(image)
        label = self.labeled_df.iloc[idx]['label']
        return image, label
    
    def __len__(self):
        return len(self.labeled_df)
    
    def get_labels(self):
        return self.labeled_df['label']

## 3. Training

In [14]:
learning_rate = 0.0015
betas = (0.9, 0.999)
weight_decay = 1e-4
T_max = 50
batch_size = 32
weight = torch.tensor([1., 1., 2.] * 6).to(device)

In [20]:
NUM_FINETUNE_CLASSES = 18

num_epochs = 30
EARLY_STOPPING_EPOCH = 5
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=False)

best_models = {}
fold_results = {}

for fold, (train_ids, test_ids) in enumerate(kfold.split(train_df)):
    print(f' ---------------------- Fold %d --------------------------------------------' % (fold+1) )

    torch.cuda.empty_cache()

    train = labeled_df.iloc[train_ids]
    valid = labeled_df.iloc[test_ids]
    
    train_data = TrainDataset(trainimage_dir, train, transform)
    valid_data = TrainDataset(trainimage_dir, valid, transform)

    train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=4, sampler=ImbalancedDatasetSampler(train_data))
    valid_loader = DataLoader(valid_data, batch_size=batch_size, num_workers=4, shuffle=True)

    # Create model
    model = timm.create_model('efficientnet_b3', pretrained=True, num_classes=NUM_FINETUNE_CLASSES)
    model.to(device)

    optimizer = optim.RAdam(model.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
    lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=T_max)

    # early stopping
    valid_early_stop = 0
    valid_best_f1score = 0.0
    since = time.time()

    for e in range(num_epochs) :
        print(f' ====================== epoch %d ======================' % (e+1) )

        # train
        model.train()
        train_epoch_f1 = 0
        n_iter = 0
        train_loss_list = []
        train_acc_list = []
        for i, (images, targets) in enumerate(train_loader) : 
            optimizer.zero_grad()

            images = images.to(device)
            targets = targets.to(device)

            scores = model(images)
            _, preds = scores.max(dim=1)

            loss = F.cross_entropy(scores, targets, weight=weight)
            loss.backward()
            optimizer.step()

            correct = sum(targets == preds).cpu()
            acc=(correct/images.shape[0] * 100)
            train_epoch_f1 += f1_score(preds.cpu().numpy(), targets.cpu().numpy(), average='macro')
            n_iter += 1

            train_loss_list.append(loss)
            train_acc_list.append(acc)

            if i % 50 == 0 :
                print(f'Iteration %3.d | Train Loss  %.4f | Classifier Accuracy %2.2f' % (i, loss, acc))

        train_mean_loss = np.mean(train_loss_list, dtype="float64")
        train_mean_acc = np.mean(train_acc_list, dtype="float64")

        train_epoch_f1 = train_epoch_f1/n_iter

        epoch_time = time.time() - since
        since = time.time()

        print('')
        print(f'[Summary] Elapsed time : %.0f m %.0f s' % (epoch_time // 60, epoch_time % 60))
        print(f'Train Loss Mean %.4f | Accuracy %2.2f | F1-Score %2.4f' % (train_mean_loss, train_mean_acc, train_epoch_f1) )

        # validation 
        model.eval()
        valid_epoch_f1 = 0
        n_iter = 0
        valid_loss_list = []
        valid_acc_list = []
        for i, (images, targets) in enumerate(valid_loader) : 
            optimizer.zero_grad()
            images = images.to(device=device)
            targets = targets.to(device=device)

            with torch.no_grad():
                scores = model(images)
                loss = F.cross_entropy(scores, targets)
                _, preds = scores.max(dim=1)
                valid_epoch_f1 += f1_score(preds.cpu().numpy(), targets.cpu().numpy(), average='macro')
                n_iter += 1

            correct = sum(targets == preds).cpu()
            acc=(correct/images.shape[0] * 100)

            valid_loss_list.append(loss)
            valid_acc_list.append(acc)

        valid_mean_loss = np.mean(valid_loss_list, dtype="float64")
        valid_mean_acc = np.mean(valid_acc_list, dtype="float64")

        valid_epoch_f1 = valid_epoch_f1/n_iter

        print(f'Valid Loss Mean %.4f | Accuracy %2.2f | F1-Score %2.4f' % (valid_mean_loss, valid_mean_acc, valid_epoch_f1) )
        print('')
        
        if valid_epoch_f1 > valid_best_f1score:
            valid_best_f1score = valid_epoch_f1
            valid_early_stop = 0
            # new best model save (valid 기준)
            best_model = model
            best_models[fold] = best_model
            # 저장
            path = './model/'
            torch.save(best_model.state_dict(), f'{path}fold{fold}model{valid_epoch_f1:2.2f}_epoch_{e}.pth')
            # update fold result
            fold_results[fold] = {"train_mean_acc" : train_mean_acc, 
                                  "train_mean_loss" : train_mean_loss, 
                                  "train_mean_f1" : train_epoch_f1,
                                  "valid_mean_acc" : valid_mean_acc, 
                                  "valid_mean_loss" : valid_mean_loss,
                                  "valid_mean_f1" : valid_epoch_f1, 
                                  "epoch" : e}

        else:
            # early stopping    
            valid_early_stop += 1
            if valid_early_stop >= EARLY_STOPPING_EPOCH:  # patience
                print("EARLY STOPPING!!")
                break

        lr_sched.step()

 ---------------------- Fold 1 --------------------------------------------
Iteration   0 | Train Loss  4.8291 | Classifier Accuracy 0.00
Iteration  50 | Train Loss  0.0929 | Classifier Accuracy 96.88

[Summary] Elapsed time : 0 m 12 s
Train Loss Mean 1.0367 | Accuracy 71.97 | F1-Score 0.6765
Valid Loss Mean 1.9128 | Accuracy 70.40 | F1-Score 0.5372

Iteration   0 | Train Loss  0.1026 | Classifier Accuracy 96.88
Iteration  50 | Train Loss  0.0578 | Classifier Accuracy 96.88

[Summary] Elapsed time : 0 m 13 s
Train Loss Mean 0.0683 | Accuracy 97.61 | F1-Score 0.9673
Valid Loss Mean 1.2965 | Accuracy 77.21 | F1-Score 0.5960

Iteration   0 | Train Loss  0.0456 | Classifier Accuracy 96.88
Iteration  50 | Train Loss  0.0045 | Classifier Accuracy 100.00

[Summary] Elapsed time : 0 m 13 s
Train Loss Mean 0.0360 | Accuracy 98.48 | F1-Score 0.9790
Valid Loss Mean 0.4710 | Accuracy 87.76 | F1-Score 0.6968

Iteration   0 | Train Loss  0.0031 | Classifier Accuracy 100.00
Iteration  50 | Train Loss

In [21]:
fold_results

{0: {'train_mean_acc': 99.90808823529412,
  'train_mean_loss': 0.003409989967481659,
  'train_mean_f1': 0.9982292217586336,
  'valid_mean_acc': 95.7720588235294,
  'valid_mean_loss': 0.31055936438582754,
  'valid_mean_f1': 0.888338689175266,
  'epoch': 28},
 1: {'train_mean_acc': 98.80514705882354,
  'train_mean_loss': 0.033337570589056294,
  'train_mean_f1': 0.9805279315042602,
  'valid_mean_acc': 94.80041997572955,
  'valid_mean_loss': 0.23419583986020265,
  'valid_mean_f1': 0.8412755734633216,
  'epoch': 5},
 2: {'train_mean_acc': 97.74816176470588,
  'train_mean_loss': 0.07414228328750194,
  'train_mean_f1': 0.964452932193574,
  'valid_mean_acc': 91.17647058823529,
  'valid_mean_loss': 0.5003492908880991,
  'valid_mean_f1': 0.8490902100838601,
  'epoch': 6},
 3: {'train_mean_acc': 98.98897058823529,
  'train_mean_loss': 0.03061463130186038,
  'train_mean_f1': 0.9849594557749638,
  'valid_mean_acc': 91.91176470588235,
  'valid_mean_loss': 0.40688596939777627,
  'valid_mean_f1': 0.80

## 4. Inference

In [32]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        super(TestDataset).__init__()
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

# 테스트 데이터셋 폴더 경로를 지정해주세요.
test_dir = '/opt/ml/input/data/eval'

# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
testimage_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(testimage_dir, img_id) for img_id in submission.ImageID]
test_transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
test_dataset = TestDataset(image_paths, test_transform)

test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    num_workers=4
)

In [36]:
%%time

scores_result = []

for fold in range(n_splits):
    torch.cuda.empty_cache()
    model = best_models[fold]
    #path = './model/'
    #valid_mean_f1 = fold_results[fold]['valid_mean_f1']
    #e = fold_results[fold]['epoch']
    #mypath = f'{path}fold{fold}model{valid_mean_f1:2.2f}_epoch_{e}.pth'
    #checkpoint = torch.load(mypath)
    #model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()

    #predictions = []
    score_list = []

    with torch.no_grad():
        for images in test_loader:
            images = images.to(device)
            scores = model(images)
            _, preds = scores.max(dim=1)
            
            #predictions.extend(preds.detach().cpu().numpy())
            score_list.extend(scores.detach().cpu().numpy())
    scores_result.append(score_list)

CPU times: user 24min 18s, sys: 25.6 s, total: 24min 44s
Wall time: 24min 57s


In [42]:
len(scores_result[0][0])

18

In [46]:
myresult = torch.tensor(scores_result)
print(myresult.shape)

torch.Size([5, 12600, 18])


In [47]:
myresult[0][0]

tensor([-4.0693,  2.2004, -0.3666, -4.8301, -8.7945,  0.7341, -3.0786,  4.5599,
        -1.8860, -2.8918, -7.9333, -1.6631,  0.4027, 17.7520,  1.6697, -1.3640,
         5.0236, -0.3574])

In [48]:
myresult = F.softmax(myresult, dim=2)
print(myresult.shape)

torch.Size([5, 12600, 18])


In [51]:
myresult[0][0]

tensor([3.3350e-10, 1.7620e-07, 1.3526e-08, 1.5584e-10, 2.9578e-12, 4.0663e-08,
        8.9815e-10, 1.8652e-06, 2.9602e-09, 1.0826e-09, 6.9980e-12, 3.6992e-09,
        2.9192e-08, 9.9999e-01, 1.0364e-07, 4.9887e-09, 2.9655e-06, 1.3652e-08])

In [52]:
myresult = torch.sum(myresult, dim=0)
print(myresult.shape)

torch.Size([12600, 18])


In [53]:
_, all_predictions = myresult.max(dim=1)
print(all_predictions.shape)

torch.Size([12600])


In [55]:
all_predictions

tensor([13,  1, 13,  ..., 10,  1,  7])

In [56]:
all_predictions = all_predictions.cpu().numpy()
len(all_predictions)

12600

In [57]:
from collections import Counter
Counter(all_predictions)

Counter({13: 624,
         1: 4211,
         7: 828,
         4: 3890,
         3: 151,
         0: 610,
         16: 1020,
         10: 1093,
         12: 151,
         6: 17,
         15: 5})

In [58]:
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission_efficientnet_kfold.csv'), index=False)
print('test inference is done!')

test inference is done!
