### Inference

In [1]:
import os
import gc
import cv2
import timm
import random
import numpy as np
import pandas as pd
from glob import glob

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from tqdm.autonotebook import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
CFG = {
    # 원본 (512, 384)
    'IMG_SIZE_H': 512,
    'IMG_SIZE_W': 384,
    'EPOCHS': 100,
    'LEARNING_RATE': 2e-5,
    'BATCH_SIZE': 32,
    'SEED': 909,
    'PAATIENCE_LIMIT': 5,
    'MODEL': 'efficientnet_b3',
    'LOSS': 'FocalLoss',
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [5]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        self.feature = []
        
        for img_path in self.img_path_list:
            image = cv2.imread(img_path)
            if self.transforms is not None:
                image = self.transforms(image=image)['image']
            self.feature.append(image)
        
    def __getitem__(self, index):        
        if self.label_list is not None:
            return self.feature[index], self.label_list[index]
        else:
            return self.feature[index]
        
    def __len__(self):
        return len(self.img_path_list)

In [6]:
train_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE_W'],CFG['IMG_SIZE_W']),
                            A.HorizontalFlip(always_apply=False, p=0.5),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

test_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE_W'],CFG['IMG_SIZE_W']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [7]:
class CustomModel(nn.Module):
    def __init__(self, num_classes=18, pretrained=True):
        super(CustomModel, self).__init__()
        self.num_classes = num_classes
        self.pretrained = pretrained
        
        # self.model = timm.create_model('efficientnet_b0', pretrained=self.pretrained)
        self.model = timm.create_model(CFG['MODEL'], pretrained=self.pretrained)
        self.fc = nn.Sequential(nn.Dropout(p=0.2, inplace=True),
                               nn.Linear(1000, 512),
                               nn.Dropout(p=0.2, inplace=True),
                               nn.Linear(512, num_classes),
                               )

    def forward(self, x):
        x = self.model(x)
        x = self.fc(x)
        return x

In [8]:
test_dir = '/opt/ml/input/data/eval'

In [9]:
df = pd.read_csv(test_dir + '/info.csv')

In [10]:
image_paths = [os.path.join(test_dir, 'images', img_id) for img_id in df.ImageID]

test_dataset = CustomDataset(image_paths, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=8)

In [11]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    
    preds = []
    with torch.no_grad():
        for imgs in tqdm(iter(test_loader)):
            imgs = imgs.to(device)
            
            logit = model(imgs)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [12]:
project_idx = 5
model_weights = torch.load(glob(f'/opt/ml/models/{project_idx}/ALL/*')[0])
model = CustomModel()
model.load_state_dict(model_weights)

<All keys matched successfully>

In [13]:
model_preds = inference(model, test_loader, device)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=394.0), HTML(value='')))




In [14]:
df['ans'] = model_preds
df.to_csv(os.path.join(test_dir, 'submits', f'{CFG["MODEL"]}_{project_idx}.csv'), index=False)
print(f'{CFG["MODEL"]}_{project_idx}.csv - test inference is done!')

efficientnet_b3_5.csv - test inference is done!


### Check Wrong Prediction

In [15]:
train_dir = '/opt/ml/input/data/train'

all_img_path = glob(os.path.join(train_dir, 'images', '*', '*'))

train_df = pd.DataFrame(columns=['id', 'path', 'mask_label', 'gender_label', 'age_label', 'label'])
train_df['path'] = all_img_path

_file_names = {"mask1": 0, "mask2": 0, "mask3": 0, "mask4": 0, "mask5": 0, "incorrect_mask": 1, "normal": 2}
train_df['id'] = train_df['path'].apply(lambda x : (str(x).split('/')[7]).split('_')[0])
train_df['mask_label'] = train_df['path'].apply(lambda x : _file_names[os.path.splitext(x.split('/')[-1])[0]])

_gender_labels = {"male": 0, "female": 1}
train_df['id'] = train_df['path'].apply(lambda x : (str(x).split('/')[7]).split('_')[0])
train_df['gender_label'] = train_df['path'].apply(lambda x : _gender_labels[(str(x).split('/')[7]).split('_')[1]])

train_df['id'] = train_df['path'].apply(lambda x : (str(x).split('/')[7]).split('_')[0])
train_df['age_label'] = train_df['path'].apply(lambda x : int((str(x).split('/')[7]).split('_')[3]))

train_df['age_label'].loc[train_df['age_label'] < 30] = 0
train_df['age_label'].loc[(train_df['age_label'] >= 30) & (train_df['age_label'] < 60)] = 1
train_df['age_label'].loc[train_df['age_label'] >= 60] = 2

train_df['label'] = train_df.apply(lambda x : (x['mask_label'] * 6 + x['gender_label']*3 + x['age_label']), axis=1)

train_, val_, _, _ = train_test_split(train_df, train_df['label'], test_size=0.2, random_state=CFG['SEED'], stratify=train_df['label'])
print(len(train_), len(val_))

15120 3780


In [16]:
val_dataset = CustomDataset(val_['path'].values, val_['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=8)

model_weights = torch.load(glob(f'/opt/ml/models/{project_idx}/ALL/*')[0])
model = CustomModel().to(device)
model.load_state_dict(model_weights)

<All keys matched successfully>

In [17]:
target, prediction = [], []
with torch.no_grad():
    for imgs, labels in tqdm(iter(val_loader)):
        imgs = imgs.float().to(device)
        labels = labels.to(device)
        
        logit = model(imgs)        
        pred = logit.argmax(dim=1)
        
        indices = (pred != labels).nonzero().squeeze()
        if indices.nelement() != 0:
            target.append(imgs[indices])
            prediction.append((pred[indices], labels[indices]))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))




In [18]:
print(len(val_))
print(len(target))

3780
100
