In [1]:
import sys
sys.path.append('/opt/ml/code')

import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from my_modules.transform import get_transform
from my_modules.dataset import TrainDataset, EvalDataset
from my_modules.trainer import Trainer
from my_modules.loss import LabelSmoothing

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models

from efficientnet_pytorch import EfficientNet

In [2]:
## seeds
# Pin every RNG the notebook touches to one value so reruns are repeatable.

random_seed = 42

# Python and NumPy generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generator, plus cuDNN configured for deterministic kernels
# (benchmark autotuning is turned off).
torch.manual_seed(random_seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
## hyperparameters
# Everything a reader might tune for this run, followed by the three CSV loads.

model_name = 'efficientnet-b0'  # alternative: 'tf_efficientnetv2_b3'
device = 'cuda'
batch_size = 64
target = 'age'
fold = 3

# Per-fold train/valid splits; the eval set is a fixed file.
df_train_path = f'df/fold/df_train_fold2_{fold}.csv'
df_valid_path = f'df/fold/df_valid_fold2_{fold}.csv'
df_test_path = '/opt/ml/input/data/eval/info.csv'

df_train, df_valid, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (df_train_path, df_valid_path, df_test_path)
)

In [None]:
# Bucket the real age into three class indices. With pd.cut's default
# right-closed intervals this yields (0, 29] -> 0, (29, 58] -> 1, (58, 100] -> 2.
# NOTE(review): the display labels used later read '30~59' / '60 <=', while the
# upper edge here is 58 — presumably deliberate; confirm.
bins, labels = [0, 29, 58, 100], [0, 1, 2]
age_group = pd.cut(df_valid['age_real'], bins=bins, labels=labels)
df_valid['age'] = age_group

In [4]:
# Ask efficientnet_pytorch for the image size associated with `model_name`;
# the value is reused as the resize target when the transform is built.
input_size = EfficientNet.get_image_size(model_name)
print(input_size)

224


In [5]:
## transform

# Evaluation-time preprocessing: augmentation and cutout are switched off.
# NOTE(review): the meaning of `crop=350` / `resize=input_size` is defined in
# my_modules.transform — presumably a 350px crop followed by a resize; confirm there.
transform_valid = get_transform(augment=False, crop=350, resize=input_size, cutout=None)

In [6]:
## prepare dataset

# Earlier variant, kept for reference:
# valid_dataset = TrainDataset(df=df_test, transform=transform_valid, target=target)
# NOTE: despite its name, `valid_dataset` wraps the test frame (`df_test`), not `df_valid`.
valid_dataset = EvalDataset(df=df_test, transform=transform_valid)

In [7]:
## prepare dataloader

# One complete, unshuffled pass: shuffle=False and drop_last=False, so every
# sample is visited once. Presumably batch order then follows the dataset's row
# order, which later cells rely on when writing predictions back by position.
dataloader = DataLoader(valid_dataset, batch_size=batch_size, num_workers=3, drop_last=False, shuffle=False)

In [8]:
## prepare model
# Build the classifier selected by `model_name` with an output head sized for `target`.

# Gender is a 2-way problem; every other target handled here has 3 classes.
num_classes = 2 if target == 'gender' else 3

if model_name.startswith('efficientnet'):
    model = EfficientNet.from_pretrained(model_name, num_classes=num_classes)
elif model_name.startswith('tf'):
    import timm
    # Honour `model_name` instead of always building 'tf_efficientnetv2_b3'.
    model = timm.create_model(model_name, pretrained=True, num_classes=num_classes)
else:
    model = models.resnet18(pretrained=True)
    # The head size comes from `num_classes`; the previous code read
    # `train_dataset.classes`, but no `train_dataset` exists in this notebook.
    model.fc = nn.Linear(model.fc.in_features, num_classes)

model.to(device)
print(model_name, 'ready')

Loaded pretrained weights for efficientnet-b0
efficientnet-b0 ready


In [None]:
# check input train batch
# NOTE(review): `batch[1]` assumes the dataset yields (image, label) pairs. The
# dataloader built above wraps EvalDataset, and the inference loop further down
# iterates it as bare `inputs` — confirm this cell still matches the current
# dataset before running it.

from my_modules.utils import imshow
batch = next(iter(dataloader))
imshow(batch[0])
print(batch[1])

## Test

In [None]:
# Restore trained weights and switch the network to inference mode.
# NOTE(review): this is a gender checkpoint while `target` is set to 'age' in the
# hyperparameter cell — confirm the head sizes match before loading.
checkpoint_path = '/opt/ml/code/save/gender_pseudo/fold3.pt'
state_dict = torch.load(checkpoint_path)
model.load_state_dict(state_dict)
model.eval()
print('ready')

In [None]:
# Evaluate the model on a labelled dataloader: collect predictions, remember the
# misclassified samples, and report confusion matrix / accuracy / macro-F1.
try:
    # Not used in this cell. scikit-learn removed it in 1.2, so a failed import
    # must not abort the evaluation.
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    pass
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm

model.eval()
y_true = []
y_pred = []
wrong = []  # (image array, predicted class, true class, running sample index)
cnt = 0

# Same class-count rule the model and plotting cells use.
n_classes = 2 if target == 'gender' else 3

# NOTE(review): this loop needs a dataloader that yields (inputs, targets) pairs;
# confirm the current `dataloader` is built on a labelled dataset before running.
# The batch labels are bound to `targets` so the `labels` list defined for
# pd.cut is not overwritten by a tensor.
for inputs, targets in tqdm(dataloader):
    inputs = inputs.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

    # One host transfer per batch instead of an .item() call per element.
    pred_list = preds.tolist()
    true_list = targets.tolist()
    y_true.extend(true_list)
    y_pred.extend(pred_list)

    for offset, (p, t) in enumerate(zip(pred_list, true_list)):
        if p != t:
            wrong.append((inputs[offset].cpu().numpy(), p, t, cnt + offset))
    cnt += len(pred_list)

# Pin the label set so the matrix is always n_classes x n_classes, even when a
# class never occurs in y_true / y_pred.
mtx = confusion_matrix(y_true, y_pred, labels=list(range(n_classes)))
print(mtx)

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='macro')
print(f'acc:{acc:.4f}, f1:{f1:.4f}')

In [None]:
import pickle

# Persist the evaluation labels and predictions for later comparison.
d = dict(y_true=y_true, y_pred=y_pred)

result_path = 'pkl/result.pkl'
with open(result_path, 'wb') as f:
    pickle.dump(d, f)

In [None]:
# Draw the confusion matrix `mtx` as an annotated heat map.
label_ = list(range(2 if target=='gender' else 3))

# Fail early with a readable message instead of an IndexError halfway through
# the annotation loop when the matrix does not cover every class.
assert mtx.shape == (len(label_), len(label_)), (
    f'confusion matrix has shape {mtx.shape}, expected {(len(label_), len(label_))}'
)

fig, ax = plt.subplots(figsize=(4,4))
# Fixed colour limits: counts at or above 100 all map to the darkest blue.
im = ax.imshow(mtx, cmap='Blues', vmax=100, vmin=-20)

ax.set_xticks(np.arange(len(label_)))
ax.set_yticks(np.arange(len(label_)))
# ... and label them with the respective list entries
ax.set_xticklabels(label_)
ax.set_yticklabels(label_)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Annotate every non-zero cell with its count (zero cells stay blank).
for i in range(len(label_)):
    for j in range(len(label_)):
        if mtx[i, j]:
            ax.text(j, i, mtx[i, j],
                    ha="center", va="center", color="w", size=10)
ax.set_xlabel('Prediction')
ax.set_ylabel('True Label')
ax.set_title("Confusion mtx")
fig.tight_layout()
plt.show()

In [None]:
# Human-readable class names per prediction target, indexed by class id.
label_d = {
    'age' : ['< 30', '30~59', ' 60 <='],
    'gender' : ['Male','Female'],
    'mask' : ['Mask', 'Incorrect', 'Normal']
}

# Single-pass iterator over the misclassified samples; the plotting cell below
# consumes it a few items per run, so re-run this cell to start over.
iterator = iter(wrong)

label = label_d[target]
# Per-channel statistics used below to undo input normalization (std * img + mean).
# NOTE(review): presumably the ImageNet mean/std applied in get_transform — confirm.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

In [None]:
# Show the next (up to) four misclassified samples in a 2x2 grid.
# Each run advances `iterator`, so re-running the cell pages through `wrong`.
fig, axes = plt.subplots(2, 2, figsize=(8,8))
for ax in axes.flatten():
    # Hide ticks and frame on every panel, including ones left empty at the end.
    ax.axis('off')
    sample = next(iterator, None)
    if sample is None:
        # Only exhaustion is tolerated here; any other error now surfaces
        # instead of being swallowed by a bare `except`.
        continue
    img, p, t, idx = sample
    # Move axis 0 last for imshow — presumably channel-first model input.
    img = img.transpose((1, 2, 0))
    img = std * img + mean  # undo normalization
    img = np.clip(img, 0, 1)
    ax.imshow(img)
    ax.set_title(f'{idx}\nt : {label[t]}\np : {label[p]}')
plt.show()

In [None]:
# Look up one validation row by position.
# NOTE(review): 538 is hard-coded — presumably a running index read off the
# misclassified-sample plots, which only lines up with `df_valid` if the
# evaluated dataloader iterated `df_valid` in order. Confirm before relying on it.
df_valid.iloc[538]

In [None]:
# Restore the age checkpoint and switch the network to inference mode.
checkpoint_path = '/opt/ml/code/save/age_labeled/epoch_011.pt'
state_dict = torch.load(checkpoint_path)
model.load_state_dict(state_dict)
model.eval()
print('ready')

In [30]:
import torch.nn.functional as F
from tqdm import tqdm

# Run the final checkpoint over the unlabelled dataloader, collecting per-batch
# class predictions (`preds`) and softmax probabilities (`probs`).
model.eval()

with torch.no_grad():

    probs, preds = [], []

    final_state = torch.load('/opt/ml/code/save/final/epoch_005.pt')
    model.load_state_dict(final_state)
    # NOTE: the messages print `fold`, but the checkpoint path above is fixed.
    print(f'starting fold{fold}...')

    for inputs in tqdm(dataloader):

        inputs = inputs.to(device)
        outputs = model(inputs)

        # torch.max over dim 1 returns (values, indices); the indices are the classes.
        _, pred = torch.max(outputs, 1)
        prob = F.softmax(outputs, dim=-1)

        probs.append(prob.cpu().numpy())
        preds.append(pred.cpu().numpy())

    print(f'finished fold{fold}!')
    

starting fold3...


100%|██████████| 197/197 [00:24<00:00,  7.98it/s]

finished fold3!





In [31]:
# Merge the per-batch prediction arrays into one flat array.
# Guarded so re-running the cell is a no-op: np.concatenate raises ValueError
# when handed the already-flattened 1-D array.
if isinstance(preds, list):
    preds = np.concatenate(preds)

In [32]:
# Count how many samples were assigned to each predicted class index.
np.bincount(preds)

array([5372, 3333, 3895])

In [33]:
import pickle

# Store the flat prediction array so it can be ensembled or compared later.
preds_path = 'pkl/final5.pkl'
with open(preds_path, 'wb') as f:
    pickle.dump(preds, f)

In [None]:
import pickle
# Load a previously saved ensemble result.
# NOTE(review): pickle.load can execute arbitrary code from the file — only open
# pickles produced by this project. `original` is not referenced by any other
# visible cell.
with open(f'pkl/gender_ensemble_soft.pkl', 'rb') as f:
    original = pickle.load(f)

In [None]:
# Pseudo-label frame: a copy of the test metadata with the full image path and
# the predicted age class attached.
# NOTE(review): assigns `preds` by position — assumes the predictions are in the
# same order as the rows of `df_test`; confirm.
df_pseudo = df_test.assign(
    path='/opt/ml/input/data/eval/images/' + df_test['ImageID'],
    age=preds,
)

In [None]:
# Keep only the rows at the positions listed in `disagree`.
# NOTE: `disagree` is built in a later cell (the `best` vs `jh` comparison), so
# this cell fails on a fresh top-to-bottom run. It also overwrites `df_pseudo`,
# so running it twice filters the already-filtered frame.
df_pseudo = df_pseudo.iloc[disagree].copy()

In [None]:
# Class distribution of the pseudo-labels in `df_pseudo`.
df_pseudo.age.value_counts()

In [None]:
from PIL import Image
import os

In [None]:
import pickle

# Two sets of age predictions to compare: a pickled ensemble result and the
# `ans` column of another submission CSV.
# NOTE(review): pickle.load can execute arbitrary code — only open trusted files.
best_path = 'pkl/ensembled_age5.pkl'
with open(best_path, 'rb') as f:
    best = pickle.load(f)

jh_frame = pd.read_csv('/opt/ml/code/agesample2(0.00006).csv')
jh = jh_frame['ans']
    

In [None]:
# Positions (row numbers in `df_test`) where the two prediction sets differ.
# A comprehension keeps the loop index local, so the global `i` used as the
# cursor of the manual review cell is not overwritten.
disagree = [pos for pos in range(len(df_test)) if best[pos] != jh[pos]]

In [None]:
# Cursor for the manual review cell below; starts at -1 because that cell
# increments before reading.
i = -1
# NOTE: named `gender`, but these are the display names of the three age classes.
gender=['young', 'middle', 'old']

In [None]:
# Manual review: each run advances the cursor by one disagreement, prints both
# predictions, and displays the image. Deliberately stateful — re-run to step.
i += 1
idx = disagree[i]
best_name = gender[best[idx]]
jh_name = gender[jh[idx]]
print(f'{i+1}/{len(disagree)}  original : {best_name}, jk : {jh_name}')
image_file = df_test.iloc[idx]['ImageID']
img = Image.open(os.path.join('/opt/ml/input/data/eval/images', image_file))
img