In [133]:
import os

os.chdir('/opt/ml/P-Stage/1-STAGE/')

from PIL import Image

import wandb
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

import matplotlib.pyplot as plt
from sklearn.metrics import (
    f1_score,
    recall_score,
    accuracy_score,
    precision_score,
    confusion_matrix,
)

from config import get_args
from prepare import get_dataloader, get_classes, get_album_transforms
from metrics import (
    change_2d_to_1d,
    tensor_to_numpy,
    calculate_18class,
    tensor_images_to_numpy_images,
)


from log_helper import (
    log_f1_and_acc_scores,
    log_confusion_matrix,
    log_confusion_matrix_by_images,
)

def validation_test(models, dataloader, num_class=3):
    labels = np.zeros((len(dataloader.dataset)))
    ensemble_results = np.zeros((len(dataloader.dataset), num_class))
    
    for model in models:
        model = torch.load(model)
    
        for idx, (image, label) in enumerate(dataloader):
            image, label = image.cuda(), label.cuda()
            bs = image.shape[0]
        
            preds = model(image)
            
            labels[idx*bs:(idx+1)*bs] = tensor_to_numpy(label)
            ensemble_results[idx*bs:(idx+1)*bs] += tensor_to_numpy(torch.softmax(preds, dim=1))
            
            print(idx, end="\r")
    
    return ensemble_results, labels

In [134]:
args = get_args()
args

Namespace(age_model='/opt/ml/weights/age.pt', aug_keys='CJ,FancyPCA', batch_size=64, data_dir='/opt/ml/input/data/train', epochs=25, eval=True, gender_model='/opt/ml/weights/gender.pt', image_size=224, inf_filename='valid', loss_metric='f1_loss', lr=0.001, mask_model='/opt/ml/weights/mask.pt', model_path='/opt/ml/weights/', model_save=True, optimizer='adam', seed=42, test=True, train_key='mask', use_only_mask=False, valid_size=0.5, workers=2)

## Age Ensemble Test

In [135]:
age_models = [
    'age-04070152-easy-sweep-6',
    'age-04070047-driven-sweep-5',
    'age-04062341-magic-sweep-4',
    'age-04062055-still-sweep-1'
]

age_models = list(map(lambda x: os.path.join('/opt/ml/weights/', x) + ".pt", age_models))
age_models

['/opt/ml/weights/age-04070152-easy-sweep-6.pt',
 '/opt/ml/weights/age-04070047-driven-sweep-5.pt',
 '/opt/ml/weights/age-04062341-magic-sweep-4.pt',
 '/opt/ml/weights/age-04062055-still-sweep-1.pt']

In [174]:
args.train_key = "age"
args.valid_size = 0.15
args.test = False

_, dataloader = get_dataloader(args)
print(len(dataloader.dataset))

2835


In [138]:
preds, labels = validation_test(age_models[:1], dataloader, 3)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

44

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.973034,1.0,0.947484,0.947484,0.947009,1.0,0.899351,0.899351,0.689266,1.0,0.525862,0.525862


In [139]:
preds, labels = validation_test(age_models[1:2], dataloader, 3)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

44

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.967864,1.0,0.937729,0.937729,0.936816,1.0,0.881141,0.881141,0.716049,1.0,0.557692,0.557692


In [140]:
preds, labels = validation_test(age_models[2:3], dataloader, 3)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

44

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.975086,1.0,0.951384,0.951384,0.934735,1.0,0.877467,0.877467,0.59,1.0,0.41844,0.41844


In [141]:
preds, labels = validation_test(age_models[3:4], dataloader, 3)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

44

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.978186,1.0,0.957303,0.957303,0.94266,1.0,0.891538,0.891538,0.765432,1.0,0.62,0.62


In [171]:
preds, labels = validation_test(age_models, dataloader, 3)
labels = labels.astype(np.int)

59

In [143]:
# age_model 다 한거, 0.645161
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.979886,1.0,0.960565,0.960565,0.945028,1.0,0.895785,0.895785,0.784314,1.0,0.645161,0.645161


In [176]:
preds, labels = validation_test(age_models[:2] + age_models[3:], dataloader, 3)
labels = labels.astype(np.int)

44

In [177]:
# age_model 중에서 recall 점수 낮은 거 뺀거
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
age,0.978804,1.0,0.958488,0.958488,0.946429,1.0,0.898305,0.898305,0.787097,1.0,0.648936,0.648936


In [178]:
f1_score(np.argmax(preds, axis=1), labels, average='macro')

0.8286956965030856

### 첫 번째 Age Ensemble

![image.png](attachment:de6fb230-d5d9-438b-b5e2-dc9d44052127.png)

## Mask Ensemble Test

In [145]:
mask_models = [
    'mask-04050759-absurd-sweep-4', # 7
    'mask-04050847-devout-sweep-5', # 8
    'mask-04051102-copper-sweep-9', # 8
    'mask-04051658-rich-sweep-17', # 8
]

mask_models = list(map(lambda x: os.path.join('/opt/ml/weights/', x) + ".pt", mask_models))
mask_models

['/opt/ml/weights/mask-04050759-absurd-sweep-4.pt',
 '/opt/ml/weights/mask-04050847-devout-sweep-5.pt',
 '/opt/ml/weights/mask-04051102-copper-sweep-9.pt',
 '/opt/ml/weights/mask-04051658-rich-sweep-17.pt']

In [146]:
args.train_key = "mask"
args.valid_size = 0.2
args.test = False

_, dataloader = get_dataloader(args)
print(len(dataloader.dataset))

3780


In [147]:
preds, labels = validation_test(mask_models[0:1], dataloader, 3)
labels = labels.astype(np.int)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

59

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.998517,1.0,0.997038,0.997038,0.992509,1.0,0.98513,0.98513,0.99722,1.0,0.994455,0.994455


In [148]:
preds, labels = validation_test(mask_models[1:2], dataloader, 3)
labels = labels.astype(np.int)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

59

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.998514,1.0,0.997033,0.997033,0.98694,1.0,0.974217,0.974217,0.995357,1.0,0.990758,0.990758


In [149]:
preds, labels = validation_test(mask_models[2:3], dataloader, 3)
labels = labels.astype(np.int)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

59

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.998517,1.0,0.997038,0.997038,0.993427,1.0,0.98694,0.98694,0.995375,1.0,0.990792,0.990792


In [150]:
preds, labels = validation_test(mask_models[3:4], dataloader, 3)
labels = labels.astype(np.int)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

59

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.9987,1.0,0.997404,0.997404,0.987884,1.0,0.976059,0.976059,0.996289,1.0,0.992606,0.992606


In [151]:
preds, labels = validation_test(mask_models, dataloader, 3)
labels = labels.astype(np.int)
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

59

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.998889,1.0,0.99778,0.99778,0.996248,1.0,0.992523,0.992523,0.996296,1.0,0.99262,0.99262


In [153]:
f1_score(np.argmax(preds, axis=1), labels, average='macro')

0.9938103829060253

![image.png](attachment:d5cb581d-573e-4d66-81b7-022ac7c4f7ae.png)

## 첫 번째 Mask Ensemble

In [35]:
print(mask_models)

['/opt/ml/weights/mask-04050759-absurd-sweep-4.pt', '/opt/ml/weights/mask-04050847-devout-sweep-5.pt', '/opt/ml/weights/mask-04051102-copper-sweep-9.pt', '/opt/ml/weights/mask-04051658-rich-sweep-17.pt']


In [36]:
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc,2 f1,2 pr,2 re,2 acc
mask,0.998889,1.0,0.99778,0.99778,0.996248,1.0,0.992523,0.992523,0.996296,1.0,0.99262,0.99262


In [37]:
f1_score(np.argmax(preds, axis=1), labels, average='macro')

0.9938103829060253

## Gender Ensemble Test

In [154]:
gender_models = [
    'gender-04052055-still-sweep-14', # 38
]

gender_models = list(map(lambda x: os.path.join('/opt/ml/weights/', x) + ".pt", gender_models))
gender_models

['/opt/ml/weights/gender-04052055-still-sweep-14.pt']

In [155]:
args.train_key = "gender"
args.valid_size = 0.2
args.test = False

_, dataloader = get_dataloader(args)
print(len(dataloader.dataset))

3780


In [156]:
preds, labels = validation_test(gender_models, dataloader, 2)
labels = labels.astype(np.int)

59

![image.png](attachment:4f13264a-619a-488a-aa55-c27df24a3752.png)

## 첫 번째 Gender Ensemble

In [49]:
print(gender_models)

['/opt/ml/weights/gender-04052055-still-sweep-14.pt']


In [50]:
log_f1_and_acc_scores(args, np.argmax(preds, axis=1), labels)

Unnamed: 0,0 f1,0 pr,0 re,0 acc,1 f1,1 pr,1 re,1 acc
gender,0.976936,1.0,0.954913,0.954913,0.99187,1.0,0.983871,0.983871


In [51]:
f1_score(np.argmax(preds, axis=1), labels, average='macro')

0.971064663176251

## Ensemble EVAL

In [87]:
train_df = pd.read_csv('/opt/ml/inp)

SyntaxError: EOL while scanning string literal (<ipython-input-87-0d01f24d8a45>, line 1)

In [157]:
test_df = pd.read_csv('/opt/ml/input/data/eval/info.csv')
image_path = test_df["ImageID"].apply(lambda x: os.path.join("/opt/ml/input/data/eval/images/", x))

In [158]:
test_df.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,0
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,0
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,0
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,0
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,0


In [161]:
def ensemble_results(models, transform, num_class=3):
    test_df = pd.read_csv('/opt/ml/input/data/eval/info.csv')
    image_pathes = test_df["ImageID"].apply(lambda x: os.path.join("/opt/ml/input/data/eval/images/", x))
    
    # print(len(test_df), len(image_pathes))
    
    ensemble_results = np.zeros((len(test_df), num_class))
    
    for model in models:
        model = torch.load(model)
        
        for idx, image_full_path in enumerate(image_pathes):
        
            image = np.array(Image.open(image_full_path))
            image = transform(image=image)['image']
            image = np.transpose(image, axes=(2, 0, 1))
            image = torch.tensor(image).cuda().unsqueeze(0)
            
            preds = model(image)
            ensemble_results[idx] += tensor_to_numpy(torch.softmax(preds[0], dim=0))
            
            print(idx, end='\r')
    
    return ensemble_results

In [162]:
results = dict()

for key, models in zip(["age", "mask", "gender"], [age_models, mask_models, gender_models]):
    args.train_key = key
    num_class = len(get_classes(args))
    _, transform = get_album_transforms(args)
    
    e_results = ensemble_results(models, transform, num_class)
    results[key] = e_results

12599

In [163]:
results.keys()

dict_keys(['age', 'mask', 'gender'])

In [164]:
results["age"]

array([[2.40769233e-02, 2.38698074e+00, 1.58894224e+00],
       [7.42887736e-04, 1.72548079e+00, 2.27377629e+00],
       [1.07504429e-02, 1.87998065e+00, 2.10926884e+00],
       ...,
       [3.90758890e+00, 9.24026179e-02, 8.53714772e-06],
       [2.87383316e-02, 3.95292193e+00, 1.83398458e-02],
       [4.91724099e-03, 1.03729321e+00, 2.95778960e+00]])

In [165]:
np.argmax(results["age"], axis=1)

array([1, 2, 2, ..., 0, 1, 2])

In [166]:
results["age"] = np.argmax(results["age"], axis=1)
results["mask"] = np.argmax(results["mask"], axis=1)
results["gender"] = np.argmax(results["gender"], axis=1)

In [167]:
results

{'age': array([1, 2, 2, ..., 0, 1, 2]),
 'mask': array([2, 0, 2, ..., 1, 0, 1]),
 'gender': array([1, 0, 1, ..., 1, 0, 0])}

## Final Result..

하.. 의미 없다

In [168]:
test_df = pd.read_csv('/opt/ml/input/data/eval/info.csv')

age_results = results["age"]
mask_results = results["mask"]
gender_results = results["gender"]

for idx, (mi, gi, ai) in enumerate(zip(mask_results, gender_results, age_results)):
    res = calculate_18class(mi, gi, ai)
    test_df.iloc[idx, 1] = res

In [169]:
test_df.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,16
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,2
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,17
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12


## Save Model ( 이름 수정 !! )

In [170]:
# age: 1, mask: 1, gender: 1
file_name = "ensemble-003.csv"

test_df.to_csv(os.path.join("/opt/ml/P-Stage/1-STAGE/submissions/", file_name), index=False)