In [9]:
import pandas as pd
import numpy as np

from my_modules.transform import get_transform
from my_modules.model import MyModel
from my_modules.dataset import ClusterTestDataset, EvalDataset

import torch
from torch.utils.data import DataLoader

from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
from PIL import Image

In [17]:
import pickle

# Restore the cached age predictions; the ensemble cell below consumes them as `y_pred`.
# NOTE: pickle.load can execute arbitrary code, so only load files produced by this project.
with open('pkl/age_pred_best.pkl', 'rb') as pkl_file:
    age_pred_best = pickle.load(pkl_file)

In [3]:
# Build the EfficientNet-b0 based model, restore its trained weights and switch
# it to inference mode.

# Prefer the GPU, but fall back to CPU so the notebook still runs on a host
# without CUDA instead of crashing on the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyModel('efficientnet-b0').to(device)

# map_location remaps the checkpoint tensors onto `device` regardless of the
# device they were saved from (e.g. a different GPU index, or loading on CPU).
state_dict = torch.load('/opt/ml/code/save/cluster/log9_3.pt', map_location=device)
model.load_state_dict(state_dict)

# eval() puts the module in evaluation mode (affects layers such as dropout / batch-norm).
model.eval()
print('model ready')

Loaded pretrained weights for efficientnet-b0
model ready


In [4]:
# Preprocessing pipeline handed to both datasets and reused by the per-image
# inference loops further down. Augmentation and cutout are switched off here.

transform = get_transform(
    augment=False,
    crop=350,
    resize=224,
    cutout=None,
)

In [5]:
# Load the two metadata tables: `df_key` from the eval info.csv and
# `df_query` from the df_labled_valid.csv split.

key_csv_path = '/opt/ml/input/data/eval/info.csv'
query_csv_path = 'df/df_labled_valid.csv'

df_key = pd.read_csv(key_csv_path)
df_query = pd.read_csv(query_csv_path)

In [10]:
# Wrap each dataframe in its project dataset class, sharing the same `transform`.
# `ds_key` backs `dataloader_key` and `ds_query` backs `dataloader_query`.

ds_key = EvalDataset(df_key, transform)
ds_query = ClusterTestDataset(df_query, transform)

In [11]:
# Build the key and query dataloaders with identical settings.
# shuffle=False keeps batches in dataset order.

loader_kwargs = dict(batch_size=64, shuffle=False, num_workers=3)

dataloader_key = DataLoader(ds_key, **loader_kwargs)
dataloader_query = DataLoader(ds_query, **loader_kwargs)

In [12]:
# Run the key dataloader through the model's create_keys step on `device`.
# NOTE(review): presumably this populates the index that model.query(...) searches
# in the later ensemble cells, one entry per row of `ds_key` in order - confirm
# against my_modules.model.

model.create_keys(dataloader_key, device)

100%|██████████| 197/197 [00:24<00:00,  8.01it/s]


In [21]:
import os
# Ensemble prediction for the eval images: each image's age class is the
# majority vote over the stored predictions (`age_pred_best`) of the entries
# returned by model.query for that image.

df = df_key
n_neighbors = 5
n_classes = 3  # size of the vote histogram (one slot per age class)
y_pred = age_pred_best
ensembled_y_pred = []

img_path_ = '/opt/ml/input/data/eval/images'

# Iterate the column directly instead of doing a positional .iloc lookup per row.
for image_id in tqdm(df['ImageID']):

    votes = [0] * n_classes

    image_path = os.path.join(img_path_, image_id)
    # The context manager closes the image file deterministically once the
    # tensor has been produced, rather than relying on implicit cleanup.
    with Image.open(image_path) as pil_img:
        img = transform(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        queries = model.eff(img).cpu().numpy()

    # model.query is called with a single query row; [0] selects its result.
    groups_idx = model.query(queries, n_neighbors)[0]

    for idx in groups_idx:
        votes[y_pred[idx]] += 1

    # np.argmax breaks ties in favour of the lowest class index.
    ensembled_y_pred.append(np.argmax(votes))

# No metrics are computed in this cell: the original label lookups were
# commented out (eval labels are presumably unavailable - confirm).

100%|██████████| 12600/12600 [07:55<00:00, 26.52it/s]


In [8]:
# Key / Query

In [22]:
# Persist the ensembled eval predictions so later runs can reuse them.
with open('pkl/ensembled_age5.pkl', 'wb') as pkl_file:
    pickle.dump(ensembled_y_pred, pkl_file)

In [27]:
# Ensemble (nearest-neighbour majority vote) age prediction, scored against the
# 'age' labels of the same dataframe with macro-F1 and accuracy.

df = df_key
n_neighbors = 5

# This cell needs a labelled frame with image paths. Fail early with a clear
# message if `df_key` is still bound to a frame without those columns.
assert {'age', 'path'} <= set(df.columns), \
    "df_key must provide 'age' and 'path' columns for this cell"

# NOTE(review): `weights` is computed but never applied to the votes below, so
# the voting is unweighted. Confirm whether inverse-frequency weighted voting
# was intended before wiring it in, since that would change the reported metrics.
weights = 1.0 / (df_key.age.value_counts().sort_index().to_numpy())**0.25

# Extract labels once instead of doing a pandas positional lookup per row/neighbour.
key_ages = df_key['age'].to_numpy()
y_true = df['age'].tolist()
y_pred = []

for path in tqdm(df['path']):

    votes = [0, 0, 0]

    # The context manager closes the image file deterministically once the
    # tensor has been produced.
    with Image.open(path) as pil_img:
        img = transform(pil_img).unsqueeze(0).to(device)

    with torch.no_grad():
        queries = model.eff(img).cpu().numpy()

    # NOTE(review): queries and key labels both come from `df_key`; if the keys
    # held by the model were built from this same frame, each image can retrieve
    # itself as a neighbour - confirm which data the keys were built from.
    groups_idx = model.query(queries, n_neighbors)[0]

    for idx in groups_idx:
        votes[key_ages[idx]] += 1

    # np.argmax breaks ties in favour of the lowest class index.
    y_pred.append(np.argmax(votes))

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)

print(f'acc:{acc:.4f}, f1:{f1:.4f}')

100%|██████████| 15120/15120 [04:53<00:00, 51.57it/s]

acc:0.4329, f1:0.3266





In [None]:
# fig, axes = plt.subplots(1, n + 1, figsize=(15, 5))
# axes[0].imshow(img_pil)
# axes[0].axis('off')
# axes[0].set_title('query')

# for ax, i in zip(axes[1:], groups_idx):
#     img = Image.open(df_key.iloc[i]['path'])
#     ax.imshow(img)
#     ax.axis('off')
#     ax.set_title(df_key.iloc[i]['gender'])

# plt.show()