In [2]:
!pwd

/opt/ml/image-classification-level1-04


In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torchvision
from pytorch_metric_learning.distances import CosineSimilarity
from pytorch_metric_learning.utils import common_functions as c_f
from pytorch_metric_learning.utils.inference import MatchFinder, InferenceModel
from torchvision import transforms

from data_loader.data_loaders import MaskDataLoader
from data_loader.datasets import MaskDataset, MaskSubDataset, MaskSubmitDataset
from model.model import PretrainModelTimmArc

# Create the dataset and load the trained model

In [2]:

# Settings forwarded as keyword arguments to MaskDataLoader.
data_loader_args = dict(
    data_dir="../input/data",
    batch_size=32,
    shuffle=False,
    validation_split=0.1,
    num_workers=2,
    submit=True,
    sampler="no",
)
# split_validation() is unpacked into the train / validation / submit loaders.
mask_loader = MaskDataLoader(**data_loader_args)
train_data_loader, valid_data_loader, submit_data_loader = mask_loader.split_validation()

Current transforms : None
num_workers:  2
No sampling method




In [3]:
# Pull the underlying dataset object out of each of the three loaders.
train_dataset, valid_dataset, submit_dataset = (
    loader.dataset
    for loader in (train_data_loader, valid_data_loader, submit_data_loader)
)

In [4]:
# Rebuild the architecture, then restore the trained weights from the checkpoint.
model = PretrainModelTimmArc()
# map_location="cpu" deserializes the checkpoint tensors onto the CPU, so loading
# does not depend on the device the checkpoint was saved from; the model is moved
# to its target device in a later cell.
checkpoint = torch.load(
    "/opt/ml/image-classification-level1-04/model_best.pth",
    map_location="cpu",
)
# The weights are stored under the 'state_dict' key of the checkpoint.
model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [5]:
# DataParallel wrapping is left disabled at this point of the notebook.
# model = torch.nn.DataParallel(model)
print("done model loading")

done model loading


---

# Evaluate Validation set based on Train set

In [6]:
from pytorch_metric_learning import losses, miners, distances, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

In [7]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing on this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

PretrainModelTimmArc(
  (model): EfficientNet(
    (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): SiLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
          (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, 

In [8]:
def get_all_embeddings(dataset, model):
    """Run ``model`` over ``dataset`` through a freshly built ``BaseTester``.

    Args:
        dataset: Dataset forwarded to ``BaseTester.get_all_embeddings``.
        model: Model forwarded to ``BaseTester.get_all_embeddings``.

    Returns:
        The result of ``BaseTester.get_all_embeddings(dataset, model)``; the
        caller in this notebook unpacks it as ``(embeddings, labels)``.
    """
    # Every tester option is spelled out explicitly; ``device`` is the
    # notebook-level global set in an earlier cell.
    tester_options = dict(
        normalize_embeddings=True,
        use_trunk_output=False,
        batch_size=64,
        dataloader_num_workers=4,
        pca=None,
        data_device=device,
        dtype=None,
        data_and_label_getter=None,
        label_hierarchy_level=0,
        end_of_testing_hook=None,
        dataset_labels=None,
        set_min_label_to_zero=False,
        accuracy_calculator=None,
        visualizer=None,
        visualizer_hook=None,
    )
    embedding_tester = testers.BaseTester(**tester_options)
    return embedding_tester.get_all_embeddings(dataset, model)

In [9]:
def test(train_set, test_set, model, accuracy_calculator):
    """Print the Precision@1 of ``test_set`` measured against ``train_set``.

    Both sets are embedded with ``get_all_embeddings``; the embeddings and
    labels are then handed to ``accuracy_calculator.get_accuracy``.

    Args:
        train_set: Dataset whose embeddings/labels are passed as the 2nd and
            4th arguments of ``get_accuracy``.
        test_set: Dataset whose embeddings/labels are passed as the 1st and
            3rd arguments of ``get_accuracy``.
        model: Model used to compute the embeddings.
        accuracy_calculator: Object exposing ``get_accuracy``; its result must
            contain the key ``"precision_at_1"``.

    Returns:
        None. The training labels and the accuracy are printed.
    """
    reference_embeddings, reference_labels = get_all_embeddings(train_set, model)
    print(reference_labels)
    query_embeddings, query_labels = get_all_embeddings(test_set, model)

    # Drop dimension 1 of both label tensors before scoring.
    reference_labels = reference_labels.squeeze(1)
    query_labels = query_labels.squeeze(1)

    print("Computing accuracy")
    # The fifth argument is passed positionally, exactly as before.
    score_args = (
        query_embeddings,
        reference_embeddings,
        query_labels,
        reference_labels,
        False,
    )
    accuracies = accuracy_calculator.get_accuracy(*score_args)
    precision_at_1 = accuracies["precision_at_1"]
    print("Test set accuracy (Precision@1) = {}".format(precision_at_1))


In [10]:
# Only the "precision_at_1" metric is requested, with k = 1.
accuracy_calculator = AccuracyCalculator(include = ("precision_at_1",), k = 1)

In [11]:
# Evaluate the validation set against the training set (prints Precision@1).
test(train_dataset, valid_dataset, model, accuracy_calculator)

100%|██████████| 266/266 [00:33<00:00,  8.04it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

tensor([[ 1],
        [10],
        [ 4],
        ...,
        [ 4],
        [ 4],
        [ 0]], device='cuda:0')


100%|██████████| 30/30 [00:04<00:00,  6.49it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.9915344051551074


---

# Inference on the Submit Dataset

In [12]:
# Re-binds submit_dataset to the submit loader's dataset (it was already
# assigned the same way in an earlier cell).
submit_dataset = submit_data_loader.dataset

## Create helper functions

In [13]:
def print_decision(is_match):
    """Print "Same class" when ``is_match`` is truthy, "Different class" otherwise."""
    verdict = "Same class" if is_match else "Different class"
    print(verdict)

# Per-channel statistics whose normalization is undone before plotting
# (presumably the values used by the training transform — verify).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Normalize computes (x - mean) / std, so feeding it mean = -m/s and std = 1/s
# yields x * s + m, i.e. the inverse of a normalization with (m, s).
inv_normalize = transforms.Normalize(
    mean=[-channel_mean / channel_std for channel_mean, channel_std in zip(mean, std)],
    std=[1 / channel_std for channel_std in std],
)

def imshow(img, figsize=(8, 4)):
    """Undo the normalization of an image tensor and display it with matplotlib.

    Args:
        img: Image tensor with the channel axis first; it is transposed with
            axes ``(1, 2, 0)`` before plotting.
        figsize: Figure size forwarded to ``plt.figure``.
    """
    img = inv_normalize(img)
    # detach() + cpu() make the conversion work for tensors that track
    # gradients or live on the GPU; a plain .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    plt.figure(figsize=figsize)
    # Move the channel axis last, which is the layout plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [14]:
# Map each training label to the array of dataset indices carrying that label.
labels_to_indices = c_f.get_labels_to_indices(train_dataset.get_labels().values)

In [15]:
labels_to_indices

defaultdict(list,
            {1: array([    0,    15,    24, ..., 16979, 16994, 16999]),
             10: array([    1,     8,    30,    40,    92,   138,   146,   190,   197,
                      206,   211,   281,   286,   288,   291,   298,   330,   333,
                      334,   374,   450,   459,   473,   479,   497,   539,   551,
                      559,   614,   657,   704,   717,   732,   756,   759,   789,
                      796,   807,   849,   850,   854,   864,   867,   908,   929,
                      961,   977,  1036,  1074,  1076,  1107,  1115,  1136,  1146,
                     1158,  1228,  1231,  1274,  1294,  1309,  1322,  1336,  1491,
                     1499,  1506,  1562,  1572,  1581,  1600,  1609,  1616,  1635,
                     1683,  1722,  1777,  1783,  1883,  1943,  1970,  1994,  2005,
                     2020,  2049,  2064,  2066,  2100,  2114,  2120,  2128,  2161,
                     2174,  2205,  2213,  2221,  2227,  2299,  2316,  2349, 

In [16]:
# `dataset` is the training set; it is the search space handed to the k-NN
# indexer and the source of the neighbour labels further down.
dataset = train_data_loader.dataset

In [17]:
# Reload the trained model from scratch for the inference section.
model = PretrainModelTimmArc()
# map_location="cpu" deserializes the checkpoint tensors onto the CPU, so loading
# does not depend on the device the checkpoint was saved from.
checkpoint = torch.load(
    "/opt/ml/image-classification-level1-04/model_best.pth",
    map_location="cpu",
)
# The weights are stored under the 'state_dict' key of the checkpoint.
model.load_state_dict(checkpoint['state_dict'])
# Wrap in DataParallel only after the weights are loaded into the bare model,
# then move the wrapped model to the GPU.
model = torch.nn.DataParallel(model)
model.to(torch.device("cuda"))

DataParallel(
  (module): PretrainModelTimmArc(
    (model): EfficientNet(
      (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): SiLU(inplace=True)
      (blocks): Sequential(
        (0): Sequential(
          (0): DepthwiseSeparableConv(
            (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (bn1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act1): SiLU(inplace=True)
            (se): SqueezeExcite(
              (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
              (act1): SiLU(inplace=True)
              (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
              (gate): Sigmoid()
            )
            (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False

In [18]:
# Initialize the inference wrapper around the model. Matches are decided with
# cosine similarity and a threshold of 0.7.
match_finder = MatchFinder(distance=CosineSimilarity(), threshold=0.7)
inference_model = InferenceModel(model, match_finder=match_finder)

In [19]:
# Create the faiss index.
# Pass in a dataset to serve as the search space for k-NN.
# This takes a long time (about 3 minutes on a V100).
inference_model.train_indexer(dataset)

In [20]:
from tqdm import tqdm

In [21]:
# Re-create the three loaders with the same arguments as before; this rebinds
# train_data_loader, valid_data_loader and submit_data_loader.
# `dataset` (bound earlier) still refers to the previous training dataset object.
train_data_loader, valid_data_loader, submit_data_loader = MaskDataLoader(**data_loader_args).split_validation()

Current transforms : None
num_workers:  2
No sampling method




In [22]:
# Batch size of the submit loader, and the number of samples left over for the
# final batch once all preceding batches (each of `batch_size`) are accounted for.
batch_size = submit_data_loader.batch_size
n_last_samples = len(submit_data_loader.dataset) - (len(submit_data_loader) - 1) * batch_size

In [23]:
def imshow(img, figsize=(8, 4)):
    """Undo the normalization of an image tensor and display it with matplotlib.

    NOTE(review): this redefines the ``imshow`` helper already declared in an
    earlier cell; one of the two definitions can be removed.

    Args:
        img: Image tensor with the channel axis first; it is transposed with
            axes ``(1, 2, 0)`` before plotting.
        figsize: Figure size forwarded to ``plt.figure``.
    """
    img = inv_normalize(img)
    # detach() + cpu() make the conversion work for tensors that track
    # gradients or live on the GPU; a plain .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    plt.figure(figsize=figsize)
    # Move the channel axis last, which is the layout plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [24]:
# Work on a copy so that writing the 'ans' column later does not silently
# mutate the DataFrame owned by the submit dataset.
df_submit = submit_data_loader.dataset.df.copy()
# One integer class label per submit row; allocated as int because the values
# stored here are class ids.
predicts = np.zeros(len(df_submit), dtype=int)

In [25]:

# Predict a class for every submit image: each query takes the label of its
# single nearest neighbour in `dataset` (the set the indexer was trained on).
offset = 0  # position in `predicts` where the current batch starts
# Wrapping the loader itself (rather than an enumerate object) lets tqdm read
# the loader's length and display a total / ETA.
for img in tqdm(submit_data_loader):
    samples, _ = inference_model.get_nearest_neighbors(img, k=1)
    # NOTE(review): dataset[s] loads the full sample only to read its label;
    # a direct label lookup would be faster if index order is guaranteed.
    batch_predictions = [dataset[s][1].item() for s in samples.flatten()]
    # Slice by the actual number of predictions so a short final batch needs
    # no special case and no dependency on a precomputed remainder.
    predicts[offset:offset + len(batch_predictions)] = batch_predictions
    offset += len(batch_predictions)

# Every slot of `predicts` must have been filled exactly once.
assert offset == len(predicts), (offset, len(predicts))

394it [02:11,  3.01it/s]


In [26]:
# Store the predictions in the 'ans' column (passed as a single-column 2-D array).
df_submit['ans'] = predicts.reshape(-1, 1)

In [27]:
# Cast the class ids to integers.
df_submit['ans'] = df_submit['ans'].astype(int)

In [38]:
import pandas as pd
# Load two previously written submission files; their 'ans' columns are
# compared with the current predictions in the following cells.
df_best = pd.read_csv('/opt/ml/image-classification-level1-04/submission_EfficientNet_b3_Cutout_Elastic_Transform_0831_144053_checkpoint-epoch7.pth.csv')
df_last = pd.read_csv('/opt/ml/image-classification-level1-04/ArcMarginLoss_LabelSmoothing.csv')

In [39]:
(df_best['ans'] == df_submit['ans']).mean()

0.8912698412698413

In [40]:
(df_last['ans'] == df_submit['ans']).mean()

0.8901587301587301

In [63]:
# Fraction of rows whose prediction differs from df_best.
# NOTE(review): the stored output is close to the equality rate shown above
# rather than its complement, and the cell carries an out-of-order execution
# count — it looks stale; re-run to confirm.
(df_best['ans'] != df_submit['ans']).mean()

0.8910317460317461

In [41]:
# Class distribution of the current predictions on the rows where they
# disagree with df_best.
df_submit.loc[df_best['ans'] != df_submit['ans'], 'ans'].value_counts()

2     397
1     241
5     123
13    111
4     110
7     100
3      69
8      39
11     29
0      28
17     25
10     24
14     21
15     19
9      17
16     13
6       2
12      2
Name: ans, dtype: int64

In [49]:
df_submit['ans'].value_counts()

0     2305
1     1958
4     1753
3     1515
2     1145
7      482
13     462
12     453
6      453
10     330
16     329
15     319
9      317
5      299
14     173
8      172
11      69
17      66
Name: ans, dtype: int64

In [32]:
# Location of the evaluation data; the images live in its 'images' sub-folder.
test_dir = '/opt/ml/input/data/eval'
image_dir = os.path.join(test_dir, 'images')

In [33]:
# info.csv is read for its ImageID column, which is used below to build the image paths.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))

In [37]:
# Class distribution of the current predictions on the rows where they
# disagree with df_best (same computation as in an earlier cell).
df_submit.loc[df_best['ans'] != df_submit['ans'], 'ans'].value_counts()

2     397
1     241
5     123
13    111
4     110
7     100
3      69
8      39
11     29
0      28
17     25
10     24
14     21
15     19
9      17
16     13
6       2
12      2
Name: ans, dtype: int64

In [53]:
# Rows predicted as class 4 here while df_best predicts something else.
df_submit.loc[
    (df_best['ans'] != df_submit['ans'])
    & (df_submit['ans'] == 4)
]

Unnamed: 0,ImageID,ans
60,01ed60df34a15534443de81f062bfcdd9b373f8b.jpg,4
119,18d1611138d046b92058e43bd2766904ae948a87.jpg,4
264,a7f53f3fb1bef7f99de46b06e0da73b8c86f8ea6.jpg,4
519,49900d73ebc4e7c11de80bcb8388df61160c3f77.jpg,4
663,e571d2d2984ca45867d74ed2f5614ca6755d02ed.jpg,4
...,...,...
12184,27f3d78a161b1c4ef4070d2f899dd746e568523b.jpg,4
12241,a74ec32ee6a46d1cc7046f46e418affdf9912562.jpg,4
12245,a66bc8ad6f53ef775b5630cf3a8d871721c9a7c0.jpg,4
12291,fd230e44677d11d833f1f124f3cfd1b27e566f4a.jpg,4


In [54]:
# index=False keeps the row index out of the file; otherwise it is written as
# an extra, unnamed first column that shows up as "Unnamed: 0" when read back.
df_submit.to_csv("ArcMarginLoss_FocalLoss.csv", index=False)

In [94]:
# index=False keeps the row index out of the file; otherwise it is written as
# an extra, unnamed first column that shows up as "Unnamed: 0" when read back.
df_submit.to_csv("ArcMarginLoss_LabelSmoothing.csv", index=False)

In [77]:
# Full path of every evaluation image, in the order of the ImageID column.
image_paths = [
    os.path.join(image_dir, file_name)
    for file_name in submission.ImageID
]

In [80]:
# ImageIDs (bare file names) of the rows where the current predictions
# disagree with df_best.
img_set = set(df_submit.loc[df_best['ans'] != df_submit['ans'], 'ImageID'].values)

In [89]:
img_set

{'6f0f49f0db2c806bcada623fda06624e065b8399.jpg',
 'ab7f1203f524ca4c189bd28fd12e3d2b29d72f91.jpg',
 'd805e928df69a60d86e6a8b065ec50d32351f1c2.jpg',
 'bc2ac641f3ca7ff36c99d79201066437f3ad5c3c.jpg',
 'cf35106e1b4f9f6c7c9387e15c345e713a54d95b.jpg',
 '5239f6b4a07f3d971ed34631ae940080fe18c358.jpg',
 'bbd595b4fb6224d770081051c162e8ac0e8c4468.jpg',
 '27ced423e752439774c023af444aed7b7ee3d68e.jpg',
 '43921d0ca4a6f3c25ea4ac2fe8b7018202354c80.jpg',
 '37ea6c80eab05dfaa49fc0a7e48c9f6aa6c1306c.jpg',
 '274eaf13b3b824df0ad899f832633f83ca9ecacd.jpg',
 'a43df2551a7ae20937a97b419fb1ae7299251170.jpg',
 '5f48ba87d1650004d45901fb4af40afe830dc40b.jpg',
 '84679bdc8dd573f0d97e8f8130661da2f303de8f.jpg',
 'db24141f383627414caa052e491354ca01551f8c.jpg',
 'db5e7887deef2b0178adeb0b7f51e2041aba495d.jpg',
 'a3edf746b0c2512bf6b70cab6913f4a02dfe47db.jpg',
 'f77d7059181cd4cbd2c17fa578eb021711236932.jpg',
 '8fe1c1ad9f3208f3cac5a97b74a3962d03a6abcd.jpg',
 'cff048841c8e64c482568a087c5c8f06413f2a79.jpg',
 '5e87c2ba9fec9ed9d3

In [81]:
# img_set holds bare ImageIDs (file names) while image_paths holds full paths,
# so membership has to be tested on the file-name part of each path; comparing
# the full path directly never matches and yields an empty list.
image_paths_tochk = [
    path for path in image_paths
    if os.path.basename(path) in img_set
]

In [83]:
from PIL import Image

In [87]:
# Preview only the first image of the list (nothing is shown if it is empty).
for path in image_paths_tochk[:1]:
    pil_image = Image.open(path).convert('RGB')
    img = np.array(pil_image)
    plt.imshow(img)
    plt.show()


In [None]:
# Get the 10 nearest neighbors of every query batch from the submit loader.
# The original cell referred to `sumbit_data_loader` (typo) and `im`, neither
# of which exists; the notebook's names are `submit_data_loader` and
# `inference_model`.
for query in submit_data_loader:
    indices, distances = inference_model.get_nearest_neighbors(query, k=10)

## Get nearest neighbors of a query

In [None]:
# Get the 10 nearest neighbors for one query image from each of two classes.
# classA / classB are arrays of training-set indices for two labels; they were
# never defined in this notebook, so they are taken from labels_to_indices here.
# NOTE(review): labels 0 and 1 are an arbitrary choice — pick the pair of
# classes you actually want to inspect.
classA, classB = labels_to_indices[0], labels_to_indices[1]
for img_type in [classA, classB]:
    # First sample of the class, with a leading batch dimension added.
    img = dataset[img_type[0]][0].unsqueeze(0)
    print("query image")
    imshow(torchvision.utils.make_grid(img))
    indices, distances = inference_model.get_nearest_neighbors(img, k=10)
    # indices[0] holds the neighbours of the single query in the batch.
    nearest_imgs = [dataset[i][0] for i in indices[0]]
    print("nearest images")
    imshow(torchvision.utils.make_grid(nearest_imgs))

## Compare two images of the same class

In [None]:
# Compare two images of the same class: the first two samples listed in classA.
# NOTE(review): classA must be defined (an array of dataset indices) before
# this cell runs.
(x, _), (y, _) = dataset[classA[0]], dataset[classA[1]]
imshow(torchvision.utils.make_grid(torch.stack([x,y], dim=0)))
# is_match receives each image with a leading batch dimension added.
decision = inference_model.is_match(x.unsqueeze(0), y.unsqueeze(0))
print_decision(decision)

## Compare two images of different classes

In [None]:
# Compare two images of different classes: the first sample of classA against
# the first sample of classB.
# NOTE(review): classA and classB must be defined (arrays of dataset indices)
# before this cell runs.
(x, _), (y, _) = dataset[classA[0]], dataset[classB[0]]
imshow(torchvision.utils.make_grid(torch.stack([x,y], dim=0)))
# is_match receives each image with a leading batch dimension added.
decision = inference_model.is_match(x.unsqueeze(0), y.unsqueeze(0))
print_decision(decision)

## Compare multiple pairs of images

In [None]:
# Compare multiple pairs of images.
# x[k] and y[k] always come from the same class (even k: classA, odd k: classB),
# so every pair is expected to be reported as a match.
# NOTE(review): classA and classB must be defined (arrays of dataset indices)
# before this cell runs.
# The per-image shape is read from the data instead of hard-coding 3x384x384,
# so the cell keeps working if the dataset's image size differs.
sample_shape = dataset[classA[0]][0].shape
x = torch.zeros(20, *sample_shape)
y = torch.zeros(20, *sample_shape)
for i in range(0, 20, 2):
    x[i] = dataset[classA[i]][0]
    x[i+1] = dataset[classB[i]][0]
    # The references are taken 20 positions further along each index array.
    y[i] = dataset[classA[i+20]][0]
    y[i+1] = dataset[classB[i+20]][0]
imshow(torchvision.utils.make_grid(torch.cat((x,y), dim=0), nrow=20), figsize=(30, 3))
decision = inference_model.is_match(x, y)
for d in decision:
    print_decision(d)
# Share of pairs reported as a match.
print("accuracy = {}".format(np.sum(decision)/len(x)))

## Compare all pairs within a batch

In [None]:
# Compare all pairs within a batch; `x` is the batch built in the previous cell.
match_matrix = inference_model.get_matches(x)
assert match_matrix[0,0] # the 0th image should match with itself
imshow(torchvision.utils.make_grid(torch.stack((x[3],x[4]), dim=0)))
print_decision(match_matrix[3,4]) # does the 3rd image match the 4th image?

In [None]:
# Compare all pairs between queries (x) and references (y); both batches come
# from the "multiple pairs" cell above.
match_matrix = inference_model.get_matches(x, y)
imshow(torchvision.utils.make_grid(torch.stack((x[6],y[6]), dim=0)))
print_decision(match_matrix[6, 6]) # does the 6th query match the 6th reference?

In [None]:
# Make a new inference model with a higher match threshold (0.95 instead of 0.7).
# This rebinds `match_finder` and `inference_model`; the new InferenceModel is
# built from scratch, so train_indexer has not been called on it.
match_finder = MatchFinder(distance=CosineSimilarity(), threshold=0.95)
inference_model = InferenceModel(model, match_finder=match_finder)

# Get all matches in tuple form: each (i, j) indexes a query in x and a reference in y.
match_tuples = inference_model.get_matches(x, y, return_tuples=True)
print("MATCHING IMAGE PAIRS")
for i,j in match_tuples:
    print(i,j)
    imshow(torchvision.utils.make_grid(torch.stack((x[i],y[j]), dim=0)))