In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from timeit import default_timer as timer
from datetime import timedelta
import pandas as pd

plt.ion()   # switch matplotlib to interactive mode for the rest of the session

<matplotlib.pyplot._IonContext at 0x24302b0c760>

In [2]:
# Filesystem layout of the competition checkout. Each folder path keeps its
# trailing backslash because the sub-paths are built by plain concatenation.
directory = 'C:\\Data_Competitions\\Facebook image matching\\FB_image_matching_competition\\'
data_directory = directory + 'data\\'
training_image_path, ref_image_path, query_image_path = (
    data_directory + sub_folder
    for sub_folder in ('training_images\\', 'reference_images\\', 'query_images\\')
)
ground_truth_csv = directory + 'public_ground_truth.csv'


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps, the tail shared by both pipelines."""
    # NOTE(review): these look like the usual ImageNet channel mean/std -- confirm.
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# 'train' uses random crop/flip augmentation; 'val' uses a fixed resize + centre crop.
data_transforms = {
    'train': transforms.Compose(
        [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
        + _tensor_and_normalize()
    ),
    'val': transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224)]
        + _tensor_and_normalize()
    ),
}

In [3]:
from resnet18.FBImageMatchingDataset import FBImageMatchingDataset
from triplet_loss.resnet_triplet import Resnet18Triplet 

# Reference images are only embedded (never trained on), so they go through
# the 'val' preprocessing pipeline, which contains no random augmentation.
dataset_ref = FBImageMatchingDataset(ref_image_path, transforms=data_transforms['val'])
# shuffle=False: every embedding is stored under its image id, so visiting
# order does not affect the result; shuffling only randomises disk access
# and makes runs harder to compare.
dataloader_ref = torch.utils.data.DataLoader(dataset_ref, batch_size=2, shuffle=False, num_workers=2)
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
resnet_model = Resnet18Triplet(pretrained=False)

detect 1000000 jpg images under directory C:\Data_Competitions\Facebook image matching\FB_image_matching_competition\data\reference_images\
built dataset with 1000000 entries
cuda:0


In [4]:
# load model
resume_path = directory + 'triplet_loss\\resnet18_semihard48.pt'
# map_location='cpu' lets a checkpoint that was saved from a CUDA device be
# restored on a machine without a GPU; load_state_dict then copies the values
# into the model's own parameters, wherever those live.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load(resume_path, map_location='cpu')
resnet_model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [6]:
# Echo the model as the cell's last expression so the notebook prints its module tree.
resnet_model

Resnet18Triplet(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [5]:
def get_image_embedding(model, dataloader, dataset, device, report_every=5000):
    """Run ``model`` over every batch of ``dataloader`` and collect the outputs.

    Args:
        model: torch module; it is moved to ``device`` and put in eval mode.
        dataloader: iterable yielding ``(images, idxs)`` batches, where ``idxs``
            holds the dataset index of each image in the batch.
        dataset: object providing ``__len__`` and ``get_image_id(index)``.
        device: torch device on which the forward pass runs.
        report_every: print a progress line each time at least this many more
            images have been processed. ``None`` or ``0`` disables reporting.

    Returns:
        dict mapping image id -> 1-D numpy array (one output row of the model).
    """
    total_images = len(dataset)
    images_so_far = 0
    # Report on crossing a threshold instead of testing `count % N == 0`:
    # the count only changes once per batch, so with a batch size that does
    # not divide N the modulo test would (almost) never be true.
    next_report = report_every if report_every else None
    result = {}

    model.to(device)
    model.eval()
    start = timer()  # wall-clock reference for elapsed / remaining estimates
    with torch.no_grad():
        # The dataloader collates samples into one (images, idxs) pair per batch.
        for images, idxs in dataloader:
            outputs = model(images.to(device))  # one row per image in the batch
            # Copy the whole batch to host memory once rather than row by row.
            batch_embeddings = outputs.detach().cpu().numpy()
            for idx, embedding in zip(idxs, batch_embeddings):
                result[dataset.get_image_id(int(idx))] = embedding
                images_so_far += 1

            # output the progress
            if next_report is not None and images_so_far >= next_report:
                end = timer()
                anticipate_remain = (end - start) * (total_images - images_so_far) / images_so_far
                print("{} : processed {} so far, remain time {}".format(timedelta(seconds=end-start), images_so_far, timedelta(seconds=anticipate_remain)))
                # Next multiple of report_every strictly above the current count.
                next_report = (images_so_far // report_every + 1) * report_every
    return result

In [6]:
# inferencing
from sys import getsizeof
result = get_image_embedding(resnet_model, dataloader_ref, dataset_ref, device)

print(len(result))
# sys.getsizeof is shallow: this is the size of the dict object itself and
# does not include the embedding arrays it refers to.
print(getsizeof(result))
# Bytes held by the embedding arrays themselves (the bulk of the memory).
print(sum(embedding.nbytes for embedding in result.values()))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


0:00:38.088623 : processed 5000 so far, remain time 2:06:19.635897
0:01:13.269518 : processed 10000 so far, remain time 2:00:53.682292
0:01:48.887073 : processed 15000 so far, remain time 1:59:10.251147
0:02:24.751963 : processed 20000 so far, remain time 1:58:12.846207
0:02:59.768993 : processed 25000 so far, remain time 1:56:50.990743
0:03:34.344582 : processed 30000 so far, remain time 1:55:30.474805
0:04:08.847933 : processed 35000 so far, remain time 1:54:21.093007
0:04:43.366397 : processed 40000 so far, remain time 1:53:20.793523
0:05:17.873865 : processed 45000 so far, remain time 1:52:25.989797
0:05:52.400776 : processed 50000 so far, remain time 1:51:35.614752
0:06:27.585471 : processed 55000 so far, remain time 1:50:59.423094
0:07:02.559446 : processed 60000 so far, remain time 1:50:20.097989
0:07:37.545679 : processed 65000 so far, remain time 1:49:41.618615
0:08:12.233802 : processed 70000 so far, remain time 1:48:59.677649
0:08:47.692446 : processed 75000 so far, remain t

In [7]:
import pandas as pd
# Write the reference embeddings to CSV, indexed by image id, and time the save.
# NOTE(review): the 'embedding' column holds numpy arrays, so each cell is
# presumably written as the array's text form -- the CSV reader further down
# parses that text back, so the two must stay in sync.
start = timer()
df = (
    pd.DataFrame(result.items())
    .rename(columns={0: 'image_id', 1: 'embedding'})
    .set_index('image_id')
)
df.to_csv('triplet_loss/ref_embedding_semihard48.csv')
end = timer()
print("finish saving, takes: {}".format(timedelta(seconds=end - start)))

finish saving, takes: 0:43:58.239993


In [7]:
import numpy as np
def convert_df_to_np(df):
    """Stack the arrays stored in the first column of ``df`` into one float32 matrix.

    Each cell of the first column is expected to hold an array of the same
    shape; the rows are stacked along a new leading axis.
    """
    first_column = df.iloc[:, 0]
    stacked = np.stack(first_column)
    # faiss only works with float32
    return stacked.astype('float32')

In [4]:
import pandas as pd
def converter(instr):
    """Parse one serialized embedding cell back into a 1-D float array.

    The first and last characters of ``instr`` are dropped (presumably the
    enclosing '[' and ']') and the remainder is read as whitespace-separated
    numbers.
    """
    inner_text = instr[1:-1]
    return np.fromstring(inner_text, sep=' ')
def _load_embedding_csv(csv_path):
    """Read an embedding CSV into a frame indexed by ``image_id`` and sorted by it.

    The 'embedding' column is parsed back into arrays with ``converter``.
    """
    frame = pd.read_csv(csv_path, converters={'embedding': converter})
    # NOTE(review): this rename only matches integer column labels 0 and 1;
    # it looks like a no-op when the CSV already has a named header -- confirm.
    frame = frame.rename(columns={0: 'image_id', 1: 'embedding'}).set_index('image_id')
    return frame.sort_index()


df_query = _load_embedding_csv('triplet_loss//query_embedding_semihard48.csv')
df_ref = _load_embedding_csv('triplet_loss//ref_embedding_semihard48.csv')

In [None]:
# using the descriptor track 
from eval_metrics_script.eval_metrics import get_matching_from_descs, evaluate_metrics
import json

# Descriptor matrices, in the row order of the two embedding frames.
query = convert_df_to_np(df_query)
ref = convert_df_to_np(df_ref)
gt_df = pd.read_csv(ground_truth_csv)

# NOTE(review): the ids are generated rather than read from the frames, so this
# assumes row i of each matrix belongs to Q{i:05d} / R{i:06d} -- confirm that
# both frames contain every id exactly once.
qry_ids = [f'Q{x:05d}' for x in range(50_000)]
ref_ids = [f'R{x:06d}' for x in range(1_000_000)]
submission_df = get_matching_from_descs(query, ref, qry_ids, ref_ids, gt_df)
ap, rp90 = evaluate_metrics(submission_df, gt_df)

# Report both metrics as pretty-printed JSON.
print(json.dumps({"average_precision": ap, "recall_p90": rp90}, indent=2))

In [None]:
# Persist the matches returned by get_matching_from_descs as a CSV file.
submission_df.to_csv("triplet_loss/submission_semihard48.csv")