In [8]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from timeit import default_timer as timer
from datetime import timedelta
import pandas as pd

plt.ion()   # interactive mode

In [9]:
# Root of the competition checkout. Every path below is built from it by plain
# string concatenation, so each folder string keeps its trailing backslash.
directory = 'C:\\Data_Competitions\\Facebook image matching\\FB_image_matching_competition\\'
data_directory = directory + 'data\\'

# The three image folders sit side by side under the data directory.
training_image_path, ref_image_path, query_image_path = (
    data_directory + folder_name
    for folder_name in ('training_images\\', 'reference_images\\', 'query_images\\')
)

# This CSV is located directly under the root, not under the data directory.
ground_truth_csv = directory + 'public_ground_truth.csv'
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps; both pipelines end with these."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# Preprocessing pipelines keyed by phase. 'train' applies a random crop and a
# random horizontal flip; 'val' applies a fixed resize followed by a centre crop.
# Both produce 224-pixel crops before tensor conversion and normalisation.
data_transforms = {
    'train': transforms.Compose(
        [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
        + _tensor_and_normalize()
    ),
    'val': transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224)]
        + _tensor_and_normalize()
    ),
}

In [10]:
from resnet18.FBImageMatchingDataset import FBImageMatchingDataset
from triplet_loss.resnet_triplet import Resnet18Triplet 

# Reference-image dataset, preprocessed with the 'val' pipeline (no random
# augmentation), so repeated runs feed the model identical inputs.
dataset_ref = FBImageMatchingDataset(ref_image_path, transforms = data_transforms['val'])
# shuffle=False: this loader is only used for inference. Every embedding is keyed
# by its dataset index, so shuffling adds nothing and only makes the order of the
# collected results (and of anything saved from them) differ between runs.
dataloader_ref = torch.utils.data.DataLoader(dataset_ref, batch_size=2, shuffle=False, num_workers=2)
# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# NOTE(review): `pretrained=True` presumably fetches stock weights that the
# checkpoint loaded in the next cell then overwrites — confirm whether it is needed.
resnet_model = Resnet18Triplet(pretrained=True)

detect 1000000 jpg images under directory C:\Data_Competitions\Facebook image matching\FB_image_matching_competition\data\reference_images\
built dataset with 1000000 entries
cuda:0


In [11]:
# load model
resume_path = directory + 'triplet_loss\\resnet18_semihard24.pt'
# map_location='cpu': deserialise the tensors onto the CPU regardless of the
# device they were saved from. Without it, a checkpoint written on a GPU cannot
# be loaded on a machine without CUDA, even though this notebook otherwise
# supports a CPU fallback. load_state_dict copies the values into the model's
# own parameters, so the model's device is unaffected.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load(resume_path, map_location='cpu')
resnet_model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [6]:
# Display the model's layer structure (the module repr is the cell output).
resnet_model

Resnet18Triplet(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [12]:
def get_image_embedding(model, dataloader, dataset, device, report_every=5000):
    """Run every batch of ``dataloader`` through ``model`` and collect the outputs per image.

    Args:
        model: network to evaluate. It is moved to ``device`` and switched to
            eval mode; both changes persist after the call.
        dataloader: iterable yielding ``(images, idxs)`` batches, where ``idxs``
            holds the dataset index of each image in the batch.
        dataset: object providing ``__len__`` and ``get_image_id(index)``. Used
            to translate indices into image ids and to estimate remaining time.
        device: torch device on which inference runs.
        report_every: print a progress line once at least this many further
            images have been processed since the previous report threshold.

    Returns:
        dict mapping image id -> numpy array holding the model output for that image.

    Raises:
        ValueError: if ``report_every`` is smaller than 1.
    """
    if report_every < 1:
        raise ValueError("report_every must be >= 1, got {}".format(report_every))

    images_so_far = 0
    tt_number_images = len(dataset)
    next_report = report_every  # image count at which the next progress line is due
    model.to(device)
    model.eval()
    start = timer()  # float timestamp; only differences against it are meaningful
    result = {}
    with torch.no_grad():
        # Each batch arrives as an (images, idxs) pair collated by the dataloader.
        for images, idxs in dataloader:
            inputs = images.to(device)
            # One output row per image in the batch.
            # NOTE(review): presumably a 512-d embedding per image — confirm against the model.
            outputs = model(inputs)
            for idx, output in zip(idxs, outputs):
                image_id = dataset.get_image_id(idx.item())
                result[image_id] = output.detach().cpu().numpy()
                images_so_far += 1

            # Output the progress. A threshold check is used instead of
            # `images_so_far % report_every == 0` because the count only advances
            # in batch-sized steps and would skip the exact multiple whenever the
            # batch size does not divide `report_every` (or the last batch is short).
            if images_so_far >= next_report:
                end = timer()
                anticipate_remain = (end - start) * (tt_number_images - images_so_far) / images_so_far
                print("{} : processed {} so far, remain time {}".format(timedelta(seconds=end-start), images_so_far, timedelta(seconds=anticipate_remain)))
                # Move the threshold to the next multiple beyond the current count.
                next_report = (images_so_far // report_every + 1) * report_every
    return result

In [13]:
# inferencing
from sys import getsizeof
# Embed the whole reference set; this is the long-running step of the notebook.
result = get_image_embedding(
    model=resnet_model,
    dataloader=dataloader_ref,
    dataset=dataset_ref,
    device=device,
)

# Number of embedded images, then the size in bytes of the dict object itself.
# getsizeof is shallow: it does not count the arrays the dict refers to.
print(len(result), getsizeof(result), sep='\n')

0:00:35.318397 : processed 5000 so far, remain time 1:57:08.360963
0:01:08.310061 : processed 10000 so far, remain time 1:52:42.696079
0:01:41.330639 : processed 15000 so far, remain time 1:50:54.045288
0:02:14.313510 : processed 20000 so far, remain time 1:49:41.361980
0:02:47.613330 : processed 25000 so far, remain time 1:48:56.919874
0:03:20.732494 : processed 30000 so far, remain time 1:48:10.350652
0:03:53.660674 : processed 35000 so far, remain time 1:47:22.358583
0:04:26.982695 : processed 40000 so far, remain time 1:46:47.584678
0:05:00.870632 : processed 45000 so far, remain time 1:46:25.143406
0:05:37.460463 : processed 50000 so far, remain time 1:46:51.748789
0:06:14.379181 : processed 55000 so far, remain time 1:47:12.515022
0:06:51.165227 : processed 60000 so far, remain time 1:47:21.588550
0:07:28.905959 : processed 65000 so far, remain time 1:47:37.339571
0:08:05.184009 : processed 70000 so far, remain time 1:47:26.016125
0:08:41.643612 : processed 75000 so far, remain t

In [14]:
import pandas as pd
# Time the whole build-and-write step.
start = timer()

# One row per image: the id becomes the index, the array goes in 'embedding'.
df = (
    pd.DataFrame(result.items())
    .rename(columns={0:'image_id',1:'embedding'})
    .set_index('image_id')
)

# NOTE(review): each embedding is written to the CSV as the text form of its
# array. The logged run of this cell took about 40 minutes; a binary format
# may be worth considering if the readers of this file can change with it.
df.to_csv('triplet_loss/ref_embedding_semihard24.csv')

end = timer()
print("finish saving, takes: {}".format(timedelta(seconds=end-start)))

finish saving, takes: 0:40:22.783435
