## Simple Image Retrieval network in Pytorch

In [1]:
# Import libraries 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
from numpy import linalg as LA
import torchvision
import torch.nn.functional as F
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time
from torchvision import transforms
import copy
import os
import h5py
from skimage.transform import rescale, resize, downscale_local_mean
from PIL import Image
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Create the network to extract the features
class MyResNetFeatureExtractor(nn.Module):
    def __init__(self, resnet, transform_input=False):
        super(MyResNetFeatureExtractor, self).__init__()
        self.transform_input = transform_input
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        self.maxpool = resnet.maxpool
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        # self.fc = resnet.fc
        # stop where you want, copy paste from the model def

    def forward(self, x):
        if self.transform_input:
            x = x.clone()
            x[0] = x[0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x[1] = x[1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x[2] = x[2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        # 299 x 299 x 3
        x = self.conv1(x)
        # 149 x 149 x 32
        x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(x)
        # 147 x 147 x 32
        x = self.layer1(x)
        # 147 x 147 x 64
        x = self.layer2(x)
        # 73 x 73 x 64
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, kernel_size=7, stride=7)

        return x

In [3]:
# Import pre-trained model from by using torchvision package
model = torchvision.models.resnet50(pretrained = True) # resnet 50 model is imported

# set the model train False since we are using our feature extraction network 
model.train(False)

# Set our model with pre-trained model 
my_resnet = MyResNetFeatureExtractor(model).cuda()


In [4]:
# read images training images from the directory
root = './index/'
list_imgs_names = os.listdir(root)
list_imgs_names

['BigBen09.jpg',
 'Herend08.jpg',
 'Cathedral08.jpg',
 'Cathedral05.jpg',
 'BigBen05.jpg',
 'None10.jpg',
 'BrandenburgGate03.jpg',
 'CommandoMemorial10.jpg',
 'NotreDameDesAanges03.jpg',
 'BrandenburgGate10.jpg',
 'AmidaBuddha.jpg',
 'BigBen04.jpg',
 'None02.jpg',
 'BigBen01.jpg',
 'GuildHouses08.jpg',
 'Basilique08.jpg',
 'Herend04.jpg',
 'Cathedral07.jpg',
 'Herend.jpg',
 'BigBen03.jpg',
 'BrandenburgGate02.jpg',
 'None04.jpg',
 'None03.jpg',
 'None06.jpg',
 'Cathedral04.jpg',
 'AmidaBuddha10.jpg',
 'AmidaBuddha04.jpg',
 'CommandoMemorial07.jpg',
 'Basilique01.jpg',
 'BrandenburgGate05.jpg',
 'Basilique04.jpg',
 'Herend07.jpg',
 'NotreDameDesAanges04.jpg',
 'AmidaBuddha02.jpg',
 'Cathedral06.jpg',
 'GuildHouses05.jpg',
 'CommandoMemorial09.jpg',
 'Cathedral02.jpg',
 'None07.jpg',
 'Basilique.jpg',
 'NotreDameDesAanges10.jpg',
 'NotreDameDesAanges05.jpg',
 'BrandenburgGate01.jpg',
 'CommandoMemorial08.jpg',
 'Basilique06.jpg',
 'CommandoMemorial04.jpg',
 'GuildHouses02.jpg',
 'BigBen

In [None]:
print("The number of train images:",len(list_imgs_names))

###  Feature extraction with CNN 

In [5]:
since = time.time()
# read images images from a directory
root = './index/'
list_imgs_names = os.listdir(root)
list_imgs_names
# create an array to store features 
N = len(list_imgs_names)
fea_all = np.zeros((N, 2048))
# define empy array to store image names
image_all = []
# extract features 
for ind, img_name in enumerate(list_imgs_names):
    #print(img_name)
    img_path = os.path.join(root, img_name)
    image_np = Image.open(img_path)
    image_np = np.array(image_np)
    image_np = resize(image_np, (224, 224))
    image_np = torch.from_numpy(image_np).permute(2, 0, 1).float()
    image_np = Variable(image_np.unsqueeze(0))   #bs, c, h, w
    image_np = image_np.cuda()
    fea = my_resnet(image_np)
    fea = fea.squeeze()
    fea = fea.cpu().data.numpy()
    fea = fea.reshape((1, 2048))
    fea = fea / LA.norm(fea)
    fea_all[ind] = fea
    image_all.append(img_name)

time_elapsed = time.time() - since 

print('Feature extraction complete in {:.02f}s'.format(time_elapsed % 60))

Feature extraction complete in 4.01s


### Saving extracted features as a model 

In [58]:
fea_all = np.array(fea_all)
img_all = np.array(image_all)
h5f = h5py.File('featsss.h5', 'w')
h5f.create_dataset('dataset_1', data=fea_all)
#h5f.create_dataset('dataset_2', data=img_all)

img_all = [n.encode("ascii", "ignore") for n in image_all]
h5f.create_dataset('dataset_2', (len(img_all),1), data=img_all)

h5f.close()


In [59]:
h5f = h5py.File('featsss.h5', 'r')
fea_all = h5f['dataset_1'][:]
im_all = h5f['dataset_2'][:]
h5f.close()

In [60]:
a = im_all[1][0].decode('utf-8')
a




'Herend08.j'

In [10]:
fea_all = np.array(fea_all)
h5f = h5py.File('featsCNNNNm.h5', 'w')
h5f.create_dataset('dataset_1', data=fea_all)
h5f.create_dataset('dataset_2', data=image_all)
h5f.close()

TypeError: No conversion path for dtype: dtype('<U24')

In [None]:
fea_all = np.array(fea_all)
scores  = np.dot(fea_all, fea_all.T)
sort_ind = np.argsort(scores)[0][::-1]
scores = scores[0, sort_ind]


In [None]:
maxres = 10
imlist = [image_all[index] for i,index in enumerate(sort_ind[0:maxres])]
print ("top %d images in order are: " %maxres, imlist)

fig = plt.figure()

for i in range(len(imlist)):
    sample = imlist[i]
    img = mpimg.imread('./index' + '/' + sample)
    ax = plt.subplot(2, 5, i+1)
    ax.autoscale()
    #plt.tight_layout()
    plt.imshow(img, interpolation='nearest')
    ax.set_title('{:.3f}%'.format(scores[i]))
    ax.axis('off')
plt.show()