In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image

In [16]:
# Paths of the two images whose embeddings are compared below
pic_one = '/content/26.jpg'
pic_two = '/content/img_105.jpg'

In [3]:
# ResNet-18 with pretrained weights (downloaded to the torch hub cache on first use)
model = models.resnet18(pretrained=True)
# Handle to the 'avgpool' submodule; a forward hook is attached to it later
layer = model.avgpool

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 216MB/s]


In [4]:
# Set model to evaluation mode so that mode-dependent layers (the BatchNorm2d
# layers listed in the repr below) run in inference mode.
# eval() returns the module itself, which is why the cell displays the repr.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [5]:
# Preprocessing building blocks; get_vector chains them as
# scaler -> to_tensor -> normalize.
to_tensor = transforms.ToTensor()
scaler = transforms.Resize(size=(224, 224))
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

In [6]:
def get_vector(image_name):
    """Return the 'avgpool' feature vector of one image.

    The image is resized, converted to a tensor and normalized with the
    module-level ``scaler``, ``to_tensor`` and ``normalize`` transforms, then
    passed through the module-level ``model``. The output of ``layer`` is
    captured with a temporary forward hook.

    Args:
        image_name: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A 1-D ``torch.Tensor`` of 512 floats holding the captured output.

    Raises:
        Whatever ``Image.open`` or the forward pass raises; the hook is
        removed from ``layer`` in every case.
    """
    # Force three channels: grayscale, palette, CMYK or RGBA files would
    # otherwise not match the 3-channel mean/std used by `normalize`.
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(image_name) as img:
        t_img = normalize(to_tensor(scaler(img.convert('RGB')))).unsqueeze(0)

    # Buffer for the captured activation; the 'avgpool' output has 512 channels.
    my_embedding = torch.zeros(512)

    def copy_data(m, i, o):
        # Flatten the hooked output (one value per channel) into the buffer.
        my_embedding.copy_(o.detach().reshape(o.size(1)))

    h = layer.register_forward_hook(copy_data)
    try:
        # Only the activation is needed, so skip building the autograd graph.
        with torch.no_grad():
            model(t_img)
    finally:
        # Always detach the hook, even if the forward pass fails, so later
        # calls do not trigger stale hooks on the shared layer.
        h.remove()

    return my_embedding

In [18]:
# Extract the embeddings of both images; the cosine-similarity cell needs
# pic_two_vector as well, so it must be computed here on a fresh kernel.
pic_one_vector = get_vector(pic_one)
pic_two_vector = get_vector(pic_two)

In [15]:
# Display the feature vector extracted for pic_one (a bare last expression
# in a cell is rendered as the cell output).
pic_one_vector


tensor([1.0401e+00, 3.9349e-01, 7.6473e-01, 1.0625e-01, 1.4177e-01, 1.0908e-01,
        1.0564e+00, 4.7352e+00, 2.6898e-02, 3.9582e+00, 2.2217e-01, 9.3210e-02,
        0.0000e+00, 5.9293e-02, 5.0496e-02, 2.5217e+00, 6.6857e-01, 1.4738e+00,
        8.8085e-03, 1.3152e+00, 1.0413e-03, 2.1327e-01, 3.6598e-01, 2.3528e+00,
        6.4570e-03, 1.5999e+00, 8.9942e-02, 1.4803e+00, 2.2670e-01, 1.6051e+00,
        1.3308e-01, 7.0444e-02, 5.5934e-02, 2.1423e-01, 1.5822e+00, 1.5061e+00,
        1.8242e-01, 4.5989e-02, 5.0539e-02, 5.4030e+00, 5.0134e-01, 2.4174e-01,
        1.6655e-01, 1.7960e-01, 7.1745e-02, 1.2742e+00, 3.9478e-02, 1.3803e+00,
        7.5369e-01, 2.1739e-02, 0.0000e+00, 2.1307e+00, 2.6124e-01, 1.1327e+00,
        4.9237e-01, 1.0753e+00, 3.4015e+00, 1.2134e-02, 1.4705e+00, 1.2073e+00,
        7.2657e-01, 2.0881e-01, 4.2857e-01, 1.3095e+00, 5.9206e-02, 2.1506e+00,
        8.2775e-02, 1.7095e-01, 4.3121e-01, 9.1391e-01, 3.6785e-02, 2.1096e+00,
        2.7961e-01, 1.4101e-01, 5.7482e-

In [9]:
# Cosine similarity between the two embeddings, computed with PyTorch.
# Each 1-D vector gets a leading batch axis so the similarity is taken
# along dim=1.
cos = nn.CosineSimilarity(dim=1, eps=1e-6)
cos_sim = cos(pic_one_vector[None, :], pic_two_vector[None, :])
print('\nCosine similarity: {0}\n'.format(cos_sim))


Cosine similarity: tensor([1.])



In [8]:
# Display the feature vector held in pic_one_vector (a bare last expression
# in a cell is rendered as the cell output).
pic_one_vector

tensor([1.0401e+00, 3.9349e-01, 7.6473e-01, 1.0625e-01, 1.4177e-01, 1.0908e-01,
        1.0564e+00, 4.7352e+00, 2.6898e-02, 3.9582e+00, 2.2217e-01, 9.3210e-02,
        0.0000e+00, 5.9293e-02, 5.0496e-02, 2.5217e+00, 6.6857e-01, 1.4738e+00,
        8.8085e-03, 1.3152e+00, 1.0413e-03, 2.1327e-01, 3.6598e-01, 2.3528e+00,
        6.4570e-03, 1.5999e+00, 8.9942e-02, 1.4803e+00, 2.2670e-01, 1.6051e+00,
        1.3308e-01, 7.0444e-02, 5.5934e-02, 2.1423e-01, 1.5822e+00, 1.5061e+00,
        1.8242e-01, 4.5989e-02, 5.0539e-02, 5.4030e+00, 5.0134e-01, 2.4174e-01,
        1.6655e-01, 1.7960e-01, 7.1745e-02, 1.2742e+00, 3.9478e-02, 1.3803e+00,
        7.5369e-01, 2.1739e-02, 0.0000e+00, 2.1307e+00, 2.6124e-01, 1.1327e+00,
        4.9237e-01, 1.0753e+00, 3.4015e+00, 1.2134e-02, 1.4705e+00, 1.2073e+00,
        7.2657e-01, 2.0881e-01, 4.2857e-01, 1.3095e+00, 5.9206e-02, 2.1506e+00,
        8.2775e-02, 1.7095e-01, 4.3121e-01, 9.1391e-01, 3.6785e-02, 2.1096e+00,
        2.7961e-01, 1.4101e-01, 5.7482e-