# Prototypes

In [1]:
# Colab setup: mount Google Drive and list the image folder so the
# sample image used below can be confirmed to exist.
import os

from google.colab import drive

drive.mount('/content/drive')
os.listdir("./drive/MyDrive/img")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


['index.jpg']

# PyTorch

In [2]:
import torch
import torchvision
from torchvision.io import read_image

In [3]:
# Load ResNet-50 with the default pretrained weight set.
from torchvision.models import ResNet50_Weights, resnet50

weights = ResNet50_Weights.DEFAULT
# .eval() puts the network into evaluation mode and returns the module itself,
# so it can be chained onto the constructor call.
model = resnet50(weights=weights).eval()
model

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [4]:
# Preprocessing transforms that belong to the selected weight set.
preprocess = weights.transforms()

In [5]:
# Read the sample image from the mounted Drive folder; the displayed result is
# a uint8 tensor.
img = read_image("./drive/MyDrive/img/index.jpg")
img

tensor([[[225, 225, 225,  ..., 233, 233, 233],
         [225, 225, 225,  ..., 233, 233, 233],
         [225, 225, 225,  ..., 233, 233, 233],
         ...,
         [208, 208, 208,  ..., 155, 159, 161],
         [208, 208, 208,  ..., 155, 159, 161],
         [208, 208, 208,  ..., 155, 159, 161]],

        [[209, 209, 209,  ..., 227, 227, 227],
         [209, 209, 209,  ..., 227, 227, 227],
         [209, 209, 209,  ..., 227, 227, 227],
         ...,
         [189, 189, 189,  ...,  96, 100, 102],
         [189, 189, 189,  ...,  96, 100, 102],
         [189, 189, 189,  ...,  96, 100, 102]],

        [[196, 196, 196,  ..., 227, 227, 227],
         [196, 196, 196,  ..., 227, 227, 227],
         [196, 196, 196,  ..., 227, 227, 227],
         ...,
         [175, 175, 175,  ...,  54,  58,  58],
         [175, 175, 175,  ...,  54,  58,  58],
         [175, 175, 175,  ...,  54,  58,  58]]], dtype=torch.uint8)

In [6]:
# Run the weight-specific preprocessing, then add a leading dimension with
# unsqueeze(0) so the model receives a batch of one image.
image_tensor = preprocess(img).unsqueeze(0)
image_tensor

tensor([[[[ 1.7352,  1.7523,  1.7523,  ...,  1.8722,  1.8722,  1.8722],
          [ 1.7352,  1.7352,  1.7352,  ...,  1.8722,  1.8722,  1.8722],
          [ 1.7352,  1.7352,  1.7352,  ...,  1.8722,  1.8722,  1.8722],
          ...,
          [ 1.4783,  1.4783,  1.4783,  ...,  0.5364,  0.6221,  0.6563],
          [ 1.4783,  1.4783,  1.4783,  ...,  0.5022,  0.6049,  0.6392],
          [ 1.4612,  1.4612,  1.4612,  ...,  0.4679,  0.5536,  0.6221]],

         [[ 1.6232,  1.6408,  1.6408,  ...,  1.9384,  1.9384,  1.9384],
          [ 1.6232,  1.6232,  1.6232,  ...,  1.9384,  1.9384,  1.9384],
          [ 1.6232,  1.6232,  1.6232,  ...,  1.9384,  1.9384,  1.9384],
          ...,
          [ 1.3081,  1.3081,  1.3081,  ..., -0.3550, -0.2850, -0.2850],
          [ 1.3081,  1.3081,  1.3081,  ..., -0.3725, -0.3025, -0.2850],
          [ 1.2906,  1.2906,  1.2906,  ..., -0.3901, -0.3375, -0.3025]],

         [[ 1.6117,  1.6291,  1.6291,  ...,  2.1520,  2.1520,  2.1520],
          [ 1.6117,  1.6117,  

In [7]:
# Classify the image with ResNet-50 and pair every class name with its
# softmax probability.
# Inference only: no_grad() skips autograd bookkeeping for the forward pass.
with torch.no_grad():
  prediction = model(image_tensor).squeeze(0).softmax(0)

preds = prediction.tolist()  # probabilities as Python floats, in model output order
categories = weights.meta["categories"]
res_pt = [{"score": score, "label": label} for label, score in zip(categories, preds)]
# Order by label name (the Hugging Face results below are ordered the same way).
res_pt = sorted(res_pt, key=lambda p: p["label"])
res_pt

[{'score': 0.00014131961506791413, 'label': 'Afghan hound'},
 {'score': 0.00048165678163059056, 'label': 'African chameleon'},
 {'score': 0.0005595835391432047, 'label': 'African crocodile'},
 {'score': 0.0007206276641227305, 'label': 'African elephant'},
 {'score': 0.0006230680155567825, 'label': 'African grey'},
 {'score': 0.0004835375293623656, 'label': 'African hunting dog'},
 {'score': 0.00041846948442980647, 'label': 'Airedale'},
 {'score': 0.0005643519689328969, 'label': 'American Staffordshire terrier'},
 {'score': 0.0006051490199752152, 'label': 'American alligator'},
 {'score': 0.00012603333743754774, 'label': 'American black bear'},
 {'score': 0.000616180885117501, 'label': 'American chameleon'},
 {'score': 0.0004896431928500533, 'label': 'American coot'},
 {'score': 0.0006429959321394563, 'label': 'American egret'},
 {'score': 0.000561753346119076, 'label': 'American lobster'},
 {'score': 0.0006787634920328856, 'label': 'Angora'},
 {'score': 0.00033066130708903074, 'label':

### Cosine Similarity

In [8]:
# Cosine sim with torch
# Sanity check: a vector compared with itself should give a value of ~1.0.
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
# One row; -1 lets torch infer the vector length instead of hard-coding 1000.
p = prediction.reshape(1, -1)
print(cos(p, p).item())

0.9999997019767761


In [9]:
# Cosine sim with numpy
import numpy as np
def cosine_similarity(a, b, eps=1e-8):
  """Return the cosine similarity of two 1-D vectors.

  Args:
    a: Array-like of numbers (list, tuple or numpy array).
    b: Array-like of numbers with the same length as ``a``.
    eps: Lower bound for the product of the two norms. It prevents a
      division by zero (and a ``nan`` result) when either vector is all
      zeros; in that case the function returns 0.0.

  Returns:
    ``dot(a, b) / max(||a|| * ||b||, eps)`` as a numpy floating scalar.
  """
  a, b = np.asarray(a), np.asarray(b)
  norm_product = np.linalg.norm(a) * np.linalg.norm(b)
  # Clamp the denominator, similar to the eps guard of torch.nn.CosineSimilarity.
  return np.dot(a, b) / max(norm_product, eps)

# Sanity check: the prediction vector compared with itself should give ~1.0.
cosine_similarity(preds, preds)

1.0000000000000002

# Hugging Face

In [None]:
!pip install transformers 
!pip install datasets datasets[vision]

In [13]:
from transformers import pipeline
# Image-classification pipeline that loads the google/vit-base-patch16-224 checkpoint.
# NOTE: this rebinds `model`, which earlier held the torchvision ResNet-50;
# re-running the PyTorch cells after this one would call the pipeline instead.
model = pipeline(task="image-classification", model="google/vit-base-patch16-224")

Downloading:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/346M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/160 [00:00<?, ?B/s]

In [14]:
# Request up to 1000 scored labels from the pipeline and order them by label name.
res_hf = sorted(
    model("./drive/MyDrive/img/index.jpg", top_k=1000),
    key=lambda entry: entry["label"],
)
res_hf

[{'score': 6.167036190163344e-05, 'label': 'Afghan hound, Afghan'},
 {'score': 6.970731647015782e-06,
  'label': 'African chameleon, Chamaeleo chamaeleon'},
 {'score': 1.1768658623623196e-05,
  'label': 'African crocodile, Nile crocodile, Crocodylus niloticus'},
 {'score': 5.614721521851607e-05,
  'label': 'African elephant, Loxodonta africana'},
 {'score': 3.037473106815014e-06,
  'label': 'African grey, African gray, Psittacus erithacus'},
 {'score': 4.610600808518939e-05,
  'label': 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus'},
 {'score': 0.0005311789573170245, 'label': 'Airedale, Airedale terrier'},
 {'score': 0.0009858707198873162,
  'label': 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier'},
 {'score': 4.616965270543005e-06,
  'label': 'American alligator, Alligator mississipiensis'},
 {'score': 9.217957085638773e-06,
  'label': 'American black bear, black bear, Ursus americanus, Euarctos americanus'},
 {'s

## Comparisons

Basic idea: extract the probabilities of all 1000 classes (label plus probability) for an image and treat them as that image's feature vector. Then compare this feature vector with the feature vectors of other images.

Use cosine similarity as the metric: 1 means the two vectors are parallel (= cos(0°)), 0 means they are orthogonal (= cos(90°)).

In [15]:
# Pick the entry for a single class out of each result list.
TARGET_LABEL = "golden retriever"


def _primary_name(label):
  """Return the text before the first comma of a label, without surrounding spaces."""
  return label.split(",")[0].strip()


# Compare for equality. `label in TARGET_LABEL` would be a substring test that
# also accepts any label that is merely a fragment of the target string.
# Hugging Face labels can list synonyms separated by commas
# (e.g. 'Afghan hound, Afghan'), so only the first name is compared.
el_pt = [x for x in res_pt if _primary_name(x["label"]) == TARGET_LABEL]
el_hf = [x for x in res_hf if _primary_name(x["label"]) == TARGET_LABEL]

In [16]:
el_pt

[{'score': 0.0972425788640976, 'label': 'golden retriever'}]

In [17]:
el_hf

[{'score': 0.4586021900177002, 'label': 'golden retriever'}]

In [19]:
# Score vectors, taken in the label-sorted order of res_pt / res_hf.
# NOTE(review): element i of both vectors is assumed to describe the same class.
# Each list was sorted by its own label strings, and the Hugging Face labels
# carry extra comma-separated synonyms, so the two orders may not match
# everywhere. TODO: confirm the alignment before trusting the similarity value.
features_pt = [x["score"] for x in res_pt]
features_hf = [x["score"] for x in res_hf]

# torch.tensor(...) replaces the legacy torch.FloatTensor constructor; -1 lets
# torch infer the vector length instead of hard-coding 1000.
tensor_pt = torch.tensor(features_pt, dtype=torch.float32).reshape(1, -1)
tensor_hf = torch.tensor(features_hf, dtype=torch.float32).reshape(1, -1)

In [22]:
%%timeit -r 1 -n 1
# -r 1 -n 1 runs the statement exactly once, so the value is printed a single
# time; the reported duration is one sample and includes the print call.
print(cos(tensor_pt, tensor_hf).item())    # torch

0.7526504397392273
6.81 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [23]:
%%timeit -r 1 -n 1
# -r 1 -n 1 runs the statement exactly once; the reported duration is one
# sample. The inputs are Python lists, so the time includes their conversion
# to numpy arrays inside cosine_similarity.
print(cosine_similarity(features_pt, features_hf))  # implemented with numpy

0.7526504339147044
1.7 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
