In [1]:
import json
import glob, os

# data_dir = os.getcwd()
IMAGES_DIR = os.path.join(os.getcwd(), "updated_images", "vizzy_images")
list_imgs = glob.glob(os.path.join(IMAGES_DIR, "/**/*.JPEG"))

from torchvision import datasets, transforms
import torch

In [2]:
tc = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()              
])

In [3]:
image_datasets = datasets.ImageFolder(IMAGES_DIR, transform=tc)
dloader = torch.utils.data.DataLoader(image_datasets, batch_size=10, shuffle=False)
images = dloader.sampler.data_source.imgs #contains information on the path of the image

In [4]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

Using cache found in /Users/lukemainwaring/.cache/torch/hub/pytorch_vision_v0.10.0


In [5]:
# Select the desired layer
layer = model._modules.get('avgpool')

In [6]:
def copy_embeddings(m, i, o):
    """Copy embeddings from the penultimate layer.
    """
    o = o[:, :, 0, 0].detach().numpy().tolist()
    outputs.append(o)

In [7]:
outputs = []
# attach hook to the penulimate layer
_ = layer.register_forward_hook(copy_embeddings)
model.eval() # Inference mode

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
# Generate image's embeddings for all images in dloader and saves 
# them in the list outputs
for X, y in dloader:
    _ = model(X)

In [9]:
# flatten list of embeddings to remove batches
list_embeddings = [item for sublist in outputs for item in sublist]

In [10]:
len(list_embeddings)

300

In [11]:
# Reduce dimensionality to 32
from sklearn.decomposition import TruncatedSVD

svd = TruncatedSVD(n_components=32, random_state=42)
image_embeddings_reduced = svd.fit_transform(list_embeddings)

In [12]:
image_embeddings_reduced

array([[22.46178911, -4.10716465, -2.89212444, ...,  1.46268801,
        -1.27287472,  1.3422883 ],
       [21.28943066, -5.98700833, -7.06602362, ..., -2.40693728,
        -1.27088574,  1.5257578 ],
       [21.03024859, -6.83107403, -0.29999491, ...,  0.4579964 ,
        -0.31609899,  2.32670187],
       ...,
       [19.03057534,  0.32610435,  3.08390332, ...,  0.79905242,
        -0.164118  ,  2.25945384],
       [20.62756837,  1.79797952,  2.29369479, ...,  1.15797056,
         0.22354295, -1.73505181],
       [22.11451417, -4.12899557,  9.0843392 , ...,  1.29073713,
        -0.97499853,  0.0385179 ]])

In [13]:
out = {}
with open('input_data_before_embeddings.json') as json_file:
    out = json.load(json_file)

In [14]:
out

{'ILSVRC2012_val_00000592': {'id': 'ILSVRC2012_val_00000592',
  'src': 'n02133161/ILSVRC2012_val_00000592.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00000865': {'id': 'ILSVRC2012_val_00000865',
  'src': 'n02133161/ILSVRC2012_val_00000865.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00000871': {'id': 'ILSVRC2012_val_00000871',
  'src': 'n02132136/ILSVRC2012_val_00000871.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00001199': {'id': 'ILSVRC2012_val_00001199',
  'src': 'n02132136/ILSVRC2012_val_00001199.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00002329': {'id': 'ILSVRC2012_val_00002329',
  'src': 'n02134418/ILSVRC2012_val_00002329.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00003044': {'id': 'ILSVRC2012_val_00003044',
  'src': 'n02132136/ILSVRC2012_val_00003044.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00004578': {'id': 'ILSVRC2012_val_00004578',
  'src': 'n02132136/ILSVRC2012_val_00004578.JPEG',
  'givenLabel': 'bear'},
 'ILSVRC2012_val_00004612': {'id':

In [15]:
for i in range(len(image_embeddings_reduced)):
    out[os.path.basename(images[i][0])[:-5]]["embedding"] = list(image_embeddings_reduced[i])

In [16]:
with open('output_data_embeddings_32.json', 'w') as f:
    json.dump(out, f)