In [120]:
import torch
import torch.nn as nn
import torch.nn.init
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

import numpy as np
import pandas as pd
import glob
import os
from PIL import Image

In [2]:
def normalization(X, eps=1e-12):
    """L2-normalize ``X`` along dimension 1.

    For a 2-D input this scales every row (one feature vector per row)
    to unit Euclidean length.

    Args:
        X: Tensor with at least two dimensions; the norm is taken over dim 1.
        eps: Lower bound applied to the norm before dividing, so that an
            all-zero vector maps to zeros instead of NaN (0 / 0).

    Returns:
        A tensor of the same shape as ``X``.
    """
    norm = torch.pow(X, 2).sum(dim=1, keepdim=True).sqrt()
    # Clamp rather than add eps: vectors with a norm above eps are divided
    # by their exact norm, so results for ordinary inputs are unchanged.
    return torch.div(X, torch.clamp(norm, min=eps))

In [3]:
class ImageEncoder(nn.Module):
    """Project images into an embedding space using a frozen pretrained CNN.

    The CNN's last classifier layer is removed and replaced by a separate,
    trainable linear layer (``self.fc``) that maps the CNN features to
    ``embedding_size`` dimensions.
    """

    def __init__(self, embedding_size, cnn_type):
        """Build the encoder.

        Args:
            embedding_size: Output dimensionality of the projection layer.
            cnn_type: Name of a model constructor in ``torchvision.models``.
                The model must expose a ``classifier`` container whose last
                child has an ``in_features`` attribute.
        """
        # nn.Module.__init__ must run before any sub-module is assigned;
        # `super(ImageEncoder).__init__()` (unbound super) does not call it.
        super().__init__()
        self.embedding_size = embedding_size  # Size of projected image
        self.cnn = self.load_cnn(cnn_type)

        # No need to finetune parameters = frozen layers
        for param in self.cnn.parameters():
            param.requires_grad = False

        # Replace the last fully connected layer with a new, trainable one.
        # The new layer lives outside self.cnn, so it is not frozen.
        classifier_layers = list(self.cnn.classifier.children())
        self.fc = nn.Linear(classifier_layers[-1].in_features, embedding_size)
        self.cnn.classifier = nn.Sequential(*classifier_layers[:-1])

        # Initialize the projection layer's weights
        self.initialization_weights()

    def load_cnn(self, cnn_type):
        """Return the ``torchvision.models`` model named ``cnn_type``,
        constructed with ``pretrained=True``."""
        return models.__dict__[cnn_type](pretrained=True)

    def initialization_weights(self):
        """Xavier (Glorot) uniform init for ``fc`` weights; bias set to zero.

        Weights are drawn from U(-r, r), r = sqrt(6 / (fan_in + fan_out)).
        """
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, X):
        """Compute embeddings for the input batch ``X``.

        Pipeline: CNN features -> L2 normalization -> linear projection.
        The returned projection itself is not normalized.
        """
        # Creation of embeddings
        features = self.cnn(X)

        # Normalization of embeddings
        features = normalization(features)

        # Projection to new space
        features = self.fc(features)

        return features