In [1]:
import torch
import torch.nn as nn
import torch.nn.init
import torchvision.models as models
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import glob
import os

In [2]:
def normalization(X):
    """L2-normalization of features columns"""
    norm = torch.pow(X, 2).sum(dim=1, keepdim=True).sqrt()
    X = torch.div(X, norm)
    
    return X

In [3]:
class ImageEncoder(nn.Module):

  def __init__(self, embedding_size, cnn_type):
    """Initializing parameters"""
    super(ImageEncoder).__init__()
    self.embedding_size = embedding_size # Size of projected image
    self.cnn = self.load_cnn(cnn_type)

    # No need to finetune parameters = frozen layers
    for param in self.cnn.parameters():
        param.requires_grad = False

    # Replacing last fully connected layer with new one
    self.fc = nn.Linear(self.cnn.classifier._modules['6'].in_features, embedding_size)
    self.cnn.classifier = nn.Sequential(*list(self.cnn.classifier.children())[:-1])

    # Initializing the weights of fully-connected layer, which makes projection to new space
    self.initialization_weights()
  
  def load_cnn(self, cnn_type):
    """Loading pretrained model"""
    model = models.__dict__[cnn_type](pretrained=True)

    return model

  def initialization_weights(self):
    """Xavier initialization"""
    r = np.sqrt(6.) / np.sqrt(self.fc.in_features + self.fc.out_features)
    self.fc.weight.data.uniform_(-r, r)
    self.fc.bias.data.fill_(0)

  def forward(self, X):
    """Creation of features"""
    # Creation of embeddings
    features = self.cnn(X)

    # Normalization of embeddings
    features = normalization(features)

    # Projection to new space
    features = self.fc(features)

    return features

### Preparation of dataset

In [87]:
def download_data(texts_path, images_path, transform):
  """Creation of train and test sets"""
  # Images and texts
  X = np.array([])

  # Download texts
  texts_df = pd.read_csv(texts_path)
  texts_df['Text'] = texts_df['color'] + " " + texts_df['name'] + " " + texts_df['description']

  # Download images
  for image in glob.glob(images_path):
    im = Image.open(image)
    im = transform(im)
    key_img_name = os.path.basename(image).split('_')[0]
    description = texts_df[texts_df['Unnamed: 0']==int(key_img_name)].iloc[0,6]
    X = np.append(X,im)
   
  print(X)
  dataset = TensorDataset(X)  
#   return dataset

In [88]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Transformation of images
transform = transforms.Compose([
    transforms.ToTensor(),  # Transform to tensor
    transforms.Resize(size=(224,224)),
    transforms.Normalize((0.5,), (0.5,))  # Scale images to [-1, 1]
])

# Data paths
descriptions_data = "./processed_data/processedSKUs_nodups.csv"
images_folder = "./processed_data/images/*.jpg"

# Creation of dataset
dataset = download_data(descriptions_data, images_folder, transform)

#trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
#testloader = DataLoader(testset, batch_size=5, shuffle=False)

[1. 1. 1. ... 1. 1. 1.]


TypeError: 'int' object is not callable