In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# import Datasets
from torch.utils.data import Dataset, DataLoader
# import plotting tools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import cv2 as cv

In [4]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Input / output locations, relative to this notebook.
train_data_path = '../../../Datasets/Model/train'
test_data_path = '../../../Datasets/Model/test'
image_path = '../../../Datasets/Model/Images'
data_save_path = '../../../Save/AdvancedDeliverables/DVBPR'
analytics_path = '../../../Analysis/AdvancedDeliverables'

# Optimisation hyper-parameters.
lr, batch_size, max_epoch = 0.005, 32, 100

# Training interactions: the three arrays are indexed together, i.e. entry i
# of each array describes the same (user, item, rating) record.
train_users = np.load(f'{train_data_path}/train_users.npy')
train_items = np.load(f'{train_data_path}/train_items.npy')
train_ratings = np.load(f'{train_data_path}/train_ratings.npy')

# Held-out interactions, stored in the same layout.
test_users = np.load(f'{test_data_path}/test_users.npy')
test_items = np.load(f'{test_data_path}/test_items.npy')
test_ratings = np.load(f'{test_data_path}/test_ratings.npy')

# images = []

# items = np.unique(np.concatenate((train_items, test_items)))
# for item in tqdm(items):

def load_image(item):
  """Load the image of ``item`` as a nested list of floats in [0, 1].

  The file ``{image_path}/{item}.jpg`` is opened, forced to three RGB
  channels, resized to 224 x 224 and scaled by 1/255.  When the file is
  missing or cannot be read, an all-zero (black) image of the same shape is
  returned instead, so every result has shape 224 x 224 x 3.

  Args:
    item: identifier used to build the image file name.

  Returns:
    A 224 x 224 x 3 nested Python list of floats.
  """
  try:
    # The context manager closes the file handle that Image.open keeps open.
    with Image.open(f'{image_path}/{item}.jpg') as raw:
      # Greyscale / RGBA / palette files would otherwise produce a different
      # channel count and break batching downstream.
      img = np.array(raw.convert('RGB').resize((224, 224))) / 255
  except OSError:
    # Missing or unreadable file (FileNotFoundError and PIL's
    # UnidentifiedImageError are both OSError subclasses).  Unlike a bare
    # `except`, this lets KeyboardInterrupt and genuine bugs propagate.
    img = np.zeros((224, 224, 3))
  return img.tolist()


In [5]:
def sample_user(user, users, items, ratings):
    """Sample one rated and one unrated interaction for ``user``.

    ``users``, ``items`` and ``ratings`` are indexed together: position i of
    each array describes the same interaction record.

    Args:
        user: id of the user to sample for.
        users: array of user ids, one per interaction.
        items: array of item ids, one per interaction.
        ratings: array of ratings, one per interaction.

    Returns:
        A tuple ``(user, user_item, user_rating, user_image, other_item,
        other_rating, other_image)``.  ``user_item`` is an item the user has
        rated; ``other_item`` is an item the user has not rated, and
        ``other_rating`` is the rating stored for the interaction it was
        drawn from.  The images are produced by ``load_image``.

    Raises:
        ValueError: if the user has no interactions, or if every item in
            ``items`` has been rated by the user.
    """
    user_indices = np.where(users == user)[0]
    if user_indices.size == 0:
        raise ValueError(f'user {user!r} has no interactions to sample from')
    user_index = np.random.choice(user_indices)
    user_item = items[user_index]
    user_rating = ratings[user_index]
    # load the image of the user_item
    user_image = load_image(user_item)

    # Select an item the user has not rated.  Filtering only on
    # `users != user` is not enough: another user's interaction may concern
    # an item this user has rated as well, so filter on the item ids instead.
    rated_items = items[user_indices]
    other_items = np.where(~np.isin(items, rated_items))[0]
    if other_items.size == 0:
        raise ValueError(f'user {user!r} has rated every available item')
    other_item_index = np.random.choice(other_items)
    other_item = items[other_item_index]
    other_rating = ratings[other_item_index]
    # load the image of the other_item
    other_image = load_image(other_item)

    return user, user_item, user_rating, user_image, other_item, other_rating, other_image

def get_batch(users, items, ratings, batch_size=32, fixed_users_set = None):
  """Build one batch of (rated, unrated) samples as a list of tensors.

  Args:
    users, items, ratings: interaction arrays forwarded to ``sample_user``.
    batch_size: number of users drawn at random (with replacement) from
      ``users``; ignored when ``fixed_users_set`` is given.
    fixed_users_set: optional explicit sequence of users to sample for.

  Returns:
    A list with one tensor per field returned by ``sample_user``, each
    stacking that field over the batch.  The list is empty when there are
    no users to sample for.
  """
  if fixed_users_set is not None:
    chosen_users = fixed_users_set
  else:
    chosen_users = np.random.choice(users, size=batch_size)

  # One row per user, in the order the users were chosen.
  rows = []
  for current_user in chosen_users:
    rows.append(list(sample_user(current_user, users, items, ratings)))

  # Transpose rows into per-field columns and turn each into a tensor.
  columns = zip(*rows)
  return [torch.tensor(column) for column in columns]

In [7]:
# Fetch torchvision's ResNet-18 with pretrained weights through torch.hub
# (the repository reference is pinned to the v0.10.0 tag).
# NOTE(review): none of the later cells visible here reference `model` --
# confirm it is still needed before paying for the download.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /Users/zhouzihan/.cache/torch/hub/v0.10.0.zip


In [8]:
# Display the module tree of the network loaded above.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
# Create a Siamese CNN network using AlexNet architecture
def init_weights(m):
    """Initialise a convolutional or linear layer in place.

    Intended to be passed to ``nn.Module.apply``.  Weights of ``nn.Conv2d``
    and ``nn.Linear`` modules are drawn with Kaiming-normal initialisation and
    their biases are set to 0.01; every other module type is left untouched.

    Args:
        m: the module visited by ``apply``.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        torch.nn.init.kaiming_normal_(m.weight)
        # Layers built with bias=False expose `bias` as None.
        if m.bias is not None:
            m.bias.data.fill_(0.01)

class FM(nn.Module):
    """Second-order factorization machine producing one score per input row.

    Parameters held by the module:
        w0: global bias, shape (1,), initialised to zero.
        w1: linear weights, shape (fea_num, 1), uniform in [0, 1).
        w2: latent factors, shape (fea_num, latent_dim), uniform in [0, 1).
    """

    def __init__(self, latent_dim, fea_num):
        super().__init__()

        self.latent_dim = latent_dim
        self.w0 = nn.Parameter(torch.zeros([1, ]))
        self.w1 = nn.Parameter(torch.rand([fea_num, 1]))
        self.w2 = nn.Parameter(torch.rand([fea_num, latent_dim]))

    def forward(self, inputs):
        """Score a 2-D batch of feature rows.

        Args:
            inputs: tensor of shape (batch, fea_num).

        Returns:
            Tensor of shape (batch,) holding the bias, linear and pairwise
            interaction terms summed together.
        """
        # Bias plus linear part, shape (batch, 1).
        linear_term = self.w0 + torch.mm(inputs, self.w1)

        # Pairwise interactions through the identity
        # 0.5 * sum_k [ (x . v_k)^2 - (x^2 . v_k^2) ].
        square_of_sum = torch.mm(inputs, self.w2).pow(2)
        sum_of_square = torch.mm(inputs.pow(2), self.w2.pow(2))
        pairwise_term = 0.5 * (square_of_sum - sum_of_square).sum(dim=1, keepdim=True)

        # Drop the trailing singleton axis so the result is one value per row.
        return (linear_term + pairwise_term).squeeze(1)

# AlexNet architecture
class SiameseCNN(nn.Module):
    """AlexNet-style encoder applied with shared weights to two image batches.

    Both inputs go through the same convolutional stack (``net``) and the
    same fully connected head (``fc``), giving one 100-dimensional embedding
    per image.
    """

    def __init__(self):
        super(SiameseCNN, self).__init__()
        # input size is 3 x 224 x 224
        self.net = nn.Sequential(
          nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
          # size: 64 x 55 x 55
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=3, stride=2),
          # size: 64 x 27 x 27
          nn.Conv2d(64, 192, kernel_size=5, padding=2),
          # size: 192 x 27 x 27
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=3, stride=2),
          # size: 192 x 13 x 13
          nn.Conv2d(192, 384, kernel_size=3, padding=1),
          # size: 384 x 13 x 13
          nn.ReLU(inplace=True),
          nn.Conv2d(384, 256, kernel_size=3, padding=1),
          # size: 256 x 13 x 13
          nn.ReLU(inplace=True),
          nn.Conv2d(256, 256, kernel_size=3, padding=1),
          # size: 256 x 13 x 13
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=3, stride=2),
          # size: 256 x 6 x 6
        )

        self.fc = nn.Sequential(
          nn.Linear(256 * 6 * 6, 4096),
          nn.ReLU(inplace=True),
          nn.Dropout(0.5),
          nn.Linear(4096, 4096),
          nn.ReLU(inplace=True),
          nn.Dropout(0.5),
          nn.Linear(4096, 100),
        )

    def _embed(self, x):
        """Encode one (N, H, W, C) image batch into (N, 100) embeddings."""
        # Move channels from the last axis to the second axis for Conv2d.
        x = x.permute(0, 3, 1, 2)
        features = self.net(x)
        # flatten() falls back to a copy when the convolution output is not
        # contiguous (the permuted input has channels-last strides, which
        # some backends propagate); .view() would raise in that case.
        features = torch.flatten(features, start_dim=1)
        return self.fc(features)

    def forward(self, x1, x2):
        """Embed both image batches with the shared encoder.

        Args:
            x1: first image batch, channels on the last axis (N, H, W, 3).
            x2: second image batch, same layout.

        Returns:
            Tuple ``(output1, output2)`` with the embeddings of ``x1`` and
            ``x2`` respectively.
        """
        output1 = self._embed(x1)
        output2 = self._embed(x2)
        return output1, output2

# Shared-weight image encoder: build it, re-initialise its conv / linear
# layers with init_weights, then move it to the selected device.
DVBPR = SiameseCNN()
DVBPR.apply(init_weights)
DVBPR = DVBPR.to(device)

# Factorization machine that scores the encoder's 100-d embeddings.
predictor = FM(latent_dim=10, fea_num=100).to(device)

# One Adam optimiser per network; they share the learning rate but use
# different weight-decay strengths.
optimiser_predictor = optim.Adam(predictor.parameters(), lr=lr, weight_decay=0.0001)
optimiser_DVBPR = optim.Adam(DVBPR.parameters(), lr=lr, weight_decay=0.001)

def save_training(path = f'{data_save_path}/'):
    """Write the weights of ``DVBPR`` and ``predictor`` to disk.

    Args:
        path: prefix prepended to the file names ``DVBPR.pt`` and
            ``predictor.pt``; the default is the save directory followed by
            a slash.
    """
    import os  # local import keeps this cell independent of the import cell

    # torch.save does not create missing directories; make sure the target
    # exists so a finished epoch is not lost to a FileNotFoundError.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    torch.save(DVBPR.state_dict(), path + 'DVBPR.pt')
    torch.save(predictor.state_dict(), path + 'predictor.pt')

def load_training(path = f'{data_save_path}/'):
    """Restore the weights of ``DVBPR`` and ``predictor`` from disk.

    Args:
        path: prefix prepended to the file names ``DVBPR.pt`` and
            ``predictor.pt``; the default is the save directory followed by
            a slash.
    """
    # map_location remaps the stored tensors onto the device in use, so a
    # checkpoint written on a GPU can still be loaded on a CPU-only machine.
    DVBPR.load_state_dict(torch.load(path + 'DVBPR.pt', map_location=device))
    predictor.load_state_dict(torch.load(path + 'predictor.pt', map_location=device))

In [14]:
def evaluation(test_batch_size = 32):
  """Score the current models on the test interactions.

  Every distinct test user is visited once, in chunks of
  ``test_batch_size``.  For each user one rated and one other item are
  sampled through ``get_batch``, both images are embedded by ``DVBPR`` and
  scored by ``predictor``, and the squared errors against the stored ratings
  are averaged.

  The models are switched to eval mode (dropout disabled) and gradients are
  turned off for the duration of the call; the previous train / eval mode of
  each model is restored afterwards.

  Args:
    test_batch_size: number of users scored per forward pass.

  Returns:
    The mean over batches of the per-batch mean squared error (an MSE, not
    a root-mean-squared error).
  """
  test_users_unique = list(set(test_users))
  mse_arr = []

  # Remember the current modes so that evaluation does not silently change
  # how a surrounding training loop behaves.
  dvbpr_was_training = DVBPR.training
  predictor_was_training = predictor.training
  DVBPR.eval()
  predictor.eval()

  try:
    with torch.no_grad():
      for i in tqdm(range(0, len(test_users_unique), test_batch_size)):
        test_users_batch = test_users_unique[i:i+test_batch_size]
        batch_test = get_batch(test_users, test_items, test_ratings, fixed_users_set=test_users_batch)
        # Only the ratings and images are needed for scoring.
        _, _, user_rating, user_image, _, other_rating, other_image = batch_test
        user_rating, user_image = user_rating.to(device), user_image.to(device)
        other_rating, other_image = other_rating.to(device), other_image.to(device)

        img_result_1, img_result_2 = DVBPR(user_image, other_image)

        pred_1 = predictor(img_result_1)
        pred_2 = predictor(img_result_2)

        mse_1 = torch.mean(torch.pow(pred_1 - user_rating, 2))
        mse_2 = torch.mean(torch.pow(pred_2 - other_rating, 2))

        mse_avg = (mse_1 + mse_2) / 2
        mse_arr.append(mse_avg.item())
  finally:
    DVBPR.train(dvbpr_was_training)
    predictor.train(predictor_was_training)

  return np.mean(mse_arr)

In [15]:
# Number of optimisation steps that are counted as one epoch.
steps_per_epoch = 1000

print('Evaluating on test set...')

# evaluation() returns a mean squared error; take the square root so the
# number really is the RMSE it is reported as.  No gradients are needed here.
with torch.no_grad():
  best_rmse = float(np.sqrt(evaluation()))

print(f'Baseline RMSE: {best_rmse}')

# training the model

loss_arr = []   # mean training loss of each epoch
rmse_arr = []   # test RMSE: baseline first, then one entry per epoch

rmse_arr.append(best_rmse)

for epoch in range (max_epoch):
  # Make sure dropout is active while training.
  DVBPR.train()
  predictor.train()
  epoch_losses = []

  for i in tqdm(range(steps_per_epoch)):
    batch = get_batch(train_users, train_items, train_ratings, batch_size=batch_size)
    # Only the ratings and images take part in the loss.
    _, _, user_rating, user_image, _, other_rating, other_image = batch
    user_rating, user_image = user_rating.to(device), user_image.to(device)
    other_rating, other_image = other_rating.to(device), other_image.to(device)
    img_result_1, img_result_2 = DVBPR(user_image, other_image)

    # Cast to float32 so the target matches the dtype of the predictions.
    rating_diff = (user_rating - other_rating).float()
    pred_diff = predictor(img_result_1) - predictor(img_result_2)
    # Squared error between the true and predicted rating differences.  The
    # plain signed difference is unbounded below: it can be decreased
    # forever by inflating pred_diff, so it must not be used as a loss.
    loss = torch.mean(torch.pow(rating_diff - pred_diff, 2))

    optimiser_predictor.zero_grad()
    optimiser_DVBPR.zero_grad()
    loss.backward()
    optimiser_predictor.step()
    optimiser_DVBPR.step()

    epoch_losses.append(loss.item())

  with torch.no_grad():
    loss_arr.append(float(np.mean(epoch_losses)))
    rmse = float(np.sqrt(evaluation()))
    print(f'Epoch {epoch+1} RMSE: {rmse}')
    rmse_arr.append(rmse)
    # Keep only the checkpoint with the best test score so far.
    if rmse < best_rmse:
      best_rmse = rmse
      save_training()
      print('Saved model')

Evaluating on test set...


  0%|          | 3/3609 [00:10<3:24:24,  3.40s/it]


KeyboardInterrupt: 