In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import DataLoader
import random
from collections import Counter
import math
import os
import pandas as pd

In [None]:
# --- Optimisation hyper-parameters ---
batch_size = 32
max_epoch = 40
lr = 0.0005

# --- Maximum lengths that review sequences are cut / padded to ---
SINGLE_REVIEW_LIMIT = 100   # the single review written by the user for the item
USER_REVIEW_LIMIT = 1000    # concatenation of the user's other reviews
ITEM_REVIEW_LIMIT = 1000    # concatenation of the item's other reviews

# --- Reproducibility: sampling uses numpy's global RNG, initialisation uses torch's ---
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Locations of inputs and outputs (relative to the notebook) ---
train_data_path = '../../Datasets/Model/train'
test_data_path = '../../Datasets/Model/test'
image_path = '../../Datasets/Images'
data_save_path = '../../Save/BasicDeliverables/TransNets'
analytics_path = '../../Analysis/BasicDeliverables/TransNets'

# --- Module-level accumulators ---
rmse_arr = []   # RMSE values that are written to the analytics file at the end
tokens = []


In [None]:
# Load the pre-built training split: parallel arrays of users, items and ratings.
train_users, train_items, train_ratings = (
    np.load(f'{train_data_path}/train_{part}.npy')
    for part in ('users', 'items', 'ratings')
)
# Reviews are stored as a pickled object array, hence allow_pickle=True.
# NOTE(review): unpickling executes arbitrary code - only load trusted files.
train_reviews = np.load(
    f'{train_data_path}/train_reviews.npy', allow_pickle=True)

# Load the held-out test split, laid out the same way.
test_users, test_items, test_ratings = (
    np.load(f'{test_data_path}/test_{part}.npy')
    for part in ('users', 'items', 'ratings')
)
test_reviews = np.load(f'{test_data_path}/test_reviews.npy', allow_pickle=True)


In [None]:
def pad_sequence(sequence, length, placeholder=0):
  """Return a copy of `sequence` that is exactly `length` elements long.

  Sequences longer than `length` are truncated; shorter ones are right-padded
  with `placeholder`.

  The input is never modified. Padding in place would permanently append
  placeholders to the caller's list (for example a review stored in the
  dataset), and those placeholders would then show up in every later
  concatenation that includes that review.

  Args:
    sequence: the sequence to cut or pad.
    length: the target length.
    placeholder: the value used to fill missing positions (default 0).

  Returns:
    A new list of `length` elements.
  """
  trimmed = list(sequence[:length])
  return trimmed + [placeholder] * (length - len(trimmed))

def sample_user(user, users, items, reviews, ratings):
  """Draw one random interaction of `user` together with its review context.

  `users`, `items`, `reviews` and `ratings` are parallel collections: position
  i describes one (user, item, review, rating) interaction.

  Returns:
    A tuple (user, item, review, rating, item_reviews, user_reviews) where
      review       - the sampled review, cut/padded to SINGLE_REVIEW_LIMIT,
      item_reviews - all other reviews of the sampled item, concatenated and
                     cut/padded to ITEM_REVIEW_LIMIT,
      user_reviews - all other reviews written by the user, concatenated and
                     cut/padded to USER_REVIEW_LIMIT.
    The sampled review itself is excluded from both concatenations.
  """
  # every row that belongs to this user, and one of them picked at random
  own_rows = np.where(users == user)[0]
  chosen_row = np.random.choice(own_rows)

  item = items[chosen_row]
  rating = ratings[chosen_row]

  # concatenate the remaining reviews of the same item
  same_item_rows = np.where(items == item)[0]
  item_reviews = []
  for row in same_item_rows:
    if row != chosen_row:
      item_reviews.extend(reviews[row])

  # concatenate the remaining reviews written by the same user
  user_reviews = []
  for row in own_rows:
    if row != chosen_row:
      user_reviews.extend(reviews[row])

  # bring all three sequences to their fixed lengths (padding value 0)
  review = pad_sequence(reviews[chosen_row], SINGLE_REVIEW_LIMIT)
  user_reviews = pad_sequence(user_reviews, USER_REVIEW_LIMIT)
  item_reviews = pad_sequence(item_reviews, ITEM_REVIEW_LIMIT)

  return user, item, review, rating, item_reviews, user_reviews


def get_batch(users, items, reviews, ratings, batch_size=32, fixed_users_set = None):
  """Build one batch of samples as a list of PyTorch tensors.

  When `fixed_users_set` is None, `batch_size` users are drawn at random from
  `users`; otherwise exactly the users in `fixed_users_set` are used and
  `batch_size` is ignored. Each user contributes one sample obtained from
  sample_user.

  Returns:
    A list of tensors, one per field returned by sample_user, each stacking
    that field over the whole batch.
  """
  if fixed_users_set is not None:
    chosen_users = fixed_users_set
  else:
    chosen_users = np.random.choice(users, size=batch_size)

  # one row per user, each row holding the fields returned by sample_user
  samples = []
  for user in chosen_users:
    samples.append(list(sample_user(user, users, items, reviews, ratings)))

  # transpose rows into per-field columns and turn each column into a tensor
  columns = zip(*samples)
  return [torch.tensor(column) for column in columns]


def weights_init(m):
  """Initialise an nn.Linear layer; intended for use with `Module.apply`.

  Weights get Xavier-uniform initialisation and the bias (when the layer has
  one) is set to the constant 0.1. Modules that are not nn.Linear are left
  untouched.
  """
  if isinstance(m, nn.Linear):
    nn.init.xavier_uniform_(m.weight.data)
    # layers built with bias=False have m.bias set to None
    if m.bias is not None:
      nn.init.constant_(m.bias.data, 0.1)

In [None]:
# ref: https://github.com/zhongqiangwu960812/AI-RecommenderSystem/blob/master/Rank/DeepFM/DeepFM_Model.ipynb
class FM(nn.Module):
    """Factorisation machine that maps a feature vector to a scalar prediction.

    Parameters:
        latent_dim: the dimension of the latent factors.
        fea_num: the number of input features.
    """

    def __init__(self, latent_dim, fea_num):
        super().__init__()

        self.latent_dim = latent_dim
        # global bias, per-feature linear weights and per-feature latent factors
        self.w0 = nn.Parameter(torch.zeros(1))
        self.w1 = nn.Parameter(torch.rand(fea_num, 1))
        self.w2 = nn.Parameter(torch.rand(fea_num, latent_dim))

    def forward(self, inputs):
        """Return a (batch, 1) tensor of predictions for (batch, fea_num) inputs."""
        linear_term = torch.mm(inputs, self.w1) + self.w0

        # pairwise interactions: 0.5 * sum((x V)^2 - (x^2)(V^2)) over the latent axis
        square_of_sum = torch.mm(inputs, self.w2).pow(2)
        sum_of_square = torch.mm(inputs.pow(2), self.w2.pow(2))
        interaction_term = 1/2 * torch.sum(
            square_of_sum - sum_of_square, dim=1, keepdim=True)

        return linear_term + interaction_term


class TextCNN(nn.Module):
    """Convolutional text encoder: token ids -> fixed-size latent vector.

    Parameters:
        vocab_size: number of rows in the embedding table.
        embed_size: dimension of each token embedding.
        kernel_sizes: width of each 1-D convolution.
        num_neurons: number of output channels of each convolution
            (paired position-wise with kernel_sizes).
        latent_dim: size of the returned latent vector.
    """

    def __init__(self, vocab_size, embed_size, kernel_sizes, num_neurons, latent_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.dropout = nn.Dropout(0.5)
        # the pooled features of all convolutions are concatenated before decoding
        self.decoder = nn.Linear(sum(num_neurons), latent_dim)
        self.pool = nn.AdaptiveMaxPool1d(1)
        self.tanh = nn.Tanh()
        self.convs = nn.ModuleList([
            nn.Conv1d(embed_size, out_channels, kernel_width)
            for out_channels, kernel_width in zip(num_neurons, kernel_sizes)
        ])

    def forward(self, inputs):
        """Encode a (batch, seq_len) tensor of token ids into (batch, latent_dim)."""
        # Conv1d expects channels first: (batch, embed_size, seq_len)
        channels_first = self.embedding(inputs).permute(0, 2, 1)

        # max-pool every feature map over the sequence axis, then squash with tanh
        pooled = []
        for conv in self.convs:
            feature = self.tanh(self.pool(conv(channels_first)))
            pooled.append(torch.squeeze(feature, dim=-1))

        encoding = torch.cat(pooled, dim=1)
        return self.decoder(self.dropout(encoding))


class TransformMLP(nn.Module):
  """Two-layer tanh MLP that maps a concatenated latent vector to a latent vector.

  Parameters:
    concated_size: size of the concatenated input vector.
    latent_vector_size: size of the produced latent vector.
  """

  def __init__(self, concated_size, latent_vector_size):
    super().__init__()
    self.net = nn.Sequential(
        nn.Linear(concated_size, 2*concated_size),
        nn.Tanh(),
        nn.Linear(2*concated_size, latent_vector_size),
        nn.Tanh()
    )
    self.dropout = nn.Dropout(0.5)

  def forward(self, x, batch_size=None):
    """Transform a (batch, concated_size) tensor into (batch, latent_vector_size).

    Args:
      x: the input batch.
      batch_size: optional expected number of rows. When omitted it is taken
        from `x`, so batches of any size work (a fixed default of 32 would
        break the final, smaller batch of a dataset). When given, a mismatch
        with the actual batch raises a RuntimeError from `view`.
    """
    out = self.dropout(self.net(x))
    if batch_size is None:
      batch_size = x.shape[0]
    return out.view(batch_size, out.shape[1])


# Hyper-parameters shared by the networks below.
VOCAB_SIZE = 80000      # rows in each embedding table
EMBED_SIZE = 50         # token embedding dimension
KERNEL_SIZES = [3]      # convolution widths
NUM_FILTERS = [100]     # output channels per convolution
LATENT_DIM = 50         # size of every review latent vector
FM_LATENT_DIM = 8       # latent factors inside the factorisation machines

# Review encoders: item reviews, user reviews, and the single target review.
textCNN_I = TextCNN(VOCAB_SIZE, EMBED_SIZE, KERNEL_SIZES, NUM_FILTERS,
                    LATENT_DIM).apply(weights_init).to(device)
textCNN_U = TextCNN(VOCAB_SIZE, EMBED_SIZE, KERNEL_SIZES, NUM_FILTERS,
                    LATENT_DIM).apply(weights_init).to(device)
textCNN_T = TextCNN(VOCAB_SIZE, EMBED_SIZE, KERNEL_SIZES, NUM_FILTERS,
                    LATENT_DIM).apply(weights_init).to(device)

# Maps the concatenated (user, item) latents onto the target review latent space.
transform = TransformMLP(2 * LATENT_DIM, LATENT_DIM).apply(weights_init).to(device)
# Rating predictors for the target and the source network.
# weights_init only touches nn.Linear layers, so it is a no-op on FM.
fm_T = FM(FM_LATENT_DIM, LATENT_DIM).apply(weights_init).to(device)
fm_S = FM(FM_LATENT_DIM, LATENT_DIM).apply(weights_init).to(device)

# One Adam optimiser per network, all with learning rate `lr`.
# NOTE(review): an earlier note here mentioned a weight decay of 0.01, but no
# weight_decay is passed to Adam - confirm which is intended.
optimiser_textCNN_I = torch.optim.Adam(textCNN_I.parameters(), lr=lr)
optimiser_textCNN_U = torch.optim.Adam(textCNN_U.parameters(), lr=lr)
optimiser_textCNN_T = torch.optim.Adam(textCNN_T.parameters(), lr=lr)
optimiser_transform = torch.optim.Adam(transform.parameters(), lr=lr)
optimiser_fm_T = torch.optim.Adam(fm_T.parameters(), lr=lr)
optimiser_fm_S = torch.optim.Adam(fm_S.parameters(), lr=lr)


def save_training(path):
    """Save the weights of every trained network into the folder `path`.

    The folder is created when it does not exist. Besides the three text
    encoders and the transform MLP, the two factorisation machines are saved
    as well: fm_S produces the final rating prediction, so a checkpoint
    without it cannot reproduce the evaluated model.
    """
    if path:
        os.makedirs(path, exist_ok=True)

    checkpoints = {
        'textCNN_I.pth': textCNN_I,
        'textCNN_U.pth': textCNN_U,
        'textCNN_T.pth': textCNN_T,
        'transform.pth': transform,
        'fm_T.pth': fm_T,
        'fm_S.pth': fm_S,
    }
    for filename, model in checkpoints.items():
        torch.save(model.state_dict(), os.path.join(path, filename))

def load_training(path):
    """Load network weights from the folder `path` into the module-level models.

    Tensors are mapped onto the current `device`, so a checkpoint written on a
    GPU machine can be restored on a CPU-only one. The factorisation machine
    files are optional: they are restored when present and skipped otherwise,
    which keeps checkpoints that only contain the four original files loadable.

    NOTE(review): torch.load unpickles its input - only load trusted files.
    """
    required = {
        'textCNN_I.pth': textCNN_I,
        'textCNN_U.pth': textCNN_U,
        'textCNN_T.pth': textCNN_T,
        'transform.pth': transform,
    }
    optional = {
        'fm_T.pth': fm_T,
        'fm_S.pth': fm_S,
    }

    for filename, model in required.items():
        model.load_state_dict(
            torch.load(os.path.join(path, filename), map_location=device))

    for filename, model in optional.items():
        checkpoint_file = os.path.join(path, filename)
        if os.path.exists(checkpoint_file):
            model.load_state_dict(torch.load(checkpoint_file, map_location=device))



In [None]:
'''Training'''

def evaluation(test_batch_size = 32):
  """Return the RMSE of the source network on the test set.

  One review is sampled at random for every unique test user (via get_batch),
  the rating is predicted from the user's and the item's other reviews, and
  the root of the mean squared error over all predictions is returned.

  The networks are switched to eval mode (dropout disabled) and gradients are
  not tracked while evaluating; the previous train/eval mode of each network
  is restored afterwards.

  Args:
    test_batch_size: number of users scored per forward pass.

  Returns:
    The RMSE as a float, or NaN when there are no test users.
  """
  unique_users = np.unique(test_users)

  eval_models = (textCNN_U, textCNN_I, transform, fm_S)
  previous_modes = [model.training for model in eval_models]
  for model in eval_models:
    model.eval()

  squared_error_sum = 0.0
  num_predictions = 0
  try:
    with torch.no_grad():
      for start in tqdm(range(0, len(unique_users), test_batch_size)):
        users_batch = unique_users[start:start + test_batch_size]
        _, _, _, rating_test, item_reviews_test, user_reviews_test = get_batch(
            test_users, test_items, test_reviews, test_ratings,
            fixed_users_set=users_batch)
        item_reviews_test = item_reviews_test.to(device)
        user_reviews_test = user_reviews_test.to(device)
        # fm_S returns shape (batch, 1); give the ratings the same shape so the
        # subtraction is element-wise and does not broadcast to (batch, batch)
        rating_test = rating_test.to(device).float().view(-1, 1)

        latent_review_user = textCNN_U(user_reviews_test)
        latent_review_item = textCNN_I(item_reviews_test)
        latent_review_user_item = torch.cat((latent_review_user, latent_review_item), 1)
        transform_construction = transform(latent_review_user_item, batch_size=len(users_batch))
        # flatten the transform_construction
        transform_construction = transform_construction.view(transform_construction.size(0), -1)

        prediction_source = fm_S(transform_construction)
        # accumulate squared errors so that the final, smaller batch is weighted
        # by its size (averaging per-batch RMSEs would not give the RMSE)
        squared_error_sum += torch.sum((prediction_source - rating_test) ** 2).item()
        num_predictions += rating_test.numel()
  finally:
    for model, was_training in zip(eval_models, previous_modes):
      model.train(was_training)

  if num_predictions == 0:
    return float('nan')
  return float(np.sqrt(squared_error_sum / num_predictions))

print("Program: Evaluating the baseline RMSE of the model on the test set")

# RMSE of the untrained model; a checkpoint is only saved once an epoch beats it.
best_rmse = evaluation()

# print baseline best_rmse
print('Baseline RMSE:', best_rmse)

steps_per_epoch = 1000
# per-epoch test RMSE, written to the analytics file after training;
# reset here so that re-running this cell does not accumulate old values
rmse_arr = []
# make sure the checkpoint folder exists before any training time is spent
os.makedirs(data_save_path, exist_ok=True)

for epoch in range(max_epoch):
  for _ in tqdm(range(steps_per_epoch)):
    batch = get_batch(train_users, train_items, train_reviews, train_ratings,
                      batch_size=batch_size)
    _, _, review, rating, item_reviews, user_reviews = batch
    review = review.to(device)
    item_reviews = item_reviews.to(device)
    user_reviews = user_reviews.to(device)
    # The FM heads return shape (batch, 1). Reshape the ratings to match, since
    # subtracting a (batch,) tensor would broadcast to (batch, batch) and
    # compare every prediction with every rating in the batch.
    rating = rating.to(device).float().view(-1, 1)

    # --- Train target network: rating from the actual review ---
    latent_review = textCNN_T(review)
    # detached copy: the transform network regresses onto it without
    # back-propagating into the target network
    latent_review_data = latent_review.detach()
    prediction_target = fm_T(latent_review)
    # calculate the L1 loss
    loss_target = torch.mean(torch.abs(prediction_target - rating))

    # backpropagation
    optimiser_textCNN_T.zero_grad()
    optimiser_fm_T.zero_grad()
    loss_target.backward()
    optimiser_fm_T.step()
    optimiser_textCNN_T.step()

    # --- Train transform network: approximate the target review latent ---
    latent_review_user = textCNN_U(user_reviews)
    latent_review_item = textCNN_I(item_reviews)

    # concatenate the latent vectors and flatten them
    latent_review_user_item = torch.cat((latent_review_user, latent_review_item), 1)
    # pass the actual batch size explicitly so any configured batch_size works
    transform_construction = transform(
        latent_review_user_item, batch_size=latent_review_user_item.size(0))
    transform_construction = transform_construction.view(transform_construction.size(0), -1)
    # calculate the L2 loss between transform_construction and latent_review_data
    loss_transform = torch.mean((transform_construction - latent_review_data) ** 2)

    # backpropagation
    optimiser_transform.zero_grad()
    optimiser_textCNN_I.zero_grad()
    optimiser_textCNN_U.zero_grad()
    loss_transform.backward()
    optimiser_textCNN_I.step()
    optimiser_textCNN_U.step()
    optimiser_transform.step()

    # --- Train source network: rating from the approximated latent ---
    # detached so that only fm_S is updated by this loss
    transform_construction_data = transform_construction.detach()
    prediction_source = fm_S(transform_construction_data)

    # calculate the L1 loss
    loss_source = torch.mean(torch.abs(prediction_source - rating))

    # backpropagation
    optimiser_fm_S.zero_grad()
    loss_source.backward()
    optimiser_fm_S.step()

  # evaluate the model on the test set and record the result for the analytics file
  rmse_test = evaluation()
  rmse_arr.append(rmse_test)
  # print out the loss of the models (last batch of the epoch) and the test rmse
  print('Epoch:', epoch, 'Loss_target:', loss_target.item(), 'Loss_transform:', loss_transform.item(), 'Loss_source:', loss_source.item(), 'RMSE_test:', rmse_test)
  # keep only the best checkpoint seen so far
  if rmse_test < best_rmse:
    best_rmse = rmse_test
    save_training(data_save_path+'/')


In [None]:
# Write every value in rmse_arr to the analytics file, one per line, rounded to
# 4 decimals. The folder is created first so that a missing directory does not
# raise FileNotFoundError after the whole training run has finished.
os.makedirs(analytics_path, exist_ok=True)
with open(f'{analytics_path}/TransNets.txt', 'w') as f:
  for r in rmse_arr:
    f.write(str(np.round(r, 4)) + '\n')