# Two Tower Model Implementation in PyTorch

[Link to Tensorflow Implementation](https://github.com/john2408/Machine-Learning-Specialization-Coursera/blob/main/C3%20-%20Unsupervised%20Learning%2C%20Recommenders%2C%20Reinforcement%20Learning/week2/C3W2/C3W2A2/C3_W2_RecSysNN_Assignment.ipynb)

In [1]:
import numpy as np
import numpy.ma as ma
from numpy import genfromtxt
from collections import defaultdict
import pandas as pd
import tabulate
from recsysNN_utils import *
pd.set_option("display.precision", 1)


## Movie ratings dataset 
The data set is derived from the [MovieLens ml-latest-small](https://grouplens.org/datasets/movielens/latest/) dataset. 

[F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19. <https://doi.org/10.1145/2827872>]

The original dataset has 9000 movies rated by 600 users with ratings on a scale of 0.5 to 5 in 0.5 step increments. The dataset has been reduced in size to focus on movies from the years since 2000 and popular genres. The reduced dataset has $n_u = 395$ users and $n_m= 694$ movies. For each movie, the dataset provides a movie title, release date, and one or more genres. For example "Toy Story 3" was released in 2010 and has several genres: "Adventure|Animation|Children|Comedy|Fantasy|IMAX".  This dataset contains little information about users other than their ratings. This dataset is used to create training vectors for the neural networks described below. 

In [77]:
# Load Data, set configuration variables
item_train, user_train, y_train, item_features, user_features, item_vecs, movie_dict, user_to_genre = load_data()

num_user_features = user_train.shape[1] - 3  # remove userid, rating count and ave rating during training
num_item_features = item_train.shape[1] - 1  # remove movie id at train time
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # start of columns to use in training, user
i_s = 1  # start of columns to use in training, items
scaledata = True  # applies the standard scalar to data if true
print(f"Number of training vectors: {len(item_train)}")

Number of training vectors: 58187


In [78]:
len(user_train), len(item_train), len(y_train)

(58187, 58187, 58187)

In [79]:
len(item_vecs)

1883

In [80]:
pprint_train(user_train, user_features, uvs,  u_s, maxcount=5)

[user id],[rating count],[rating ave],Act ion,Adve nture,Anim ation,Chil dren,Com edy,Crime,Docum entary,Drama,Fan tasy,Hor ror,Mys tery,Rom ance,Sci -Fi,Thri ller
2,16,4.1,3.9,5.0,0.0,0.0,4.0,4.2,4.0,4.0,0.0,3.0,4.0,0.0,4.2,3.9
2,16,4.1,3.9,5.0,0.0,0.0,4.0,4.2,4.0,4.0,0.0,3.0,4.0,0.0,4.2,3.9
2,16,4.1,3.9,5.0,0.0,0.0,4.0,4.2,4.0,4.0,0.0,3.0,4.0,0.0,4.2,3.9
2,16,4.1,3.9,5.0,0.0,0.0,4.0,4.2,4.0,4.0,0.0,3.0,4.0,0.0,4.2,3.9
2,16,4.1,3.9,5.0,0.0,0.0,4.0,4.2,4.0,4.0,0.0,3.0,4.0,0.0,4.2,3.9


In [81]:
pprint_train(item_train, item_features, ivs, i_s, maxcount=5, user=False)

[movie id],year,ave rating,Act ion,Adve nture,Anim ation,Chil dren,Com edy,Crime,Docum entary,Drama,Fan tasy,Hor ror,Mys tery,Rom ance,Sci -Fi,Thri ller
6874,2003,4.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6874,2003,4.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
6874,2003,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
8798,2004,3.8,1,0,0,0,0,0,0,0,0,0,0,0,0,0
8798,2004,3.8,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [82]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import f1_score
import numpy as np
from torch.optim import Adam

# Define User and Product Towers
class UserTower(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2):
        super(UserTower, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

class ProductTower(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2):
        super(ProductTower, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

# Two-Tower Recommendation Model
class TwoTowerRecommendationModel(pl.LightningModule):
    def __init__(self, user_config, product_config, learning_rate):
        super(TwoTowerRecommendationModel, self).__init__()
        self.user_tower = UserTower(**user_config)
        self.product_tower = ProductTower(**product_config)
        self.criterion = nn.MSELoss()
        self.learning_rate = learning_rate

    def forward(self, user_input, product_input):
        user_output = self.user_tower(user_input)
        product_output = self.product_tower(product_input)
        rating_pred_tensor = F.cosine_similarity(user_output, product_output)
        return rating_pred_tensor

    def training_step(self, batch, batch_idx):
        user_input, product_input, target = batch
        rating_pred_tensor = self(user_input, product_input)
        loss = self.criterion(rating_pred_tensor, target)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        user_input, product_input, target = batch
        rating_pred_tensor = self(user_input, product_input)
        loss = self.criterion(rating_pred_tensor, target)
        self.log('val_loss', loss)

    def test_step(self, batch, batch_idx):
        user_input, product_input, target = batch
        rating_pred_tensor = self(user_input, product_input)
        loss = self.criterion(rating_pred_tensor, target)
        self.log('test_loss', loss)
        return {'preds': rating_pred_tensor, 'target': target}

    def predict_step(self, batch, batch_idx):
        user_input, product_input, _ = batch
        rating_pred_tensor = self(user_input, product_input)
        return rating_pred_tensor

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=self.learning_rate)

# Generate Random Sample Data
def generate_random_sample_data(num_samples):
    user_input_dim = 6
    product_input_dim = 3
    user_data = np.random.rand(num_samples, user_input_dim)
    product_data = np.random.rand(num_samples, product_input_dim)
    target_data = np.random.randint(0, 2, size=(num_samples, 1))  # Binary targets for F1 score

    user_tensor = torch.tensor(user_data, dtype=torch.float32)
    product_tensor = torch.tensor(product_data, dtype=torch.float32)
    target_tensor = torch.tensor(target_data, dtype=torch.float32)

    return user_tensor, product_tensor, target_tensor

In [83]:
# Generate synthetic data
user_data, product_data, target_data = generate_random_sample_data(num_samples)

In [84]:
user_data.shape, product_data.shape, target_data.shape

(torch.Size([58187, 6]), torch.Size([58187, 3]), torch.Size([58187, 1]))

In [85]:
# scale training data
scaledata = True
if scaledata:
    item_train_save = item_train
    user_train_save = user_train
    y_train_save = y_train

    scalerItem = StandardScaler()
    scalerItem.fit(item_train)
    item_train = scalerItem.transform(item_train)

    scalerUser = StandardScaler()
    scalerUser.fit(user_train)
    user_train = scalerUser.transform(user_train)
    
    targetScaler = MinMaxScaler((-1, 1))
    targetScaler.fit(y_train.reshape(-1, 1))
    y_train = targetScaler.transform(y_train.reshape(-1, 1))

    print(np.allclose(item_train_save, scalerItem.inverse_transform(item_train)))
    print(np.allclose(user_train_save, scalerUser.inverse_transform(user_train)))

user_data_tensor = torch.tensor(user_train[:, u_s:], dtype=torch.float32)
product_data_tensor = torch.tensor(item_train[:, i_s:], dtype=torch.float32)
target_data_tensor = torch.tensor(y_train.reshape(-1,1), dtype=torch.float32) 

True
True


In [86]:
user_data_tensor.shape, product_data_tensor.shape, target_data_tensor.shape

(torch.Size([58187, 14]), torch.Size([58187, 16]), torch.Size([58187, 1]))

In [87]:
len(item_features), len(user_features)

(17, 17)

In [88]:
dataset = TensorDataset(user_data_tensor, product_data_tensor, target_data_tensor)

In [89]:
target_data_tensor.shape[0]

58187

In [91]:
# Hyperparameters
num_samples = 1000
batch_size = 32
epochs = 10
learning_rate = 0.001

# Model configurations
user_config = {'input_dim': user_data_tensor.shape[1], 'hidden_dim1': 128, 'hidden_dim2': 64}
product_config = {'input_dim': product_data_tensor.shape[1], 'hidden_dim1': 128, 'hidden_dim2': 64}

# Create DataLoader
num_samples = target_data_tensor.shape[0]
train_size = int(0.8 * num_samples)
test_size = num_samples - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize model
model = TwoTowerRecommendationModel(user_config, product_config, learning_rate)

# Train the model
trainer = pl.Trainer(max_epochs=epochs)
trainer.fit(model, train_loader, test_loader)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type         | Params | Mode 
-------------------------------------------------------
0 | user_tower    | UserTower    | 10.2 K | train
1 | product_tower | ProductTower | 10.4 K | train
2 | criterion     | MSELoss      | 0      | train
-------------------------------------------------------
20.6 K    Trainable params
0         Non-trainable params
20.6 K    Total params
0.082     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


                                                                            

  return F.mse_loss(input, target, reduction=self.reduction)
/Users/JOHTORR/Repos/two_tower_model/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 1455/1455 [00:08<00:00, 177.59it/s, v_num=3]

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1:   2%|▏         | 32/1455 [00:00<00:08, 170.68it/s, v_num=3]  

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 9: 100%|██████████| 1455/1455 [00:08<00:00, 164.66it/s, v_num=3]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1455/1455 [00:08<00:00, 164.50it/s, v_num=3]


## Predictions for a new user
First, we'll create a new user and have the model suggest movies for that user. After you have tried this example on the example user content, feel free to change the user content to match your own preferences and see what the model suggests. Note that ratings are between 0.5 and 5.0, inclusive, in half-step increments.

In [51]:
new_user_id = 5000
new_rating_ave = 1.0
new_action = 1.0
new_adventure = 1
new_animation = 1
new_childrens = 1
new_comedy = 5
new_crime = 1
new_documentary = 1
new_drama = 1
new_fantasy = 1
new_horror = 1
new_mystery = 1
new_romance = 5
new_scifi = 5
new_thriller = 1
new_rating_count = 3

user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
                      new_action, new_adventure, new_animation, new_childrens,
                      new_comedy, new_crime, new_documentary,
                      new_drama, new_fantasy, new_horror, new_mystery,
                      new_romance, new_scifi, new_thriller]])

user_vecs = gen_user_vecs(user_vec,len(item_vecs))

In [55]:
user_vecs.shape, item_vecs.shape

((1883, 17), (1883, 17))

In [94]:
if scaledata:
    scaled_user_vecs = scalerUser.transform(user_vecs)
    scaled_item_vecs = scalerItem.transform(item_vecs)
    user_data_tensor = torch.tensor(scaled_user_vecs[:, u_s:], dtype=torch.float32)
    product_data_tensor = torch.tensor(scaled_item_vecs[:, i_s:], dtype=torch.float32)
    y_p = model(user_data_tensor, product_data_tensor).detach().numpy()
else:
    y_p = model(user_vecs[:, u_s:], item_vecs[:, i_s:])
y_p = targetScaler.inverse_transform(y_p.reshape(-1, 1))
    
if np.any(y_p < 0) : 
        print("Error, expected all positive predictions")
sorted_index = np.argsort(-y_p,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_p[sorted_index]
sorted_items = item_vecs[sorted_index]
sorted_user  = user_vecs[sorted_index]

In [96]:
y_p, user, item, movie_dict = sorted_ypu, sorted_user, sorted_items, movie_dict

maxcount=10
count = 0
movies_listed = defaultdict(int)
disp = [["y_p", "movie id", "rating ave", "title", "genres"]]

for i in range(0, y_p.shape[0]):
    if count == maxcount:
        break
    count += 1
    movie_id = item[i, 0].astype(int)
    if movie_id in movies_listed:
        continue
    movies_listed[movie_id] = 1
    disp.append([y_p[i, 0], item[i, 0].astype(int), item[i, 2].astype(float),
                movie_dict[movie_id]['title'], movie_dict[movie_id]['genres']])

table = tabulate.tabulate(disp, tablefmt='html',headers="firstrow")

In [97]:
table

y_p,movie id,rating ave,title,genres
3.59534,7361,4.16031,Eternal Sunshine of the Spotless Mind (2004),Drama|Romance|Sci-Fi
3.59533,4973,4.18333,"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",Comedy|Romance
3.59506,44555,4.11765,"Lives of Others, The (Das leben der Anderen) (2006)",Drama|Romance|Thriller
3.59486,57504,4.1,"Girl Who Leapt Through Time, The (Toki o kakeru shôjo) (2006)",Animation|Comedy|Drama|Romance|Sci-Fi
3.59468,89904,4.04545,The Artist (2011),Comedy|Drama|Romance
3.59459,33660,4.08824,Cinderella Man (2005),Drama|Romance
3.59458,60069,4.05769,WALL·E (2008),Adventure|Animation|Children|Romance|Sci-Fi
3.59431,31658,4.075,Howl's Moving Castle (Hauru no ugoku shiro) (2004),Adventure|Animation|Fantasy|Romance
3.59387,88163,3.98387,"Crazy, Stupid, Love. (2011)",Comedy|Drama|Romance
3.59366,6711,4.03378,Lost in Translation (2003),Comedy|Drama|Romance
