In [1]:
import numpy as np
import random
import pandas as pd
import pickle
import torch
from torch import nn

In [2]:
train = pd.read_pickle("../data/ml-1m-split/train.pkl")
test = pd.read_pickle("../data/ml-1m-split/test.pkl")
full = pd.read_pickle("../data/ml-1m-split/full.pkl")

num_users = len(full.groupby("user").size())
num_items = len(full.groupby("item").size())

In [58]:
class MatrixFactorization(nn.Module):
  def __init__(self, num_users, num_items, num_factors):
    super().__init__()
    self.user_factors = nn.Embedding(num_users, num_factors, sparse=False)
    self.item_factors = nn.Embedding(num_items, num_factors, sparse=False)
    
    self.user_factors.weight.data.uniform_(-0.25, 0.25)
    self.item_factors.weight.data.uniform_(-0.25, 0.25)
    
  def forward(self, users, items):
    return torch.diagonal(torch.mm(self.user_factors(users), torch.transpose(self.item_factors(items), 0, 1)))

In [59]:
num_factors = 30
epochs = 1

def alt_min(num_factors, learning_rate):
  model = MatrixFactorization(num_users, num_items, num_factors)
  params = list(model.parameters())
  user_optimizer = torch.optim.SGD([params[0]], lr=learning_rate)
  item_optimizer = torch.optim.SGD([params[1]], lr=learning_rate)
  loss_fn = nn.MSELoss()

  for i in range(epochs):
    for (idx, [user, item, _, rating, _]) in train.iterrows():
      prediction = model(torch.LongTensor([user]), torch.LongTensor([item]))
      loss = loss_fn(prediction, torch.FloatTensor([rating]))
      grad = loss.backward()
      print(grad)
      user_optimizer.step()
      model.zero_grad()

In [25]:
avg_item_ratings = np.zeros(num_items)
for (item, df) in train.groupby("item"):
  avg_item_ratings[item] = np.mean(df["rating"].values)

In [33]:
train_shuffled = train.sample(frac=1).reset_index(drop=True)

In [36]:
len(train_shuffled)

800193

In [40]:
epochs = 10

def alt_min(num_factors, lrate):
  M = np.random.uniform(0, 0.25, (num_items, num_factors))
  # M[:, 0] = avg_item_ratings
  U = np.ones((num_users, num_factors))
  U_prev = np.zeros((num_users, num_factors))
  U_freqs = train.groupby("user").size().values.reshape(-1, 1)    
    
  for i in range(epochs):
    U_step = np.zeros((num_users, num_factors))
    total_loss = 0
    for (idx, [user, item, _, rating, _]) in train_shuffled.iterrows():
      residual = rating - np.dot(U[user], M[item])
      total_loss += residual ** 2
      U_step[user] += residual * M[item]
    
    U_step = lrate * (U_step / U_freqs)
    U += U_step
    print("EPOCH 1\nStep L2 Norm: {}\nRMSE: {}".format(np.linalg.norm(U_step), np.sqrt(total_loss / len(train_shuffled))))
    
          
      
    
  
  

In [43]:
alt_min(30, 0.5)

0
200000
400000
600000
800000
EPOCH 1
Step L2 Norm: 12.253237942641325
RMSE: 1.2000548624106067
0
200000
400000
600000
800000
EPOCH 1
Step L2 Norm: 9.43106566112119
RMSE: 1.1570828266019815
0
200000
400000
600000
800000
EPOCH 1
Step L2 Norm: 7.318747459406452
RMSE: 1.1311688881735902
0
200000
400000
600000
800000
EPOCH 1
Step L2 Norm: 5.752674947704704
RMSE: 1.1156156609148236
0
200000
400000
600000
800000
EPOCH 1
Step L2 Norm: 4.6083465600404665
RMSE: 1.1062456775370202
0
200000


KeyboardInterrupt: 