<a href="https://colab.research.google.com/github/ciciwu/recsys_playground/blob/main/MovieLen_collab_filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
data = pd.read_csv('movie_len/ratings.csv')

In [2]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [7]:
num_users = data.userId.nunique()

In [6]:
num_items = data.movieId.nunique()

## Map the id to smaller ids,save spaces

In [53]:
userId2idx = {}
for idx, id in enumerate(data.userId.unique()):
  userId2idx[id] = idx

In [54]:
itemId2idx = {}
for idx, id in enumerate(data.movieId.unique()):
  itemId2idx[id] = idx

In [52]:
num_users

610

## Define the model

In [30]:
import torch.nn as nn
from torch.optim import SGD
from torch.nn import MSELoss


In [16]:
emb_size = 10

In [143]:
class MF(nn.Module):
  def __init__(self, num_items,num_users,emb_size):
    super(MF,self).__init__()
    self.user_emb = nn.Embedding(num_users, emb_size)
    self.item_emb = nn.Embedding(num_items, emb_size)
    self.user_emb.weight.data.uniform_(0, 1)
    self.item_emb.weight.data.uniform_(0, 1)

  def forward(self,userIds, itemIds):
    user_emb = self.user_emb(userIds)
    item_emb = self.item_emb(itemIds)
    # dot_p = user_emb*item_emb
    # res = dot_p.sum(axis = 1)
    return (user_emb*item_emb).sum(axis = 1)


In [56]:
userId = data.userId.apply(lambda x: userId2idx[x])

In [69]:
itemId.shape

(100836,)

In [57]:
itemId = data.movieId.apply(lambda x: itemId2idx[x])

In [None]:
nn.embed()

## Define optimizer, loss


In [32]:
lr = 0.1

In [144]:
mf = MF(num_items, num_users, 100)

In [145]:
optimizer = SGD(params=mf.parameters(), lr=lr)

In [65]:
mseloss = MSELoss()

## Training loop

In [45]:
import torch

In [148]:
ratings = torch.tensor(data.rating, dtype=torch.float32 )
userID = torch.tensor(userId, dtype=torch.long )
itemID = torch.tensor(itemId, dtype=torch.long )

In [42]:
epoch = 5

In [135]:
import torch.nn.functional as F

In [150]:
data.shape

(100836, 4)

In [155]:
running_loss = 0.0
for i in range(epoch):
  pred = mf(userID,itemID)
  loss = mseloss(pred, ratings)
  running_loss += loss.item()
  optimizer.zero_grad()
  loss.register_hook(lambda grad: print(grad))
  loss.backward()
  print(mf.user_emb.weight.grad)
  optimizer.step()

  print(f'loss this epoch {loss.item()}')
  print(f'epoch: {i+1}, loss: {running_loss/(i+1)}')



tensor(385.5774)
None
tensor(1.)


  print(loss.grad)


tensor([[0.0468, 0.0483, 0.0475,  ..., 0.0500, 0.0491, 0.0479],
        [0.0056, 0.0056, 0.0048,  ..., 0.0047, 0.0051, 0.0056],
        [0.0098, 0.0079, 0.0089,  ..., 0.0100, 0.0082, 0.0080],
        ...,
        [0.1631, 0.1648, 0.1615,  ..., 0.1602, 0.1538, 0.1586],
        [0.0062, 0.0079, 0.0083,  ..., 0.0084, 0.0066, 0.0076],
        [0.2331, 0.2387, 0.2228,  ..., 0.2330, 0.2290, 0.2317]])
loss this epoch 385.5773620605469
epoch: 1, loss: 385.5773620605469
tensor(383.4738)
None
tensor(1.)
tensor([[0.0467, 0.0482, 0.0474,  ..., 0.0499, 0.0490, 0.0478],
        [0.0056, 0.0056, 0.0048,  ..., 0.0047, 0.0051, 0.0056],
        [0.0097, 0.0079, 0.0089,  ..., 0.0100, 0.0082, 0.0080],
        ...,
        [0.1624, 0.1641, 0.1608,  ..., 0.1595, 0.1531, 0.1579],
        [0.0062, 0.0078, 0.0083,  ..., 0.0084, 0.0065, 0.0076],
        [0.2316, 0.2371, 0.2213,  ..., 0.2315, 0.2274, 0.2301]])
loss this epoch 383.4738464355469
epoch: 2, loss: 384.5256042480469
tensor(381.3937)
None
tensor(1.)
te

In [156]:
mf.parameters

<bound method Module.parameters of MF(
  (user_emb): Embedding(610, 100)
  (item_emb): Embedding(9724, 100)
)>