<a href="https://colab.research.google.com/github/Zooshi/neural-networks-in-python/blob/master/recommender_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!wget 'http://files.grouplens.org/datasets/movielens/ml-latest-small.zip'

--2021-12-05 09:19:11--  http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2021-12-05 09:19:11 (7.10 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]



In [4]:
!ls

links.csv  movies.csv  ratings.csv  README.txt	tags.csv


In [5]:
import pandas as pd

df = pd.read_csv('ratings.csv')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
df['rating'].value_counts()

4.0    26818
3.0    20047
5.0    13211
3.5    13136
4.5     8551
2.0     7551
2.5     5550
1.0     2811
1.5     1791
0.5     1370
Name: rating, dtype: int64

In [8]:
import torch
import torch.nn as nn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

lbl_user = preprocessing.LabelEncoder()
lbl_movie = preprocessing.LabelEncoder()

df['userId'] = lbl_user.fit_transform(df['userId'])
df['movieId'] = lbl_movie.fit_transform(df['movieId'])

df_train, df_test =train_test_split(df,test_size=0.2, random_state=33, stratify=df['rating'])

In [9]:
class MovieDataset():
  def __init__(self,users,movies,ratings=None):
    self.users = users
    self.movies = movies
    self.ratings = ratings

  def __len__(self):
    return len(self.ratings)

  def __getitem__(self,ix):
    return {
        "users":torch.tensor(self.users[ix], dtype=torch.long),
        "movies":torch.tensor(self.movies[ix], dtype=torch.long),
        "ratings":torch.tensor(self.ratings[ix],dtype=torch.float)
    }


In [14]:

train_dataset = MovieDataset(df_train['userId'].values, df_train['movieId'].values, df_train['rating'].values)

test_dataset = MovieDataset(df_test['userId'].values, df_test['movieId'].values, df_test['rating'].values)

In [15]:
train_dataset[0]

{'movies': tensor(260), 'ratings': tensor(3.5000), 'users': tensor(598)}

In [16]:
from torch.utils.data import DataLoader

dl_train  =DataLoader(train_dataset, batch_size=16, shuffle=True)
dl_test = DataLoader(test_dataset, batch_size=16)

In [17]:
ex = next(iter(dl_train))
ex

{'movies': tensor([  15, 1260,  364, 2264, 1085,   10, 1494, 2449,  948,  676, 7823, 2224,
         1455, 1210, 2300, 3977]),
 'ratings': tensor([4.0000, 1.0000, 3.0000, 3.0000, 3.0000, 2.5000, 5.0000, 2.5000, 5.0000,
         3.0000, 1.0000, 3.5000, 2.0000, 4.0000, 3.0000, 3.5000]),
 'users': tensor([587, 468, 482, 602, 604, 344, 413, 598,  63, 473,  49, 278, 526,  95,
         306, 181])}

In [18]:
from sklearn import metrics
import numpy as np
import torch.optim as optim

class Model(nn.Module):
  def __init__(self,num_users,num_movies):
    super().__init__()

    self.user_embed = nn.Embedding(num_users,32)
    self.movie_embed = nn.Embedding(num_movies,32)

    self.out = nn.Linear(64,1)

  def forward(self,users,movies,ratings=None):
    user_embeds = self.user_embed(users)
    movie_embeds = self.movie_embed(movies)
    output = torch.cat([user_embeds,movie_embeds],dim=1)
    out = self.out(output)
    return out


In [19]:
m = Model(len(lbl_user.classes_),len(lbl_movie.classes_)).cuda()

In [20]:
ex_cuda = {key: value.cuda() for key,value in ex.items()}

In [21]:
m(**ex_cuda)

tensor([[-0.0212],
        [ 0.6576],
        [ 0.3691],
        [-0.4524],
        [ 1.0529],
        [ 0.2440],
        [ 0.2049],
        [-0.1166],
        [-0.3739],
        [ 0.5215],
        [-0.0213],
        [-0.3556],
        [-0.3736],
        [ 0.0184],
        [-0.9002],
        [-0.1586]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [22]:
loss = nn.MSELoss()
optimizer = optim.Adam(m.parameters(),lr=1e-3)

In [23]:
loss(m(**ex_cuda),ex_cuda['ratings'])

  return F.mse_loss(input, target, reduction=self.reduction)


tensor(10.7316, device='cuda:0', grad_fn=<MseLossBackward0>)

In [25]:
for epoch in range(10):
  ep_loss = 0.0
  for batch in tqdm(dl_train):
    batch_loss = 0.0
    batch  ={key:value.cuda() for key,value in batch.items()}
    out = m(**batch)
    optimizer.zero_grad()
    l = loss(out,batch['ratings'])
    batch_loss += l.item()
    ep_loss+=l.item()
    l.backward()
    optimizer.step()
  print(f'epoch {epoch+1} with loss {ep_loss}')
    

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 5042/5042 [00:11<00:00, 420.46it/s]


epoch 1 with loss 14099.26837131381


100%|██████████| 5042/5042 [00:12<00:00, 416.98it/s]


epoch 2 with loss 5572.091757416725


100%|██████████| 5042/5042 [00:12<00:00, 412.75it/s]


epoch 3 with loss 5519.997042194009


100%|██████████| 5042/5042 [00:12<00:00, 409.48it/s]


epoch 4 with loss 5516.56616050005


100%|██████████| 5042/5042 [00:12<00:00, 404.81it/s]


epoch 5 with loss 5515.641989082098


100%|██████████| 5042/5042 [00:12<00:00, 406.05it/s]


epoch 6 with loss 5514.380329057574


100%|██████████| 5042/5042 [00:12<00:00, 401.21it/s]


epoch 7 with loss 5513.997905641794


 22%|██▏       | 1108/5042 [00:02<00:09, 401.31it/s]


KeyboardInterrupt: ignored

In [24]:
from tqdm import tqdm