In [None]:
import pandas as pd

In [None]:
# Load the book metadata and the user/book ratings from the working directory.
books_df = pd.read_csv('books.csv')
ratings_df = pd.read_csv('ratings.csv')

print('Dimensions of books are:', books_df.shape, '\nThe dimensions of ratings are:', ratings_df.shape)

# Distinct users and books that actually appear in the ratings table.
n_users = ratings_df.user_id.nunique()
n_books = ratings_df.book_id.nunique()

# Size of the dense user x book matrix versus the number of observed ratings.
n_cells = n_users * n_books
n_ratings = len(ratings_df)
# The ratio below is the *filled* share (density); sparsity is its complement.
# Guard the division so an empty ratings file reports 0% instead of raising.
density_pct = n_ratings / n_cells * 100 if n_cells else 0.0
sparsity_pct = 100 - density_pct
print(
    'A full rating matrix will have', n_cells, 'elements. But we only have', n_ratings,
    'ratings, so only', density_pct, '% of the matrix is filled (sparsity of', sparsity_pct, '%)'
)

In [None]:
import torch
import numpy as np
# The autograd package was misspelled as "autogad", which fails at import time.
from torch.autograd import Variable
# tqdm has no "tqdm_notebooks"; the notebook progress bar lives in tqdm.notebook.
from tqdm.notebook import tqdm

In [None]:
class MatrixFactorization(torch.nn.Module):
    """Dot-product matrix factorization over user and item embeddings.

    Every user and every item owns an ``n_factors``-wide embedding row; the
    score of a (user, item) pair is the dot product of the two rows.
    """

    def __init__(self, n_users, n_items, n_factors=20):
        """Create and initialise the two embedding tables.

        Args:
            n_users: number of rows in the user embedding table.
            n_items: number of rows in the item embedding table.
            n_factors: embedding width shared by users and items.
        """
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, n_factors)
        self.item_factors = torch.nn.Embedding(n_items, n_factors)
        # Start from small non-negative weights drawn from U(0, 0.05).
        torch.nn.init.uniform_(self.user_factors.weight, 0, 0.05)
        torch.nn.init.uniform_(self.item_factors.weight, 0, 0.05)

    def forward(self, data):
        """Score a batch of (user, item) index pairs.

        Args:
            data: 2-D integer tensor; column 0 holds user indices and
                column 1 holds item indices.

        Returns:
            1-D tensor with one dot-product score per row of ``data``.
        """
        users, items = data[:, 0], data[:, 1]
        return (self.user_factors(users) * self.item_factors(items)).sum(1)

    def predict(self, user, item):
        """Score user/item indices given as two separate arguments.

        ``forward`` takes one ``(N, 2)`` tensor, so the two inputs are paired
        up here before delegating to it.

        Args:
            user: user index or indices (int, sequence, or tensor).
            item: item index or indices, same length as ``user``.

        Returns:
            1-D tensor of scores, one per (user, item) pair.
        """
        # Build the indices on the same device as the embedding weights.
        device = self.user_factors.weight.device
        users = torch.as_tensor(user, dtype=torch.long, device=device).reshape(-1)
        items = torch.as_tensor(item, dtype=torch.long, device=device).reshape(-1)
        return self.forward(torch.stack((users, items), dim=1))

In [None]:
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader 

class Loader(Dataset):
    """Torch dataset yielding ``([user_idx, book_idx], rating)`` samples.

    Raw ``user_id`` / ``book_id`` values are remapped to contiguous
    zero-based indices (in order of first appearance) so they can be used
    directly as embedding row numbers.

    Attributes:
        ratings: copy of the input frame with ids replaced by indices.
        userid2idx / bookid2idx: raw id -> index lookup tables.
        idx2userid / idx2bookid: the inverse lookup tables.
        x: tensor built from the ``[user_idx, book_idx]`` columns.
        y: tensor built from the ``rating`` column.
    """

    def __init__(self, ratings=None):
        """Build the id mappings and the feature/target tensors.

        Args:
            ratings: DataFrame with ``user_id``, ``book_id`` and ``rating``
                columns. When omitted, the notebook-level ``ratings_df`` is
                used, so ``Loader()`` keeps working as before.
        """
        source = ratings_df if ratings is None else ratings
        # Work on a copy so the caller's frame keeps its raw ids.
        self.ratings = source.copy()

        # Column names match the ones the notebook reads when counting
        # users and books (user_id / book_id).
        users = source['user_id'].unique()
        books = source['book_id'].unique()
        self.userid2idx = {o: i for i, o in enumerate(users)}
        self.bookid2idx = {o: i for i, o in enumerate(books)}

        self.idx2userid = {i: o for o, i in self.userid2idx.items()}
        self.idx2bookid = {i: o for o, i in self.bookid2idx.items()}

        self.ratings['book_id'] = source['book_id'].map(self.bookid2idx)
        self.ratings['user_id'] = source['user_id'].map(self.userid2idx)

        # Select the feature columns explicitly, user first and book second,
        # so the layout does not depend on the CSV's column order or on
        # which extra columns happen to be present.
        self.x = torch.tensor(self.ratings[['user_id', 'book_id']].values)
        self.y = torch.tensor(self.ratings['rating'].values)

    def __getitem__(self, index):
        """Return the ``(features, rating)`` pair stored at ``index``."""
        return (self.x[index], self.y[index])

    def __len__(self):
        """Return the number of rating rows in the dataset."""
        return len(self.ratings)