In [3]:
import numpy as np
from scipy.sparse import rand as sprand
from scipy.sparse import lil_matrix
import torch
from torch.autograd import Variable
import pandas as pd

In [None]:
# Column layout of the '::'-delimited MovieLens rating files (they ship without a header row).
names = ['user_id', 'item_id', 'rating', 'timestamp']

# A multi-character separator is only supported by pandas' python parsing engine.
df_train = pd.read_csv(
    'ml-10M100K/r3.train',
    sep='::',
    names=names,
    engine='python',
)
df_test = pd.read_csv(
    'ml-10M100K/r3.test',
    sep='::',
    names=names,
    engine='python',
)

In [None]:
# Preview the first rows of the training ratings frame.
df_train.head()

In [None]:
def get_movielens_ratings(df, n_users=None, n_items=None):
    """Build a sparse user x item rating matrix from a ratings frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``user_id`` and ``item_id`` columns (used to size the
        matrix). The first three columns are read positionally as
        (user id, item id, rating); ids are treated as 1-based and shifted
        down by one to obtain row / column indices.
    n_users, n_items : int, optional
        Explicit matrix dimensions. When omitted, each is taken from the
        largest id found in ``df``. Pass them explicitly to give two splits
        (e.g. train and test) the same shape.

    Returns
    -------
    scipy.sparse.lil_matrix
        Float matrix of shape ``(n_users, n_items)``; when a (user, item)
        pair occurs more than once the last occurrence wins.

    Raises
    ------
    ValueError
        If ``df`` has no rows and a dimension was not supplied.
    IndexError
        If an id in ``df`` falls outside the requested dimensions.
    """
    if len(df) == 0 and (n_users is None or n_items is None):
        raise ValueError(
            "cannot infer matrix shape from an empty frame; "
            "pass n_users and n_items explicitly"
        )
    if n_users is None:
        n_users = int(df.user_id.max())
    if n_items is None:
        n_items = int(df.item_id.max())

    interactions = lil_matrix((n_users, n_items), dtype=float)
    # Plain tuples (name=None) skip namedtuple construction on every row;
    # any columns after the third (e.g. timestamp) are ignored.
    for user_id, item_id, rating, *_ in df.itertuples(index=False, name=None):
        interactions[user_id - 1, item_id - 1] = rating
    return interactions

In [None]:
# Build the sparse user x item matrix for the training split and show its dimensions.
ratings = get_movielens_ratings(df_train)
ratings.shape

In [None]:
# Build the sparse user x item matrix for the test split and show its dimensions.
# NOTE(review): the shape is derived from the largest ids present in df_test itself,
# so it is not guaranteed to equal ratings.shape — confirm before indexing the model with it.
test_ratings = get_movielens_ratings(df_test)
test_ratings.shape

In [None]:
class MatrixFactorization(torch.nn.Module):
    """Matrix-factorisation recommender: score(u, i) = <user_factors[u], item_factors[i]>.

    Parameters
    ----------
    n_users, n_items : int
        Number of rows in the user / item embedding tables.
    n_factors : int, default 5
        Latent dimension shared by both tables.
    lr : float, default 1e-6
        Learning rate of the SGD optimizer.
    weight_decay : float, default 1e-3
        L2 penalty applied by the SGD optimizer.

    Attributes
    ----------
    reg_loss_func : torch.optim.SGD
        The optimizer (the attribute name is kept for compatibility; it is
        not a loss function).
    currentLoss
        Loss of the most recently processed batch (placeholder ``2`` until
        the first batch has been seen).

    Notes
    -----
    ``run_epoch`` densifies the selected rating rows and applies MSE to every
    entry, so unrated (implicitly zero) cells are treated as targets of 0.
    """

    currentLoss = 2
    loss_func = torch.nn.MSELoss()

    def __init__(self, n_users, n_items, n_factors=5, lr=1e-6, weight_decay=1e-3):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items,
                                               n_factors,
                                               sparse=False)
        # TODO: also consider fitting an overall bias (mu, 1x1) plus user
        # (1 x n_users) and item (1 x n_items) bias vectors.

        # The optimizer must be created per instance, after the embeddings
        # exist; building it in the class body referenced a global `model`
        # that is not defined while the class is being created.
        self.reg_loss_func = torch.optim.SGD(self.parameters(),
                                             lr=lr,
                                             weight_decay=weight_decay)

    # For convenience when we want to predict a single user-item pair.
    def predict(self, user, item):
        """Return the score for each aligned (user[k], item[k]) pair.

        ``user`` and ``item`` are index tensors of equal length; the result
        has one score per pair.
        """
        # No bias terms are fitted yet, so the score is just the dot product.
        return (self.user_factors(user) * self.item_factors(item)).sum(1)

    # Much more efficient batch operator. This should be used for training purposes
    def forward(self, users, items):
        """Return the ``len(users) x len(items)`` matrix of scores."""
        return torch.mm(self.user_factors(users),
                        torch.transpose(self.item_factors(items), 0, 1))

    @staticmethod
    def get_batch(batch_size, ratings):
        """Yield arrays of shuffled row indices that cover every row once.

        Every batch has ``batch_size`` indices except possibly the last one.
        The shuffle uses NumPy's global random state.

        Raises
        ------
        ValueError
            If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        n_rows = ratings.shape[0]
        shuffled = np.random.permutation(n_rows)
        # The tail batch is cut from the permutation as well; taking the raw
        # index range instead would repeat some rows and drop others.
        for start in range(0, n_rows, batch_size):
            yield shuffled[start:start + batch_size]

    def run_epoch(self, batch_size, ratings):
        """Run one optimisation pass over all rows of ``ratings``.

        ``ratings`` is a sparse user x item matrix supporting fancy row
        indexing and ``.toarray()``. Updates ``self.currentLoss`` after each
        batch.
        """
        device = self.user_factors.weight.device
        # Every batch is scored against all items, so build the index once.
        cols = torch.arange(ratings.shape[1], dtype=torch.long, device=device)

        for batch in self.get_batch(batch_size, ratings):
            # Set gradients to zero
            self.reg_loss_func.zero_grad()

            # Dense targets for the selected users plus their row indices
            interactions = torch.as_tensor(ratings[batch, :].toarray(),
                                           dtype=torch.float32,
                                           device=device)
            rows = torch.as_tensor(batch, dtype=torch.long, device=device)

            # Predict with this instance (not a global model) and compute loss
            predictions = self(rows, cols)
            loss = self.loss_func(predictions, interactions)

            # Backpropagate
            loss.backward()

            # Update the parameters
            self.reg_loss_func.step()

            self.currentLoss = loss.detach()

    def fit(self, numEpochs, batch_size, ratings):
        """Train for ``numEpochs`` passes over ``ratings``; returns ``self``."""
        for epoch in range(numEpochs):
            self.run_epoch(batch_size, ratings)
            print(f"epoch {epoch + 1}/{numEpochs}  last batch loss: {float(self.currentLoss):.6f}")
        return self

    def train(self, numEpochs=True, batch_size=None, ratings=None):
        """Training loop entry point that stays compatible with ``nn.Module.train``.

        * ``train(numEpochs, batch_size, ratings)`` fits the model (same as
          ``fit``).
        * ``train()`` / ``train(bool)`` switches training mode exactly like
          ``torch.nn.Module.train``. ``Module.eval()`` calls ``train(False)``
          internally, so this path has to keep working.

        Raises
        ------
        TypeError
            If only one of ``batch_size`` / ``ratings`` is supplied.
        """
        if batch_size is None and ratings is None:
            return super().train(numEpochs)
        if batch_size is None or ratings is None:
            raise TypeError("train(numEpochs, batch_size, ratings) needs both batch_size and ratings")
        return self.fit(numEpochs, batch_size, ratings)



In [None]:
# One embedding row per user / item of the training matrix, with 2 latent factors each.
model = MatrixFactorization(ratings.shape[0], ratings.shape[1], n_factors=2)
# GPU placement is currently disabled:
# if torch.cuda.is_available():
#     model.cuda()

In [None]:
# Fit on the training matrix: 2 epochs, 1000 user rows per batch.
model.train(2,1000,ratings)

In [None]:
# Number of passes over the training data.
EPOCH = 2
# User rows per mini-batch (50 was an earlier setting).
BATCH_SIZE = 1000 #50
# Learning rate.
# NOTE(review): the SGD optimizer in MatrixFactorization is built with lr=1e-6, not this value — confirm which is intended.
LR = 0.001

In [None]:
# Score one batch of test users against every item and check the output shape.
# Print a summary only; printing the matrix itself lists every stored rating.
print(test_ratings.shape, test_ratings.nnz)

# The loop that used to bind `batch` is commented out, so define the batch
# explicitly (the first BATCH_SIZE user rows) to keep this cell runnable on a
# fresh kernel.
batch = np.arange(min(BATCH_SIZE, test_ratings.shape[0]))
rows = torch.as_tensor(batch, dtype=torch.long)
cols = torch.arange(test_ratings.shape[1], dtype=torch.long)

# Inference only: no gradient bookkeeping needed.
with torch.no_grad():
    predictions = model(rows, cols)
print(predictions.cpu().numpy().shape)

In [None]:
# Scratch check: a random ordering of the test-matrix row indices.
np.random.permutation(test_ratings.shape[0])

In [None]:
# Print the index of the first user row of the test matrix that holds a positive rating.
# The per-row maximum is computed on the sparse matrix: `test_ratings[i:,]` selected
# every row from i onwards (not row i) and densified that whole slice twice per iteration.
row_max = test_ratings.tocsr().max(axis=1).toarray().ravel()
rated_rows = np.flatnonzero(row_max > 0)
if rated_rows.size:
    print(int(rated_rows[0]))

In [None]:
# Print the index of the first item column of the test matrix that holds a positive rating.
# The per-column maximum is computed on the sparse matrix: the old loop walked columns
# with range(shape[0]) (the row count) and densified each column twice.
col_max = test_ratings.tocsc().max(axis=0).toarray().ravel()
rated_cols = np.flatnonzero(col_max > 0)
if rated_cols.size:
    print(int(rated_cols[0]))

In [None]:
# Row 28665 of the test matrix (one user's ratings across all items), kept sparse.
test_ratings[28665,:]

In [None]:
# Column 0 of the test matrix (all users' ratings of the first item), kept sparse.
test_ratings[:,0]

In [None]:
# Same user row as a dense 1-D array of rating values (0 where nothing is stored).
test_ratings[28665].toarray()[0]

In [None]:
# Column 0 transposed and densified into a 1-D array of rating values, one entry per user.
test_ratings[:,0].T.toarray()[0]

In [None]:
# Index tensors for the user / item inspected in the cells above.
# The embeddings are looked up by *index*; the previous version passed the dense
# rating values of row 28665 and column 0 as if they were user / item indices.
rows = torch.tensor([28665], dtype=torch.long)  # user row index
cols = torch.tensor([0], dtype=torch.long)      # item column index

In [None]:
# Forward pass on the index tensors from the previous cell; yields a len(rows) x len(cols) score matrix.
model(rows, cols)