In [62]:
import pandas as pd
import numpy as np

train   = pd.read_csv('./data/ratings_train.csv')
vali    = pd.read_csv('./data/ratings_vali.csv') 

In [31]:
movie_user = train.pivot('userId', 'movieId', 'rating')
movie_user =  movie_user.fillna(0.0)
ratings = movie_user.to_numpy()

In [60]:
import numpy as np
from scipy.sparse import rand as sprand
import torch
import pandas as pd

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# open training file
train   = pd.read_csv('./data/ratings_train.csv')
# open validation file
vali    = pd.read_csv('./data/ratings_vali.csv') 

# create user-movie matrix
user_movie = train.pivot('userId', 'movieId', 'rating')
# fill empty values with 0
user_movie =  user_movie.fillna(0.0)

# Make up some random explicit feedback ratings
# and convert to a numpy array
n_users = len(train.userId.unique())
n_items = len(train.movieId.unique())
userIds = list(user_movie.index)
movieIds = list(user_movie.columns)
# convert to numpy
ratings = user_movie.to_numpy()

class DenseNet(torch.nn.Module):

    def __init__(self, n_users, n_items):
        super().__init__()
   	# user and item embedding layers
        factor_len = 5
        self.user_factors = torch.nn.Embedding(n_users, factor_len)
        self.item_factors = torch.nn.Embedding(n_items, factor_len)
   	    # linear layers
        self.inputs = torch.nn.Linear(factor_len*2, 50)
        # hidden liers
        self.linear1 = torch.nn.Linear(50, 30)
        self.linear2 = torch.nn.Linear(30, 20)
        # output lyer
        self.outputs = torch.nn.Linear(20, 1)

        self.to(DEVICE)

    def forward(self, users, items, dim):
        users_embedding = self.user_factors(users)
        items_embedding = self.item_factors(items)
	# concatenate user and item embeddings to form input
        x = torch.cat([users_embedding, items_embedding], dim)
        x = torch.relu(self.inputs(x))
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        output_scores = self.outputs(x)
        return output_scores

    # def predict(self, users, items):
    #     # return the score
    #     output_scores = self.forward(users, items)
    #     return output_scores
def batch(iterable1, iterable2, n=1):
    l = len(iterable1)
    for ndx in range(0, l, n):
        yield iterable1[ndx:min(ndx + n, l)], iterable2[ndx:min(ndx + n, l)]

# model instance
model = DenseNet(n_users, n_items)
# model.load_state_dict(torch.load('./param.data'))
# model.eval()

def train():
    # loss function
    loss_func = torch.nn.MSELoss()

    optimizer = torch.optim.SGD(model.parameters(), 
                                lr=0.0005) # learning rate

    # Get indexes of nonzero elements (row indexes, col indexes)
    rows, cols = ratings.nonzero()

    # randomly shuffle array
    p = np.random.permutation(len(rows)) # returns shuffled indexes
    rows, cols = rows[p], cols[p]

    for epoch in range(50):
        loss_sum = 0
        batch_num = 1
        for row, col in batch(rows, cols, 5):
            # Turn data into tensors
            rating = torch.FloatTensor(ratings[row, col]).to(DEVICE)
            #print(rating)
            row = torch.LongTensor([row]).to(DEVICE)
            col = torch.LongTensor([col]).to(DEVICE)
            
            # Predict and calculate loss
            prediction = model(row, col, 2)
            loss = loss_func(prediction, rating)
            # save total loss
            loss_sum += loss.item()
            batch_num += 1
            # Backpropagate
            loss.backward()
            # Update the parameters
            optimizer.step()
        print('Epoch: {}, loss: {}'.format(epoch+1, loss_sum/batch_num))

    torch.save(model.state_dict(), './param.data')

def predict(uid, mid):
    row = userIds.index(uid)
    col = movieIds.index(mid)
    print(row)
    print(col)
    row = torch.LongTensor([row]).to(DEVICE)
    col = torch.LongTensor([col]).to(DEVICE)
    prediction = model(row, col, 1)
    return prediction

train()

Epoch: 1, loss: 1.9806071222109287
Epoch: 2, loss: 1.3336575415978145
Epoch: 3, loss: 1.813534037116158
Epoch: 4, loss: 1.5921745781766408
Epoch: 5, loss: 1.4875911936653385
Epoch: 6, loss: 1.8861634648731136
Epoch: 7, loss: 1.3151973085461273
Epoch: 8, loss: 1.883840241640437
Epoch: 9, loss: 1.4914070424214108
Epoch: 10, loss: 1.5879191761626161
Epoch: 11, loss: 1.81698696844556
Epoch: 12, loss: 1.332249520106191
Epoch: 13, loss: 1.9242112372759732
Epoch: 14, loss: 1.4058433598151345
Epoch: 15, loss: 1.6923320386996934
Epoch: 16, loss: 1.725541415648188
Epoch: 17, loss: 1.3828504001249438
Epoch: 18, loss: 1.9300682838576768
Epoch: 19, loss: 1.345343642159231
Epoch: 20, loss: 1.788851893539892
Epoch: 21, loss: 1.6223568647146036
Epoch: 22, loss: 1.4645156243454782
Epoch: 23, loss: 1.7875130864298876
Epoch: 24, loss: 1.3578819988558835
Epoch: 25, loss: 1.763399062439431
Epoch: 26, loss: 1.5060143126365442
Epoch: 27, loss: 1.5367380805116995
Epoch: 28, loss: 1.7421406012666705
Epoch: 29,

In [9]:
def batch(iterable1, iterable2, n=1):
    l = len(iterable1)
    for ndx in range(0, l, n):
        yield iterable1[ndx:min(ndx + n, l)], iterable2[ndx:min(ndx + n, l)]

l1 = [1,2,3,4,5,6,7,8,9,10]
l2 = [1,2,3,4,5,6,7,8,9,10]

for r,c in batch(l1,l2, 3):
    print(r,c)

[1, 2, 3] [1, 2, 3]
[4, 5, 6] [4, 5, 6]
[7, 8, 9] [7, 8, 9]
[10] [10]
