In [191]:
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import mean_squared_error, r2_score

In [192]:
def get_evaluation(R, P, Q, non_zeros):
    error = 0
    full_prd_matrix = np.dot(P, Q.T)

    x_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    y_non_zero_ind = [non_zero[1] for non_zero in non_zeros]
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind]

    full_prd_matrix_non_zeros = full_prd_matrix[x_non_zero_ind, y_non_zero_ind]

    mse = mean_squared_error(R_non_zeros, full_prd_matrix_non_zeros)
    r2 = r2_score(R_non_zeros, full_prd_matrix_non_zeros)
    rmse = np.sqrt(mse)

    return rmse, r2

In [195]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, num_users, num_items, n_factors=20):
        super(MatrixFactorization, self).__init__()
        self.n_factors = n_factors

        self.user_factors = torch.randn(num_users, n_factors).requires_grad_()
        self.item_factors = torch.randn(num_items, n_factors).requires_grad_()

    def forward(self, user, item):
        return (self.user_factors(user) * self.item_factors(item)).sum(1)

In [214]:
def NMF(input_matrix, factors, epochs=200, learning_rate=0.008, weight_decay=0.01):
    num_users, num_items = input_matrix.shape

    model = MatrixFactorization(num_users, num_items, factors)
    optimizer = torch.optim.Adam([model.item_factors, model.user_factors], lr=learning_rate)
    loss_function = torch.nn.MSELoss()

    loss_curve = []

    for epoch in range(epochs):
        optimizer.zero_grad()
        mask = ~torch.isnan(input_matrix)

        # print((model.user_factors @ model.item_factors.t())[mask])
        # print(input_matrix[mask])
        loss = loss_function((model.user_factors @ model.item_factors.t())[mask], input_matrix[mask])
        loss.backward()
        optimizer.step()

        model.user_factors.data.clamp_(min=0)
        model.item_factors.data.clamp_(min=0)

        loss_curve.append(loss.item())

        # if epoch % 10 == 0:
        print(f'[{epoch}] loss: {loss.item():.05f}')

    return model.user_factors, model.item_factors

In [188]:
movies = pd.read_csv('../dataset/Netflix_Dataset_Movie.csv')
ratings = pd.read_csv('../dataset/Netflix_Dataset_Rating.csv')

ratings = ratings[['User_ID', 'Movie_ID', 'Rating']]
rating_movies = pd.merge(ratings, movies, on='Movie_ID')
ratings_matrix = rating_movies.pivot_table('Rating', index='User_ID', columns='Name')

In [197]:
ratings_matrix_tensor = torch.Tensor(ratings_matrix.values)

In [201]:
ratings_matrix_tensor

tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, 3.,  ..., nan, 3., nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]])

In [215]:
nmf_run = NMF(ratings_matrix_tensor, 50, epochs=500)

[0] loss: 63.35484
[1] loss: 29.50241
[2] loss: 28.14599
[3] loss: 26.74527
[4] loss: 25.38014
[5] loss: 24.06735
[6] loss: 22.81307
[7] loss: 21.62005
[8] loss: 20.48888
[9] loss: 19.41856
[10] loss: 18.40804
[11] loss: 17.45552
[12] loss: 16.55931
[13] loss: 15.71704
[14] loss: 14.92669
[15] loss: 14.18606
[16] loss: 13.49268
[17] loss: 12.84393
[18] loss: 12.23733
[19] loss: 11.67080
[20] loss: 11.14189
[21] loss: 10.64830
[22] loss: 10.18781
[23] loss: 9.75830
[24] loss: 9.35768
[25] loss: 8.98429
[26] loss: 8.63595
[27] loss: 8.31097
[28] loss: 8.00761
[29] loss: 7.72442
[30] loss: 7.45986
[31] loss: 7.21248
[32] loss: 6.98103
[33] loss: 6.76426
[34] loss: 6.56098
[35] loss: 6.37016
[36] loss: 6.19082
[37] loss: 6.02199
[38] loss: 5.86283
[39] loss: 5.71257
[40] loss: 5.57051
[41] loss: 5.43603
[42] loss: 5.30857
[43] loss: 5.18759
[44] loss: 5.07259
[45] loss: 4.96313
[46] loss: 4.85880
[47] loss: 4.75927
[48] loss: 4.66419
[49] loss: 4.57322
[50] loss: 4.48612
[51] loss: 4.40263

KeyboardInterrupt: 

In [132]:
pd.DataFrame((nmf_run[1] @ nmf_run[0].t()).detach().numpy())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,143448,143449,143450,143451,143452,143453,143454,143455,143456,143457
0,2.625538,3.431872,2.672669,3.709504,3.731326,4.976661,3.864340,3.340634,4.341229,3.605201,...,3.160424,3.257455,3.219889,2.555960,3.581506,3.559414,2.259343,3.390995,3.698290,6.101260
1,3.231055,3.704476,5.512858,2.594706,4.040402,4.003501,2.928669,3.937875,2.845485,3.529291,...,5.122814,3.709512,5.209528,4.452822,3.942453,3.933462,3.815676,3.757327,4.151972,5.317791
2,2.389268,4.776480,2.118403,3.782547,3.688638,4.122883,2.835291,3.726233,3.207073,3.018428,...,3.762223,3.493170,3.012693,4.475636,3.520341,3.706087,3.514571,4.254776,3.864866,4.007757
3,3.869022,4.619966,2.866062,3.570616,3.354860,2.107881,3.196456,1.590494,3.119542,3.519136,...,1.926454,3.480530,5.935805,5.453445,3.077968,3.155723,2.234697,2.595825,3.799029,3.010624
4,5.040861,3.637879,4.055535,4.685837,5.602990,2.047171,3.208319,4.454806,3.039951,5.140816,...,5.695179,3.051153,4.459912,3.445697,2.047769,2.638229,2.175391,3.274747,4.197145,1.427637
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1337,4.036912,4.409901,2.952969,4.057939,6.793875,3.793667,5.437775,3.914649,5.042058,4.427708,...,3.746997,4.052462,5.495807,4.054692,3.654995,4.408473,2.811813,3.616958,4.185738,4.653183
1338,3.107593,3.252981,2.765983,3.941231,4.370182,3.588746,2.553835,3.831107,3.079155,3.735458,...,3.318310,3.036486,4.603397,2.359532,3.745549,2.854896,2.573550,3.713430,3.219836,4.560983
1339,3.006012,3.895520,4.241758,2.845791,5.028035,3.925111,2.701851,3.440524,2.851184,3.561110,...,3.991698,3.926734,4.758273,3.742675,3.775186,4.169005,3.242388,3.062944,3.324450,3.851436
1340,3.520503,3.065695,4.195328,2.632923,4.841404,2.751479,2.655208,3.266983,1.999800,4.114881,...,3.706350,4.165103,4.986201,3.432314,3.192564,2.627454,3.310305,3.944461,3.173165,3.351204


In [166]:
def get_unseen_movies(ratings_matrix, userId):
    user_rating = ratings_matrix.loc[userId, :]
    already_seen = user_rating[user_rating > 0].index.tolist()
    movies_list = ratings_matrix.columns.tolist()
    unseen_list = [movie for movie in movies_list if movie not in already_seen]
    return unseen_list

In [176]:
def recommend_movie_by_userid(prd_df, userId, unseen_list, top_n=10):
    recommend_movies = prd_df.loc[userId, unseen_list].sort_values(ascending=False)[:top_n]
    return recommend_movies

In [177]:
unseen_movies = get_unseen_movies(ratings_matrix, 6)

In [178]:
prd_df = pd.DataFrame((nmf_run[1] @ nmf_run[0].t()).detach().numpy())
prd_df.columns = ratings_matrix.index
prd_df.index = ratings_matrix.columns

In [181]:
recommend_movie_by_userid(prd_df, 6, unseen_movies, top_n=10)

KeyError: 6