In [None]:
from google.colab import drive
drive.mount('/content/gdrive')
import numpy as np
import pandas as pd
from sklearn.metrics import ndcg_score
from sklearn.model_selection import KFold
from itertools import chain
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

drive = 'gdrive/My Drive/TUBES_RECSYS/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## **Collaborative Filtering using Matrix Factorization**

In [None]:
# get ratings dataset
df_rating = pd.read_csv(drive + 'ratings.csv', sep=',')

# get movies dataset
df_movies = pd.read_csv(drive + 'movies.csv', sep=',')

In [None]:
# display first 5 rows of rating dataset
df_rating.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# display first 5 rows of movie dataset
df_movies.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
dataset_rating = df_rating
dataset_movies = df_movies

In [None]:
# locate features and label for rating prediction
X = df_rating.iloc[:,[0,1]].values
y = df_rating.iloc[:,2].values

In [None]:
"""
Input:
    U     : a set of users
    D     : a set of items
    R     : a matrix of dimension |U| x |D| that contains all users ratings towards a movie
    P     : an initial matrix of association between users and features with dimension |U| x K
    Q     : an initial matrix of association between items and features with dimension |D| x K
    K     : the number of latent features
    steps : the maximum number of steps to perform the optimization
    alpha : the learning rate
    beta  : the regularization parameter
@OUTPUT:
    the final matrices P and Q
"""

def matrix_factorization(R, P, Q, K, steps=6, alpha=0.0002, beta=0.02):
    Q = Q.T # transpose matrix Q
    
    for step in range(steps):
      for i in range(len(R)):
          for j in range(len(R[i])):
              if R[i][j] > 0: # only minimise errors of the observed user-item pairs
                  eij = R[i][j] - np.dot(P[i,:],Q[:,j]) # count error between real rating and estimated rating
                  for k in range(K): 
                      P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k]) # set update rules for P
                      Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j]) # set update rules for Q
      eR = np.dot(P,Q)
      e = 0
      for i in range(len(R)):
          for j in range(len(R[i])):
              if R[i][j] > 0:
                  e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2) # check the overall error 
                  for k in range(K):
                      e = e + (beta/2) * ( pow(P[i][k],2) + pow(Q[k][j],2) ) # add beta to avoid overfitting
      if e < 0.001:
          break
    return P, Q.T

In [None]:
cv = KFold(n_splits=5, random_state=42, shuffle=True) # do cross-validation for 5 folds

df_user_items_pred = [] # initialize array that stores all dataframe of user and items prediction
score = [] # initialize NDCG score of test set

for train_index, test_index in cv.split(X):
    
    df_rating = dataset_rating
    
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index] # split into data train and data test
    real_val = y_test # save real rating of data test
    
    for i in range(len(test_index)):
        df_rating['rating'].iloc[test_index[i]] = 0 # mask data test result
    
    df_rating = df_rating.pivot(index='userId', columns='movieId', values='rating') # change dataframe to matrix of user and item (movie)
    df_rating = df_rating.fillna(0) # fill unrated movie's ratings with 0
    
    R = df_rating.to_numpy() 

    U = len(R) # intialize user latent features matrix
    D = len(R[0]) # initialize item latent features matrix
    K = 4 # set number of latent features

    P = np.random.rand(U,K) # initialize user latent features matrix with random values
    Q = np.random.rand(D,K) # initialize item latent features matrix with random values

    print("The Original Matrix")
    print(R)

    user_latent_features , item_latent_features  = matrix_factorization(R, P, Q, K)

    user_items_matrix = np.dot(user_latent_features, item_latent_features.T)
    print("\nThe Approximation matrix by MF")
    print(user_items_matrix)
    
    df_user_items =  pd.DataFrame(user_items_matrix, columns = df_rating.columns, index = df_rating.index) # set user items matrix as dataframe
    df_user_items_pred.append(df_user_items)
    
    predicted = [] # initialize empty list for predicted data test
    for i in range(len(X_test)):
        predicted.append(df_user_items.loc[X_test[i][0]][X_test[i][1]]) # get rating prediction of data test
    
    predicted = np.asarray([predicted]) 
    real_val = np.asarray([real_val])
    score.append(ndcg_score(real_val,predicted))  # evaluate NDCG score of data test prediction

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


The Original Matrix
[[4.  0.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 2.  2.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  0.  ... 0.  0.  0. ]]

The Approximation matrix by MF
[[3.93355118 3.51816958 2.90023802 ... 0.99344018 2.43608675 3.17497707]
 [2.8828714  2.44792476 2.00889575 ... 0.78666753 1.87176033 2.31429061]
 [1.99544556 1.55540968 1.47072015 ... 0.62817827 1.41822956 1.5639089 ]
 ...
 [4.52438748 3.68143995 2.82638576 ... 1.08180049 2.80391971 3.46119549]
 [2.95641747 2.33261766 1.9543789  ... 0.79679814 1.94682175 2.26764897]
 [5.31652507 4.61448665 3.81508727 ... 1.42735118 3.40872205 4.29181511]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


The Original Matrix
[[4.  0.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 0.  0.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  0.  ... 0.  0.  0. ]]

The Approximation matrix by MF
[[2.90838362 3.38804026 2.79416413 ... 1.43767089 1.51395786 2.23478706]
 [2.07891206 2.47430811 2.32896863 ... 0.89271945 1.3505083  1.31925064]
 [1.97129672 2.26062258 1.74708625 ... 1.09909826 0.87998249 1.65577926]
 ...
 [3.26986107 3.81796879 3.27127419 ... 1.61038079 1.78262261 2.42874411]
 [1.27565305 1.52113159 1.1067211  ... 0.61183842 0.66860017 0.87628543]
 [4.17598492 4.93719272 3.87484682 ... 2.21749363 2.22133854 2.84023249]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


The Original Matrix
[[0.  0.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 0.  0.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  0.  ... 0.  0.  0. ]]

The Approximation matrix by MF
[[2.7505596  1.73402065 1.53518037 ... 0.83624608 1.4744098  1.93863167]
 [1.63557902 0.98530107 1.26226059 ... 0.49310456 0.93334164 1.3602988 ]
 [1.90715374 1.14321729 1.05758142 ... 0.3089623  0.9267034  1.17832693]
 ...
 [3.50640733 2.72647922 1.82755835 ... 0.69916372 1.77648114 2.10402361]
 [2.03859539 1.53904955 0.79607634 ... 0.21946564 0.9202223  0.95616055]
 [4.63295717 3.40009618 2.4294215  ... 1.02640546 2.36763563 2.87719813]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


The Original Matrix
[[0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]]

The Approximation matrix by MF
[[1.8730311  2.2547664  2.29338119 ... 2.03164997 1.81140997 1.10087956]
 [1.08763028 1.42911902 1.55648582 ... 1.3507249  0.94646333 0.81189635]
 [1.58130366 2.15272412 2.27605707 ... 1.68992087 1.36204983 1.09807971]
 ...
 [1.37114427 1.88655819 2.11427157 ... 1.44018665 1.10662381 0.98602345]
 [1.48712266 1.38643424 1.40287356 ... 1.3705948  1.62732467 0.51904526]
 [2.41129195 2.71912426 2.96758433 ... 2.62117497 2.31409005 1.35757232]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


The Original Matrix
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

The Approximation matrix by MF
[[0.40925971 0.22548503 0.0591031  ... 0.32943765 0.28144137 0.36759473]
 [1.16879957 1.01886582 0.33068396 ... 1.00549073 0.5774399  1.03520879]
 [1.21151993 0.88590136 0.30225539 ... 1.14687976 0.71341121 1.09085563]
 ...
 [1.21797234 0.84165576 0.24575409 ... 1.00218044 0.73160453 1.08348572]
 [2.13619523 1.4562597  0.38787921 ... 1.8605431  1.23245908 1.80876668]
 [1.40142735 1.05406037 0.2968452  ... 1.25558198 0.74502923 1.17650541]]


In [None]:
# print NDCG score for 5 fold validation and its average
print("NDCG score : ", score)
print("NDCG average score : ", np.mean(score))

NDCG score :  [0.9759309390384933, 0.9746077361206373, 0.9722639105289763, 0.9681632445226911, 0.9687211633470916]
NDCG average score :  0.9719373987115778


In [None]:
# get user items dataframe of best data prediction
df_user_items = df_user_items_pred[score.index(max(score))]
df_user_items

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,39,40,41,42,43,...,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188751,188797,188833,189043,189111,189333,189381,189547,189713,190183,190207,190209,190213,190215,190219,190221,191005,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,3.933551,3.518170,2.900238,2.191680,2.228772,3.470723,2.333630,2.337178,1.617999,3.142980,2.406717,2.477636,2.460978,2.548688,3.105085,2.467899,2.570575,3.089799,2.466033,2.535024,2.307281,1.165386,2.080718,0.943954,2.480314,2.236066,2.994162,2.873483,3.095408,3.185012,2.437456,3.933151,3.853861,2.922037,1.336782,3.668945,1.885620,1.895250,3.089494,2.980357,...,2.978562,2.604165,3.287825,3.908263,2.689202,3.025906,0.863219,2.560115,3.422883,1.974451,2.982466,2.897366,2.455920,2.593788,1.474776,2.592755,2.898438,2.536438,2.195493,1.919989,2.900504,2.233070,2.305862,1.155227,1.810754,2.448407,2.565241,1.443273,1.582396,3.803327,2.005426,2.177204,1.135564,3.561162,1.789010,1.766322,2.464406,0.993440,2.436087,3.174977
2,2.882871,2.447925,2.008896,1.778990,1.603670,2.300799,1.884748,1.657102,1.384422,2.311077,1.741864,1.463183,1.989687,1.889664,2.129608,1.957739,1.920177,2.245370,1.889040,1.927269,1.630053,0.840276,1.256013,0.721936,1.723581,1.597551,2.255585,2.010114,2.307036,2.146629,1.916212,2.638147,2.859429,1.914083,1.246788,2.418036,1.112245,1.447436,2.045210,2.234516,...,1.982895,1.930734,2.029215,2.680199,1.619614,2.266757,0.513410,1.890117,2.298314,1.692541,1.993815,1.809879,1.515647,1.885155,1.191997,1.835773,1.762272,1.830727,1.615304,1.298558,1.954700,1.638727,1.805653,0.921202,1.171579,1.783063,1.600985,1.079230,1.449067,2.350366,1.399998,1.629004,0.692003,2.230337,1.512485,1.113038,1.717107,0.786668,1.871760,2.314291
3,1.995446,1.555410,1.470720,1.384182,0.986945,1.619393,1.375066,1.451073,0.936067,1.794792,1.216156,1.114412,1.354316,1.246012,1.593197,1.457753,1.397241,1.609103,1.417771,1.466731,1.064829,0.550409,1.048366,0.529818,1.188063,1.163201,1.555462,1.605823,1.574980,1.693207,1.362196,1.768911,2.054971,1.530360,0.908029,1.588283,0.867067,0.988258,1.397656,1.619113,...,1.442255,1.474060,1.397481,1.926123,1.220123,1.619866,0.339587,1.415628,1.719965,1.156887,1.206980,1.302992,1.131999,1.288683,0.928837,1.230878,1.245729,1.168409,1.134569,0.845095,1.484245,1.229422,1.305383,0.670489,0.835004,1.435655,1.189770,0.757963,1.009357,1.612745,0.946243,1.232142,0.447367,1.594459,1.096219,0.712182,1.113697,0.628178,1.418230,1.563909
4,3.078355,2.690658,2.414947,2.054772,1.709250,2.800217,1.956482,2.087774,1.411768,2.697569,1.862694,1.854733,2.059951,2.044486,2.622071,2.101892,2.227340,2.520136,2.112931,2.272481,1.814562,0.906101,1.735443,0.824340,1.962710,1.856597,2.471339,2.445203,2.492691,2.588481,2.065697,3.043222,3.252302,2.410369,1.268919,2.820742,1.459050,1.485624,2.402111,2.453234,...,2.282755,2.180857,2.455183,3.127601,1.997876,2.529579,0.591465,2.199910,2.808203,1.792869,2.211296,2.170997,1.891077,1.992157,1.272345,2.029883,2.311307,2.019031,1.850395,1.388187,2.312260,1.949260,1.870638,1.042776,1.404755,2.151465,2.005901,1.226004,1.477096,2.880005,1.620635,1.830934,0.813023,2.735257,1.514156,1.291586,1.931838,0.918664,2.073177,2.533924
5,2.584809,2.447476,1.689851,1.418293,1.671801,2.032540,1.601456,0.967580,1.345874,1.767518,1.532304,1.065330,1.885322,1.838166,1.736135,1.616285,1.665317,1.939830,1.542011,1.569660,1.590693,0.813616,0.797184,0.619256,1.560474,1.354972,2.083310,1.440213,2.139738,1.536935,1.708402,2.460073,2.499757,1.341251,1.117782,2.313018,0.787157,1.327885,1.857451,1.903876,...,1.613493,1.507490,1.778457,2.280127,1.202503,1.991214,0.457353,1.551654,1.842166,1.646155,2.084863,1.476333,1.171895,1.708370,0.906959,1.724420,1.532698,1.852027,1.464180,1.226849,1.498783,1.352418,1.535096,0.813391,0.992678,1.279811,1.262011,0.981960,1.383613,2.086733,1.317956,1.298037,0.651839,1.866981,1.305180,1.076090,1.688713,0.590908,1.495623,2.169299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,4.939485,4.259701,3.671055,2.969933,2.694733,4.292434,3.064836,3.161774,2.111104,4.117961,3.018594,3.035800,3.162864,3.170580,3.950246,3.251352,3.316764,3.924023,3.229547,3.340098,2.829071,1.434933,2.648485,1.228969,3.076737,2.845636,3.795023,3.747913,3.894791,4.086021,3.156257,4.798121,4.935656,3.742160,1.842193,4.433844,2.329800,2.393349,3.778994,3.828560,...,3.706077,3.394515,3.960411,4.894679,3.297486,3.874736,1.015889,3.320626,4.323830,2.594711,3.531282,3.539904,3.028290,3.234810,1.983942,3.202153,3.521571,3.107361,2.787040,2.316272,3.674322,2.897255,2.988581,1.519997,2.228661,3.242570,3.170186,1.840168,2.133546,4.583072,2.480744,2.837173,1.339316,4.353257,2.374375,2.094355,3.006529,1.347277,3.200714,3.960659
607,3.459668,2.763920,2.330562,2.212804,1.817424,2.581927,2.373479,2.083210,1.702842,2.827711,2.094876,1.649922,2.425944,2.205099,2.466074,2.451807,2.280636,2.678600,2.325079,2.330212,1.857598,0.975638,1.428754,0.869979,1.991229,1.886395,2.679742,2.434749,2.741092,2.580960,2.338572,2.972631,3.403312,2.260364,1.602284,2.674635,1.246827,1.762975,2.301751,2.717207,...,2.329966,2.378642,2.234960,3.120951,1.847364,2.716906,0.564932,2.272431,2.665134,2.060053,2.178704,2.054205,1.716152,2.256594,1.534332,2.126203,1.876100,2.077800,1.891896,1.496832,2.323214,1.950044,2.258362,1.114870,1.329467,2.188119,1.806587,1.271420,1.819123,2.566401,1.589474,2.003604,0.747774,2.493504,1.934447,1.210511,1.936574,0.984765,2.325968,2.712618
608,4.524387,3.681440,2.826386,2.390568,2.407427,3.236335,2.918363,2.368001,2.000317,3.356898,2.773626,2.294601,2.963576,2.793421,2.935136,2.961541,2.653952,3.350017,2.774178,2.616555,2.400061,1.278684,1.719514,1.008603,2.564174,2.306144,3.302207,2.907828,3.470435,3.253377,2.819448,3.920402,4.090612,2.787788,1.761968,3.549051,1.662861,2.301832,3.021306,3.377292,...,3.127715,2.910628,3.091037,3.976402,2.588084,3.307916,0.863998,2.680309,3.293959,2.308494,3.002411,2.834750,2.295936,3.007266,1.843464,2.768573,2.372835,2.642799,2.267066,2.124162,2.987803,2.247392,2.864256,1.246952,1.755489,2.524831,2.367600,1.512152,2.013770,3.486392,1.989440,2.439502,1.085674,3.334911,2.367086,1.707751,2.508890,1.081800,2.803920,3.461195
609,2.956417,2.332618,1.954379,1.754504,1.508693,2.185978,1.975115,1.792553,1.339806,2.377133,1.810221,1.541866,1.965285,1.817956,2.064853,2.036708,1.851327,2.257746,1.929486,1.882168,1.551046,0.821084,1.272104,0.705310,1.691948,1.580844,2.203722,2.084571,2.284437,2.273704,1.912000,2.546775,2.802255,1.984375,1.238782,2.287999,1.147695,1.493545,1.981592,2.284145,...,2.072844,2.019172,2.004375,2.676188,1.722878,2.247022,0.533912,1.879310,2.283352,1.583170,1.854898,1.863835,1.550646,1.943708,1.288093,1.798848,1.615693,1.702033,1.544006,1.327234,2.043287,1.594627,1.911957,0.880394,1.167537,1.836931,1.608843,1.031663,1.391395,2.274419,1.318925,1.689394,0.677667,2.216416,1.596343,1.070486,1.617666,0.796798,1.946822,2.267649


In [None]:
# Get top-20 recommendations for all user
def recommendation(df_user_items, dataset_rating, dataset_movies):
    n_recommendations = 20 # set number of recommendations
    n_users = dataset_rating.userId.unique().shape[0] # get all unique users id count

    for i in range(1,n_users+1):
      user_rating_pred = df_user_items.loc[i].sort_values(ascending=False) # get user rating prediction
      user_rated = (dataset_rating[dataset_rating['userId'] == i].merge(dataset_movies, how = 'left', on='movieId').
                      sort_values(['rating'], ascending=False)) # get user rated movie information

      recommendations = (dataset_movies[~dataset_movies['movieId'].isin(user_rated['movieId'])].
            merge(pd.DataFrame(user_rating_pred).reset_index(), how = 'left',
                  on = 'movieId').
            rename(columns = {i: 'Predictions'}).
            sort_values('Predictions', ascending = False).
                          iloc[:n_recommendations, :-1]
                        ) # get 20-top rated movie recommendation for user that hasn't being watched by user

      print("User id = ", i)
      print("Recommended movies: ")
      for j in range(recommendations.shape[0]):
        print(j+1,".", recommendations['title'].values[j])
      print("\n")

recommendation(df_user_items, dataset_rating, dataset_movies)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
16 . One Flew Over the Cuckoo's Nest (1975)
17 . Death Note: Desu nôto (2006–2007)
18 . Memento (2000)
19 . Game, The (1997)
20 . Amadeus (1984)


User id =  403
Recommended movies: 
1 . Forrest Gump (1994)
2 . Shawshank Redemption, The (1994)
3 . Godfather, The (1972)
4 . Star Wars: Episode IV - A New Hope (1977)
5 . Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
6 . Star Wars: Episode VI - Return of the Jedi (1983)
7 . Fight Club (1999)
8 . Terminator 2: Judgment Day (1991)
9 . Schindler's List (1993)
10 . One Flew Over the Cuckoo's Nest (1975)
11 . Fugitive, The (1993)
12 . Matrix, The (1999)
13 . Star Wars: Episode V - The Empire Strikes Back (1980)
14 . Memento (2000)
15 . Amadeus (1984)
16 . Silence of the Lambs, The (1991)
17 . Dark Knight, The (2008)
18 . Usual Suspects, The (1995)
19 . Inception (2010)
20 . Game, The (1997)


User id =  404
Recommended movies: 
1 . Fight Club (1999

# **Content Based Filtering (clustering over Movie genre)**

In [None]:
# get movie dataset
df_movies = dataset_movies
df_movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [None]:
# display all unique genres in movie dataset
movie_genres = df_movies['genres'].str.split("|").to_list()
flatten_list = list(chain.from_iterable(movie_genres)) 
set(flatten_list)

{'(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western'}

In [None]:
# delete movie data where genre is not listed
df_movies = df_movies[df_movies['genres'] != "(no genres listed)"]
df_movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [None]:
# function to count cosine similarity of a specific column towards movie_id

def count_cosine_similarity(column, movies_id):
  # weight keyword tf-idf of column in dataset movies
  tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 1), min_df=0, token_pattern=r"(?u)\w[\w-]*\w")
  tfidf_matrix = tf.fit_transform(column)

  # calculate cosine similarities to find the column values similarity
  cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

  # get dataframe of movie id to column cosine similarities
  recommender_df = pd.DataFrame(cosine_similarities, columns=column, index=movies_id)

  # delete duplicate column (duplicated genres) in dataframe
  recommender_df = recommender_df.loc[:,~recommender_df.columns.duplicated()]
  return recommender_df

recommender_df = count_cosine_similarity(df_movies.genres, df_movies.movieId)
recommender_df

genres,Adventure|Animation|Children|Comedy|Fantasy,Adventure|Children|Fantasy,Comedy|Romance,Comedy|Drama|Romance,Comedy,Action|Crime|Thriller,Adventure|Children,Action,Action|Adventure|Thriller,Comedy|Horror,Adventure|Animation|Children,Drama,Action|Adventure|Romance,Crime|Drama,Drama|Romance,Action|Comedy|Crime|Drama|Thriller,Comedy|Crime|Thriller,Crime|Drama|Horror|Mystery|Thriller,Drama|Sci-Fi,Children|Drama,Adventure|Drama|Fantasy|Mystery|Sci-Fi,Mystery|Sci-Fi|Thriller,Children|Comedy,Drama|War,Action|Crime|Drama,Action|Adventure|Fantasy,Comedy|Drama|Thriller,Mystery|Thriller,Animation|Children|Drama|Musical|Romance,Crime|Mystery|Thriller,Adventure|Drama,Drama|Thriller,Comedy|Crime,Action|Sci-Fi|Thriller,Action|Comedy|Horror|Thriller,Comedy|Drama,Documentary,Action|Crime|Drama|Thriller,Crime|Drama|Romance,Action|Adventure|Drama,...,Adventure|Romance|Sci-Fi,Horror|Sci-Fi|Western,Action|Adventure|Children|Comedy|Sci-Fi,Action|Adventure|Animation|Comedy|Sci-Fi,Horror|Romance|Sci-Fi,Action|Adventure|Children|Mystery|Sci-Fi,Comedy|Crime|Sci-Fi,Action|Comedy|Fantasy|Sci-Fi,Adventure|Animation|Children|Comedy|Drama|Fantasy,Adventure|Children|Comedy|Sci-Fi,Action|Animation|Crime|Sci-Fi,Action|Adventure|Romance|Sci-Fi,Action|Crime|Drama|Sci-Fi,Animation|Children|Comedy|Drama|Romance,Action|Fantasy|Mystery,Comedy|Horror|Mystery,Comedy|Crime|Fantasy,Animation|Horror|Mystery,Comedy|Romance|Western,Adventure|Drama|Fantasy|Sci-Fi,Adventure|Children|Drama|Sci-Fi,Adventure|Children|Comedy|Drama|Fantasy|Sci-Fi,Animation|Fantasy|Horror|Mystery,Adventure|Animation|Fantasy|Horror|Sci-Fi,Action|Animation|Crime|Drama,Action|Adventure|Animation|Drama|Fantasy|Sci-Fi,Action|Animation|Mystery,Animation|Drama|Sci-Fi,Animation|Drama|Fantasy|Romance,Action|Adventure|Comedy|Fantasy|Sci-Fi|Thriller,Action|Adventure|Fantasy|Horror|Thriller,Comedy|Sci-Fi|War,Comedy|Mystery|Romance|Thriller,Fantasy|Horror|Sci-Fi|Western,Animation|Crime|Drama,Adventure|Mystery|Sci-Fi|Thriller,Action|Comedy|Crime|Horror,Action|Adventure|Children|Sci-Fi,Action|Adventure|Comedy|Fantasy|Sci-Fi,Action|Animation|Comedy|Fantasy
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,0.813604,0.152600,0.135000,0.267388,0.000000,0.654710,0.000000,0.262413,0.136174,0.833785,0.000000,0.257325,0.000000,0.000000,0.094145,0.115720,0.000000,0.000000,0.453566,0.418129,0.0,0.571328,0.000000,0.000000,0.553347,0.139422,0.0,0.499667,0.0,0.358605,0.000000,0.142647,0.000000,0.097274,0.196445,0.0,0.000000,0.000000,0.286092,...,0.239614,0.0,0.546316,0.555184,0.000000,0.419854,0.105978,0.380183,0.970845,0.596059,0.300996,0.213889,0.000000,0.662153,0.290937,0.096285,0.437887,0.308524,0.085418,0.496561,0.514715,0.734062,0.505002,0.647657,0.332858,0.650416,0.324124,0.365645,0.593709,0.491736,0.434947,0.091356,0.090383,0.220210,0.374265,0.196220,0.093125,0.489566,0.529580,0.680280
2,0.813604,1.000000,0.000000,0.000000,0.000000,0.000000,0.804704,0.000000,0.322532,0.000000,0.631874,0.000000,0.316278,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.557477,0.513923,0.0,0.548409,0.000000,0.000000,0.680119,0.000000,0.0,0.300220,0.0,0.440762,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.351635,...,0.294509,0.0,0.575488,0.231637,0.000000,0.516042,0.000000,0.357674,0.789883,0.627887,0.000000,0.262891,0.000000,0.349878,0.357591,0.000000,0.411962,0.000000,0.000000,0.610322,0.632637,0.814286,0.289707,0.481025,0.000000,0.483075,0.000000,0.000000,0.340596,0.514091,0.534593,0.000000,0.000000,0.270660,0.000000,0.241174,0.000000,0.601726,0.553656,0.341424
3,0.152600,0.000000,1.000000,0.884671,0.570705,0.000000,0.000000,0.000000,0.000000,0.290645,0.000000,0.000000,0.467949,0.000000,0.691079,0.200940,0.246990,0.000000,0.000000,0.000000,0.000000,0.0,0.267096,0.000000,0.000000,0.000000,0.297578,0.0,0.302712,0.0,0.000000,0.000000,0.304461,0.000000,0.207619,0.419287,0.0,0.000000,0.506727,0.000000,...,0.435742,0.0,0.166688,0.165543,0.423647,0.000000,0.226197,0.190339,0.148151,0.181865,0.000000,0.388961,0.000000,0.523186,0.000000,0.205509,0.219229,0.000000,0.559750,0.000000,0.000000,0.152728,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.375237,0.156812,0.000000,0.194988,0.592292,0.000000,0.000000,0.000000,0.198764,0.000000,0.168880,0.181692
4,0.135000,0.000000,0.884671,1.000000,0.504886,0.000000,0.000000,0.000000,0.000000,0.257126,0.000000,0.466216,0.413981,0.234640,0.863186,0.329343,0.218505,0.124779,0.223666,0.204814,0.118251,0.0,0.236292,0.182459,0.188046,0.000000,0.487734,0.0,0.378099,0.0,0.237617,0.263068,0.269348,0.000000,0.183675,0.687217,0.0,0.161948,0.632923,0.189568,...,0.385488,0.0,0.147464,0.146451,0.374788,0.000000,0.200110,0.168387,0.242821,0.160891,0.000000,0.344102,0.151327,0.591391,0.000000,0.181808,0.193945,0.000000,0.495194,0.140432,0.138227,0.250323,0.000000,0.000000,0.143747,0.111153,0.000000,0.157906,0.468687,0.138727,0.000000,0.172500,0.523983,0.000000,0.161629,0.000000,0.175841,0.000000,0.149403,0.160737
5,0.267388,0.000000,0.570705,0.504886,1.000000,0.000000,0.000000,0.000000,0.000000,0.509274,0.000000,0.000000,0.000000,0.000000,0.000000,0.352090,0.432780,0.000000,0.000000,0.000000,0.000000,0.0,0.468011,0.000000,0.000000,0.000000,0.521421,0.0,0.000000,0.0,0.000000,0.000000,0.533483,0.000000,0.363794,0.734682,0.0,0.000000,0.000000,0.000000,...,0.000000,0.0,0.292074,0.290068,0.000000,0.000000,0.396346,0.333515,0.259592,0.318668,0.000000,0.000000,0.000000,0.298585,0.000000,0.360096,0.384137,0.000000,0.319452,0.000000,0.000000,0.267612,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.274768,0.000000,0.341662,0.338024,0.000000,0.000000,0.000000,0.348278,0.000000,0.295915,0.318363
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.680280,0.341424,0.181692,0.160737,0.318363,0.239449,0.000000,0.435923,0.241080,0.162134,0.380637,0.000000,0.236405,0.000000,0.000000,0.322253,0.137781,0.000000,0.000000,0.000000,0.285358,0.0,0.148998,0.000000,0.260724,0.594614,0.166001,0.0,0.304099,0.0,0.000000,0.000000,0.169841,0.233183,0.332965,0.233896,0.0,0.224538,0.000000,0.262833,...,0.000000,0.0,0.267323,0.609776,0.000000,0.156329,0.126182,0.651735,0.660446,0.101452,0.538605,0.196501,0.209813,0.449457,0.545429,0.114642,0.521368,0.367342,0.101702,0.338885,0.000000,0.363215,0.601277,0.572246,0.595619,0.728796,0.579989,0.435353,0.706896,0.536935,0.467384,0.108773,0.107614,0.262191,0.445616,0.000000,0.318764,0.182285,0.578258,1.000000
193583,0.755880,0.379367,0.201883,0.178600,0.353744,0.000000,0.000000,0.000000,0.000000,0.180152,0.422938,0.000000,0.000000,0.000000,0.000000,0.124550,0.153093,0.000000,0.000000,0.000000,0.317071,0.0,0.165556,0.000000,0.000000,0.419607,0.184449,0.0,0.337894,0.0,0.000000,0.000000,0.188716,0.000000,0.128690,0.259889,0.0,0.000000,0.000000,0.000000,...,0.000000,0.0,0.103319,0.485160,0.000000,0.000000,0.140205,0.502967,0.733842,0.112727,0.398206,0.000000,0.000000,0.499406,0.384899,0.127382,0.579308,0.408165,0.113004,0.376545,0.000000,0.403579,0.668098,0.635840,0.440359,0.638548,0.428803,0.483734,0.785454,0.414372,0.329823,0.120861,0.119574,0.291329,0.495138,0.000000,0.123201,0.000000,0.446262,0.899984
193585,0.000000,0.000000,0.000000,0.466216,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.503286,0.540111,0.325123,0.000000,0.267641,0.479747,0.439311,0.253641,0.0,0.000000,0.391360,0.403346,0.000000,0.481485,0.0,0.236584,0.0,0.509670,0.564262,0.000000,0.000000,0.000000,0.678412,0.0,0.347366,0.396032,0.406610,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.239709,0.000000,0.000000,0.000000,0.324586,0.275716,0.000000,0.000000,0.000000,0.000000,0.000000,0.301217,0.296486,0.247115,0.000000,0.000000,0.308327,0.238416,0.000000,0.338697,0.293266,0.000000,0.000000,0.000000,0.000000,0.000000,0.346682,0.000000,0.000000,0.000000,0.000000,0.000000
193587,0.421142,0.000000,0.000000,0.000000,0.000000,0.317743,0.000000,0.578459,0.319907,0.000000,0.505096,0.000000,0.313703,0.000000,0.000000,0.278877,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.345974,0.287919,0.000000,0.0,0.403532,0.0,0.000000,0.000000,0.000000,0.309428,0.288148,0.000000,0.0,0.297957,0.000000,0.348773,...,0.000000,0.0,0.231341,0.686615,0.000000,0.207444,0.000000,0.264165,0.408863,0.000000,0.714715,0.260751,0.278417,0.470278,0.264104,0.000000,0.000000,0.487454,0.000000,0.000000,0.000000,0.000000,0.425475,0.404931,0.790372,0.611160,0.769631,0.577703,0.500212,0.217634,0.226313,0.000000,0.000000,0.000000,0.591322,0.000000,0.275858,0.241888,0.234383,0.753593


In [None]:
# get one-hot-encoded table of movie genres
df_movie_genre = pd.get_dummies(df_movies['genres'])

# get total of users id in dataframe
n_users = len(df_user_items.index)

# get user-genre similarities dataframe
df_user_genre = pd.DataFrame(columns = recommender_df.columns)
for i in range(1,n_users+1):
    df_user_genre.loc[i] = df_movie_genre.mul(df_user_items.loc[i], axis=0).mean() # multiply one-hot-encoded movie column with user-items dataframe from recommender 1
df_user_genre

genres,Adventure|Animation|Children|Comedy|Fantasy,Adventure|Children|Fantasy,Comedy|Romance,Comedy|Drama|Romance,Comedy,Action|Crime|Thriller,Adventure|Children,Action,Action|Adventure|Thriller,Comedy|Horror,Adventure|Animation|Children,Drama,Action|Adventure|Romance,Crime|Drama,Drama|Romance,Action|Comedy|Crime|Drama|Thriller,Comedy|Crime|Thriller,Crime|Drama|Horror|Mystery|Thriller,Drama|Sci-Fi,Children|Drama,Adventure|Drama|Fantasy|Mystery|Sci-Fi,Mystery|Sci-Fi|Thriller,Children|Comedy,Drama|War,Action|Crime|Drama,Action|Adventure|Fantasy,Comedy|Drama|Thriller,Mystery|Thriller,Animation|Children|Drama|Musical|Romance,Crime|Mystery|Thriller,Adventure|Drama,Drama|Thriller,Comedy|Crime,Action|Sci-Fi|Thriller,Action|Comedy|Horror|Thriller,Comedy|Drama,Documentary,Action|Crime|Drama|Thriller,Crime|Drama|Romance,Action|Adventure|Drama,...,Adventure|Romance|Sci-Fi,Horror|Sci-Fi|Western,Action|Adventure|Children|Comedy|Sci-Fi,Action|Adventure|Animation|Comedy|Sci-Fi,Horror|Romance|Sci-Fi,Action|Adventure|Children|Mystery|Sci-Fi,Comedy|Crime|Sci-Fi,Action|Comedy|Fantasy|Sci-Fi,Adventure|Animation|Children|Comedy|Drama|Fantasy,Adventure|Children|Comedy|Sci-Fi,Action|Animation|Crime|Sci-Fi,Action|Adventure|Romance|Sci-Fi,Action|Crime|Drama|Sci-Fi,Animation|Children|Comedy|Drama|Romance,Action|Fantasy|Mystery,Comedy|Horror|Mystery,Comedy|Crime|Fantasy,Animation|Horror|Mystery,Comedy|Romance|Western,Adventure|Drama|Fantasy|Sci-Fi,Adventure|Children|Drama|Sci-Fi,Adventure|Children|Comedy|Drama|Fantasy|Sci-Fi,Animation|Fantasy|Horror|Mystery,Adventure|Animation|Fantasy|Horror|Sci-Fi,Action|Animation|Crime|Drama,Action|Adventure|Animation|Drama|Fantasy|Sci-Fi,Action|Animation|Mystery,Animation|Drama|Sci-Fi,Animation|Drama|Fantasy|Romance,Action|Adventure|Comedy|Fantasy|Sci-Fi|Thriller,Action|Adventure|Fantasy|Horror|Thriller,Comedy|Sci-Fi|War,Comedy|Mystery|Romance|Thriller,Fantasy|Horror|Sci-Fi|Western,Animation|Crime|Drama,Adventure|Mystery|Sci-Fi|Thriller,Action|Comedy|Crime|Horror,Action|Adventure|Children|Sci-Fi,Action|Adventure|Comedy|Fantasy|Sci-Fi,Action|Animation|Comedy|Fantasy
1,0.002034,0.004694,0.089590,0.071089,0.226125,0.014416,0.007837,0.015100,0.010423,0.014395,0.004472,0.285284,0.002539,0.035824,0.100540,0.001843,0.002617,0.000428,0.008005,0.004853,0.000533,0.002136,0.021506,0.031581,0.011604,0.008605,0.001989,0.008325,0.000967,0.003783,0.011828,0.046612,0.021611,0.011471,0.001363,0.106071,0.063544,0.015420,0.004685,0.005630,...,0.0,0.000642,0.0,0.0,0.0,0.000366,0.0,0.0,0.0,0.000410,0.000537,0.0,0.000367,0.000532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.001380,0.003362,0.063143,0.049247,0.158062,0.010152,0.005397,0.010670,0.007390,0.010150,0.002943,0.199783,0.001768,0.025119,0.070818,0.001291,0.001883,0.000303,0.005687,0.003308,0.000373,0.001537,0.014862,0.022355,0.008196,0.005914,0.001504,0.005876,0.000644,0.002625,0.008189,0.032781,0.015045,0.008221,0.000928,0.074467,0.044801,0.010616,0.003329,0.004058,...,0.0,0.000399,0.0,0.0,0.0,0.000222,0.0,0.0,0.0,0.000339,0.000338,0.0,0.000278,0.000362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000995,0.002367,0.045359,0.035701,0.113950,0.007298,0.003832,0.007890,0.005307,0.007514,0.002096,0.143610,0.001217,0.018107,0.051152,0.000988,0.001384,0.000198,0.004014,0.002366,0.000298,0.001085,0.010644,0.016050,0.005878,0.004207,0.001098,0.004244,0.000471,0.001864,0.005875,0.023619,0.010892,0.005936,0.000700,0.054157,0.032420,0.007646,0.002286,0.002962,...,0.0,0.000279,0.0,0.0,0.0,0.000191,0.0,0.0,0.0,0.000223,0.000246,0.0,0.000226,0.000287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.001648,0.003783,0.072583,0.057703,0.183676,0.011639,0.006245,0.012439,0.008484,0.011856,0.003504,0.231791,0.002004,0.029309,0.081690,0.001525,0.002180,0.000337,0.006419,0.003874,0.000454,0.001739,0.017237,0.025662,0.009431,0.006827,0.001642,0.006738,0.000793,0.003076,0.009544,0.037889,0.017644,0.009456,0.001109,0.086598,0.051848,0.012347,0.003736,0.004594,...,0.0,0.000491,0.0,0.0,0.0,0.000303,0.0,0.0,0.0,0.000351,0.000425,0.0,0.000331,0.000430,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.001174,0.002972,0.054125,0.041585,0.134822,0.008676,0.004676,0.008769,0.006353,0.008323,0.002511,0.171277,0.001603,0.021509,0.060226,0.000998,0.001575,0.000295,0.004993,0.002832,0.000267,0.001360,0.012729,0.019209,0.007057,0.005096,0.001266,0.004988,0.000537,0.002298,0.007017,0.028000,0.012773,0.007049,0.000727,0.062734,0.038060,0.009002,0.003044,0.003398,...,0.0,0.000345,0.0,0.0,0.0,0.000127,0.0,0.0,0.0,0.000340,0.000280,0.0,0.000199,0.000255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.002546,0.005905,0.112995,0.089555,0.285008,0.018177,0.009788,0.019242,0.013168,0.018340,0.005528,0.359432,0.003149,0.045208,0.127016,0.002368,0.003346,0.000525,0.010056,0.006061,0.000696,0.002695,0.026958,0.039874,0.014639,0.010744,0.002567,0.010519,0.001211,0.004740,0.014847,0.058826,0.027257,0.014565,0.001731,0.134216,0.080364,0.019341,0.005838,0.007175,...,0.0,0.000779,0.0,0.0,0.0,0.000471,0.0,0.0,0.0,0.000525,0.000661,0.0,0.000494,0.000685,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.001589,0.003963,0.074304,0.057568,0.185183,0.011964,0.006274,0.012667,0.008707,0.012041,0.003359,0.233941,0.002047,0.029380,0.083427,0.001549,0.002240,0.000345,0.006696,0.003832,0.000452,0.001811,0.017343,0.026330,0.009635,0.006888,0.001842,0.006943,0.000733,0.003036,0.009545,0.038524,0.017577,0.009717,0.001095,0.087662,0.052753,0.012402,0.003877,0.004862,...,0.0,0.000440,0.0,0.0,0.0,0.000260,0.0,0.0,0.0,0.000407,0.000377,0.0,0.000346,0.000441,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.001990,0.005012,0.093435,0.072165,0.231992,0.015109,0.008029,0.015654,0.010890,0.014882,0.004382,0.292935,0.002645,0.036478,0.104584,0.001909,0.002733,0.000446,0.008516,0.004892,0.000540,0.002263,0.022059,0.032999,0.012067,0.008864,0.002286,0.008747,0.000902,0.003790,0.012041,0.048247,0.021876,0.011984,0.001373,0.109273,0.065801,0.015798,0.004952,0.006086,...,0.0,0.000591,0.0,0.0,0.0,0.000319,0.0,0.0,0.0,0.000484,0.000484,0.0,0.000390,0.000561,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.001352,0.003346,0.063058,0.049006,0.157143,0.010181,0.005370,0.010747,0.007362,0.010223,0.002923,0.198234,0.001741,0.024797,0.070812,0.001329,0.001879,0.000288,0.005682,0.003287,0.000387,0.001519,0.014829,0.022290,0.008152,0.005919,0.001550,0.005908,0.000623,0.002560,0.008126,0.032659,0.014888,0.008157,0.000947,0.074367,0.044674,0.010637,0.003268,0.004129,...,0.0,0.000389,0.0,0.0,0.0,0.000236,0.0,0.0,0.0,0.000320,0.000329,0.0,0.000287,0.000390,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# initialize user-movie similarity dataframe
df_user_movie_sim_1 = pd.DataFrame(columns = df_user_items.columns)

# get user-movie similarity 
for i in range(1,n_users+1):
  df_user_movie_sim_1.loc[i] = recommender_df.mul(df_user_genre.loc[i]).sum(axis=1)
df_user_movie_sim_1

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,39,40,41,42,43,...,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188751,188797,188833,189043,189111,189333,189381,189547,189713,190183,190207,190209,190213,190215,190219,190221,191005,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,0.366588,0.220308,0.567280,0.815695,0.564695,0.411073,0.567280,0.193593,0.250303,0.400723,0.815695,0.441255,0.195613,0.673164,0.414427,0.506066,0.614686,0.564695,0.564695,0.778477,0.539538,0.523156,0.411073,0.453649,0.614686,0.673164,0.390724,0.614686,0.423965,0.506066,0.673164,0.282855,0.390724,0.506066,0.357734,0.567280,0.673164,0.339926,0.555279,0.673164,...,0.193575,0.535181,0.564695,0.279702,0.324700,0.594799,0.279742,0.484305,0.326255,0.081292,0.379611,0.482243,0.506066,0.567280,0.564695,0.400206,0.871553,0.081292,0.400723,0.411073,0.273347,0.714596,0.289746,0.614686,0.564695,0.673164,0.673164,0.070410,0.081292,0.501867,0.421131,0.353948,0.871553,0.070410,0.081292,0.394677,0.317299,0.673164,0.202224,0.564695
2,0.255980,0.153657,0.397660,0.571913,0.395029,0.288511,0.397660,0.135261,0.175215,0.280963,0.571913,0.308901,0.136600,0.472131,0.290488,0.354977,0.431504,0.395029,0.395029,0.545808,0.378383,0.366953,0.288511,0.317863,0.431504,0.472131,0.273741,0.431504,0.296439,0.354977,0.472131,0.198229,0.273741,0.354977,0.250128,0.397660,0.472131,0.238408,0.389283,0.472131,...,0.135815,0.374478,0.395029,0.195322,0.227097,0.416909,0.195519,0.338802,0.228113,0.057476,0.265133,0.337022,0.354977,0.397660,0.395029,0.279524,0.610520,0.057476,0.280963,0.288511,0.191229,0.500735,0.203351,0.431504,0.395029,0.472131,0.472131,0.049078,0.057476,0.351036,0.294496,0.247970,0.610520,0.049078,0.057476,0.275722,0.221494,0.472131,0.141388,0.395029
3,0.184786,0.110853,0.287023,0.412812,0.285293,0.207815,0.287023,0.097504,0.126469,0.202459,0.412812,0.223402,0.098525,0.340811,0.209500,0.255964,0.311372,0.285293,0.285293,0.393650,0.272721,0.264664,0.207815,0.228959,0.311372,0.340811,0.197575,0.311372,0.213590,0.255964,0.340811,0.142445,0.197575,0.255964,0.180595,0.287023,0.340811,0.172179,0.280777,0.340811,...,0.097717,0.270392,0.285293,0.140701,0.163469,0.300365,0.141059,0.244222,0.164270,0.041572,0.191134,0.242996,0.255964,0.287023,0.285293,0.201775,0.440809,0.041572,0.202459,0.207815,0.137597,0.361281,0.146079,0.311372,0.285293,0.340811,0.340811,0.035468,0.041572,0.253013,0.212341,0.179036,0.440809,0.035468,0.041572,0.199138,0.160011,0.340811,0.102089,0.285293
4,0.297211,0.178246,0.460973,0.663075,0.458873,0.333706,0.460973,0.156537,0.202994,0.324888,0.663075,0.358993,0.158289,0.547525,0.336108,0.411430,0.499773,0.458873,0.458873,0.632466,0.438309,0.425197,0.333706,0.368485,0.499773,0.547525,0.317349,0.499773,0.343795,0.411430,0.547525,0.229268,0.317349,0.411430,0.290323,0.460973,0.547525,0.276423,0.451140,0.547525,...,0.157233,0.434564,0.458873,0.226658,0.262947,0.482658,0.226547,0.392998,0.264133,0.066463,0.308014,0.391019,0.411430,0.460973,0.458873,0.324602,0.708573,0.066463,0.324888,0.333706,0.221518,0.580858,0.235016,0.499773,0.458873,0.547525,0.547525,0.057124,0.066463,0.406970,0.341727,0.287758,0.708573,0.057124,0.066463,0.320337,0.257613,0.547525,0.164021,0.458873
5,0.217635,0.130637,0.338586,0.487009,0.335994,0.246392,0.338586,0.115123,0.149030,0.239625,0.487009,0.262272,0.116193,0.402114,0.247431,0.302826,0.367674,0.335994,0.335994,0.465290,0.322911,0.312867,0.246392,0.271526,0.367674,0.402114,0.233027,0.367674,0.252877,0.302826,0.402114,0.169840,0.233027,0.302826,0.212706,0.338586,0.402114,0.202865,0.331827,0.402114,...,0.116243,0.318514,0.335994,0.166623,0.193914,0.355834,0.166306,0.288899,0.194613,0.048920,0.226030,0.287148,0.302826,0.338586,0.335994,0.237718,0.519647,0.048920,0.239625,0.246392,0.163454,0.426692,0.174386,0.367674,0.335994,0.402114,0.402114,0.041658,0.048920,0.299283,0.250999,0.211069,0.519647,0.041658,0.048920,0.234350,0.188208,0.402114,0.120189,0.335994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.462046,0.277509,0.715733,1.029245,0.712254,0.518448,0.715733,0.243888,0.315657,0.505285,1.029245,0.556925,0.246457,0.849515,0.522671,0.638438,0.775775,0.712254,0.712254,0.982007,0.680509,0.660024,0.518448,0.571954,0.775775,0.849515,0.492887,0.775775,0.534246,0.638438,0.849515,0.356332,0.492887,0.638438,0.451082,0.715733,0.849515,0.429028,0.700453,0.849515,...,0.244051,0.674986,0.712254,0.352314,0.409031,0.749960,0.352572,0.610464,0.411002,0.102879,0.478329,0.607730,0.638438,0.715733,0.712254,0.504458,1.099601,0.102879,0.505285,0.518448,0.344341,0.901442,0.365120,0.775775,0.712254,0.849515,0.849515,0.088742,0.102879,0.632526,0.530827,0.446571,1.099601,0.088742,0.102879,0.497617,0.400024,0.849515,0.254982,0.712254
607,0.300194,0.180191,0.466899,0.671473,0.463370,0.338798,0.466899,0.158768,0.205747,0.330003,0.671473,0.362556,0.160277,0.554294,0.341229,0.416636,0.506871,0.463370,0.463370,0.640719,0.444117,0.430802,0.338798,0.372784,0.506871,0.554294,0.321365,0.506871,0.347550,0.416636,0.554294,0.232455,0.321365,0.416636,0.293453,0.466899,0.554294,0.279995,0.456959,0.554294,...,0.159316,0.439440,0.463370,0.228869,0.266336,0.489334,0.229474,0.397298,0.267571,0.067710,0.310630,0.395168,0.416636,0.466899,0.463370,0.327782,0.716469,0.067710,0.330003,0.338798,0.224183,0.587581,0.238550,0.506871,0.463370,0.554294,0.554294,0.057506,0.067710,0.411720,0.345314,0.291023,0.716469,0.057506,0.067710,0.323350,0.259626,0.554294,0.165925,0.463370
608,0.376999,0.226847,0.584880,0.840784,0.580371,0.424902,0.584880,0.200046,0.258352,0.414519,0.840784,0.453518,0.201758,0.693579,0.428473,0.521569,0.634582,0.580371,0.580371,0.802770,0.556471,0.539522,0.424902,0.467094,0.634582,0.693579,0.402794,0.634582,0.436330,0.521569,0.693579,0.291976,0.402794,0.521569,0.368121,0.584880,0.693579,0.350469,0.572518,0.693579,...,0.199626,0.550927,0.580371,0.287537,0.334915,0.613943,0.288605,0.498397,0.336592,0.084262,0.389659,0.496182,0.521569,0.584880,0.580371,0.411437,0.896920,0.084262,0.414519,0.424902,0.281648,0.735710,0.299402,0.634582,0.580371,0.693579,0.693579,0.072152,0.084262,0.516924,0.433192,0.364328,0.896920,0.072152,0.084262,0.405552,0.325484,0.693579,0.208301,0.580371
609,0.255234,0.153402,0.396235,0.569704,0.393367,0.287476,0.396235,0.135168,0.174868,0.280327,0.569704,0.307683,0.136414,0.470097,0.289897,0.353307,0.429917,0.393367,0.393367,0.543652,0.376752,0.365434,0.287476,0.316202,0.429917,0.470097,0.272825,0.429917,0.295216,0.353307,0.470097,0.197282,0.272825,0.353307,0.249328,0.396235,0.470097,0.237547,0.387737,0.470097,...,0.135067,0.373217,0.393367,0.194494,0.226359,0.415388,0.195210,0.337358,0.227496,0.057240,0.263810,0.335784,0.353307,0.396235,0.393367,0.278600,0.607919,0.057240,0.280327,0.287476,0.190404,0.498473,0.202316,0.429917,0.393367,0.470097,0.470097,0.048896,0.057240,0.349763,0.293258,0.246929,0.607919,0.048896,0.057240,0.274755,0.220588,0.470097,0.141039,0.393367


In [None]:
# get user recommendation 
recommendation(df_user_movie_sim_1, dataset_rating, dataset_movies)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
16 . New York Stories (1989)
17 . How to Kill Your Neighbor's Dog (2000)
18 . Avanti! (1972)
19 . The Longest Week (2014)
20 . Masked & Anonymous (2003)


User id =  403
Recommended movies: 
1 . Strike! (a.k.a. All I Wanna Do, The Hairy Bird) (1998)
2 . Parenthood (1989)
3 . Inkwell, The (1994)
4 . Mystery Train (1989)
5 . Lucky You (2007)
6 . I'll Do Anything (1994)
7 . North Dallas Forty (1979)
8 . Network (1976)
9 . Mr. Saturday Night (1992)
10 . Ro.Go.Pa.G. (1963)
11 . Full Monty, The (1997)
12 . Reefer Madness (a.k.a. Tell Your Children) (1938)
13 . Indian Summer (a.k.a. Alive & Kicking) (1996)
14 . Empire Records (1995)
15 . House of Yes, The (1997)
16 . Funny Bones (1995)
17 . Goon (2011)
18 . With Honors (1994)
19 . Jeff, Who Lives at Home (2012)
20 . Kitchen Stories (Salmer fra kjøkkenet) (2003)


User id =  404
Recommended movies: 
1 . Squid and the Whale, The (2005)
2 . Calendar Girl (1993)
3 . Take Me Home Ton

# **Content-Based Filtering (Clustering over tags)**

In [None]:
# get ratings dataset
df_tags = pd.read_csv(drive+'tags.csv', sep=',')
df_tags.head(10)

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200
5,2,89774,Tom Hardy,1445715205
6,2,106782,drugs,1445715054
7,2,106782,Leonardo DiCaprio,1445715051
8,2,106782,Martin Scorsese,1445715056
9,7,48516,way too long,1169687325


In [None]:
# lowercase folding tag column values
df_tags['tag'] = df_tags['tag'].str.lower()

In [None]:
# get unique values of tag column
x = df_tags['tag'].str.split("|").to_list()
flatten_list = list(chain.from_iterable(x)) 
set(flatten_list)

{'french',
 'scotland',
 'simon pegg',
 'adultery',
 'mafia',
 'beautiful scenery',
 'investor corruption',
 'metaphorical',
 'multiple stories',
 'sisterhood',
 'live action/animation',
 'poor dialogue',
 'philosophical issues',
 'space travel',
 'stand-up comedy',
 'non-linear timeline',
 'trippy',
 'invisibility',
 'ocean',
 'new composer',
 'biopic',
 'witty',
 'tilda swinton',
 'nightclub',
 'depressing',
 'thrilling',
 'prison',
 'lord of the rings',
 'american indians',
 'great humor',
 'whimsical',
 'financial crisis',
 'engrossing adventure',
 'meaningless violence',
 'awkward',
 'ancient rome',
 'trey parker',
 'statue',
 'teachers',
 'philosophy',
 'space epic',
 'may-december romance',
 'cult',
 'psychopaths',
 'british gangster',
 'tolkein',
 'moon',
 'backwards. memory',
 'leonardo dicaprio',
 'race',
 'deep throat',
 'hannibal lecter',
 'long takes',
 'amazing dialogues',
 'monologue',
 'dreamlike',
 'boxing story',
 'ireland',
 'eccentric',
 'nightmare',
 'quentin taran

In [None]:
# sort tag values by movie id
df_tags = df_tags.sort_values(by=['movieId'])
df_tags

Unnamed: 0,userId,movieId,tag,timestamp
2886,567,1,fun,1525286013
981,474,1,pixar,1137206825
629,336,1,pixar,1139045764
35,62,2,robin williams,1528843907
34,62,2,magic board game,1528843932
...,...,...,...,...
402,62,187595,star wars,1528934552
528,184,193565,comedy,1537098587
527,184,193565,anime,1537098582
530,184,193565,remaster,1537098592


In [None]:
# get movie dataset
df_movies = dataset_movies

In [None]:
# add tag column in movie dataset and set it with empty values
df_movies["tag"] = np.nan
df_movies

Unnamed: 0,movieId,title,genres,tag
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,
1,2,Jumanji (1995),Adventure|Children|Fantasy,
2,3,Grumpier Old Men (1995),Comedy|Romance,
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,
4,5,Father of the Bride Part II (1995),Comedy,
...,...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,
9739,193585,Flint (2017),Drama,
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,


In [None]:
# set value of tag in movie dataset
for index,row in df_tags.iterrows():
  idx = df_movies.index[df_movies["movieId"] == row[1]].tolist()[0]
  if pd.isnull(df_movies["tag"].iloc[idx]):
    df_movies["tag"].iloc[idx] = row[2]
  else:
    if row[2] not in df_movies["tag"].iloc[idx]:
      df_movies["tag"].iloc[idx] += "|"+row[2]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [None]:
# delete movie rows where tag column value is empty
df_movies = df_movies[~df_movies["tag"].isnull()].iloc[:,[0,1,3]]
df_movies

Unnamed: 0,movieId,title,tag
0,1,Toy Story (1995),fun|pixar
1,2,Jumanji (1995),robin williams|magic board game|fantasy
2,3,Grumpier Old Men (1995),old|moldy
4,5,Father of the Bride Part II (1995),remake|pregnancy
6,7,Sabrina (1995),remake
...,...,...,...
9681,183611,Game Night (2018),comedy|rachel mcadams|funny
9692,184471,Tomb Raider (2018),adventure|alicia vikander|video game adaptation
9709,187593,Deadpool 2 (2018),josh brolin|ryan reynolds|sarcasm
9710,187595,Solo: A Star Wars Story (2018),emilia clarke|star wars


In [None]:
# get dataframe of cosine similarity
recommender_df = count_cosine_similarity(df_movies.tag, df_movies.movieId)
recommender_df

tag,fun|pixar,robin williams|magic board game|fantasy,old|moldy,remake|pregnancy,remake,president|politics,politics|president,mafia,jane austen,hollywood,serial killer,alcoholism,shakespeare,jane austen|in netflix queue,kidnapping,teacher|high school,brad pitt|bruce willis|mindfuck|twist ending|time travel|post apocalyptic|remake|post-apocalyptic,animal movie|pigs|villain nonexistent or not needed for good story,death penalty|nun,twins,emma|jane austen|seen more than once|quotable|paul rudd|funny|chick flick,south africa|in netflix queue,england,journalism,wedding,serial killer|mystery|twist ending,mindfuck|thriller|suspense|twist ending|heist|tricky,adoption|prostitution,writing,music,jekyll and hyde,theater,crime|quirky|off-beat comedy,adam sandler|test tag|golf,muppets,beautiful scenery|oscar (best cinematography)|mel gibson|scotland|sword fight|revenge|medieval|inspirational|historical|epic,assassination,holocaust,dating,mark wahlberg|heroin|addiction|leonardo dicaprio,...,dystopia|jennifer lawrence|love story|revolution|rebellion|politics|nonsense,ethics|ryan reynolds|immortality|ben kingsley,poorly paced|harley quinn|superhero|visually appealing|will smith|poor story|margot robbie|harley quinn's ass|joker|jared leto|horrible directing|bad story|bad writing|batman|good soundtrack|dc comics|comic book|ben affleck|lack of plot,superman|wonderwoman|gal gadot|superhero|action|batman|ben affleck|dark|dc comics,stylish|slick,visually striking|visually appealing|cinematography|tom hardy|survival|leonardo dicarpio,atmospheric|tension,touching|moving,moving,tragic|thought-provoking|philosophical|contemplative,sustainability|dumpster diving,funny|interesting|witty,existential|surreal,suspense|creepy,procedural|murder|human rights|crime|cambodia,nightmare,thought-provoking|suspense,80's,sweet|understated|quirky,crude humor|mindfuck|sarcasm|satire|vulgar,creative|viggo mortensen|individualism|good writing|freedom|building a family,intense,beautiful visuals|visually appealing|thought-provoking|smart|good cinematography|cerebral,visually appealing|bittersweet,heartwarming|funny,action|heroic bloodshed|keanu reeves|secret society|organized crime|hitman|gun tactics|dark hero,predictible plot|heartbreaking|gritty|emotional|dark,paranoia|suspenseful,dystopia|thought provoking|jon hamm|interesting|future,inspiring|tense|brilliant,dark|atmospheric|beautiful|cinematography|philosophical|moody|existentialism|dreamlike,unsettling|uncomfortable|allegorical,dwayne johnson|comedy|funny|action,dreamlike|atmospheric,bad music,comedy|rachel mcadams|funny,adventure|alicia vikander|video game adaptation,josh brolin|ryan reynolds|sarcasm,emilia clarke|star wars,comedy|anime|remaster|gintama
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000
2,0.0,1.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.178864,0.0,0.0,0.000000
3,0.0,0.000000,1.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000
5,0.0,0.000000,0.0,1.000000,0.675448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.177816,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000
7,0.0,0.000000,0.0,0.675448,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.263257,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183611,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.085739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.147868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.205478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.267894,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271288,0.0,0.0,1.000000,0.000000,0.0,0.0,0.125494
184471,0.0,0.178864,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,1.000000,0.0,0.0,0.000000
187593,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.316665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175915,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,1.0,0.0,0.000000
187595,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,1.0,0.000000


In [None]:
# get total users from user_movie_matrix from recommender 2
n_users = len(df_user_movie_sim_1.index)

In [None]:
# get one-hot-encoded movie tag
df_movie_tag = pd.get_dummies(df_movies['tag'])

# get user similarity values to all tag
df_user_tag = pd.DataFrame(columns = recommender_df.columns)
for i in range(1,n_users+1):
    df_user_tag.loc[i] = df_movie_tag.mul(df_user_movie_sim_1.loc[i], axis=0).mean() # multiply one-hot-encoded movie tag with user_movie_matrix from recommender 2
df_user_tag

tag,fun|pixar,robin williams|magic board game|fantasy,old|moldy,remake|pregnancy,remake,president|politics,politics|president,mafia,jane austen,hollywood,serial killer,alcoholism,shakespeare,jane austen|in netflix queue,kidnapping,teacher|high school,brad pitt|bruce willis|mindfuck|twist ending|time travel|post apocalyptic|remake|post-apocalyptic,animal movie|pigs|villain nonexistent or not needed for good story,death penalty|nun,twins,emma|jane austen|seen more than once|quotable|paul rudd|funny|chick flick,south africa|in netflix queue,england,journalism,wedding,serial killer|mystery|twist ending,mindfuck|thriller|suspense|twist ending|heist|tricky,adoption|prostitution,writing,music,jekyll and hyde,theater,crime|quirky|off-beat comedy,adam sandler|test tag|golf,muppets,beautiful scenery|oscar (best cinematography)|mel gibson|scotland|sword fight|revenge|medieval|inspirational|historical|epic,assassination,holocaust,dating,mark wahlberg|heroin|addiction|leonardo dicaprio,...,dystopia|jennifer lawrence|love story|revolution|rebellion|politics|nonsense,ethics|ryan reynolds|immortality|ben kingsley,poorly paced|harley quinn|superhero|visually appealing|will smith|poor story|margot robbie|harley quinn's ass|joker|jared leto|horrible directing|bad story|bad writing|batman|good soundtrack|dc comics|comic book|ben affleck|lack of plot,superman|wonderwoman|gal gadot|superhero|action|batman|ben affleck|dark|dc comics,stylish|slick,visually striking|visually appealing|cinematography|tom hardy|survival|leonardo dicarpio,atmospheric|tension,touching|moving,moving,tragic|thought-provoking|philosophical|contemplative,sustainability|dumpster diving,funny|interesting|witty,existential|surreal,suspense|creepy,procedural|murder|human rights|crime|cambodia,nightmare,thought-provoking|suspense,80's,sweet|understated|quirky,crude humor|mindfuck|sarcasm|satire|vulgar,creative|viggo mortensen|individualism|good writing|freedom|building a family,intense,beautiful visuals|visually appealing|thought-provoking|smart|good cinematography|cerebral,visually appealing|bittersweet,heartwarming|funny,action|heroic bloodshed|keanu reeves|secret society|organized crime|hitman|gun tactics|dark hero,predictible plot|heartbreaking|gritty|emotional|dark,paranoia|suspenseful,dystopia|thought provoking|jon hamm|interesting|future,inspiring|tense|brilliant,dark|atmospheric|beautiful|cinematography|philosophical|moody|existentialism|dreamlike,unsettling|uncomfortable|allegorical,dwayne johnson|comedy|funny|action,dreamlike|atmospheric,bad music,comedy|rachel mcadams|funny,adventure|alicia vikander|video game adaptation,josh brolin|ryan reynolds|sarcasm,emilia clarke|star wars,comedy|anime|remaster|gintama
1,0.0,0.000363,0.000218,0.000808,0.000867,0.000545,0.000194,0.003338,0.001169,0.001339,0.001279,0.001635,0.003224,0.000387,0.000609,0.000502,0.000667,0.000280,0.0,0.002358,0.0,0.000502,0.001777,0.002699,0.002106,0.000667,0.000609,0.000239,0.001961,0.000667,0.000577,0.001473,0.000370,0.000547,0.000397,0.000502,0.000859,0.001733,0.001414,0.0,...,0.0,0.000530,0.000667,0.0,0.000356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.000254,0.000152,0.000567,0.000607,0.000382,0.000135,0.002340,0.000820,0.000939,0.000897,0.001146,0.002260,0.000271,0.000428,0.000352,0.000468,0.000196,0.0,0.001652,0.0,0.000352,0.001247,0.001892,0.001478,0.000468,0.000428,0.000168,0.001375,0.000468,0.000405,0.001033,0.000260,0.000384,0.000278,0.000352,0.000602,0.001216,0.000991,0.0,...,0.0,0.000371,0.000468,0.0,0.000248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.000183,0.000110,0.000409,0.000438,0.000275,0.000098,0.001690,0.000591,0.000677,0.000647,0.000826,0.001630,0.000196,0.000309,0.000254,0.000338,0.000141,0.0,0.001192,0.0,0.000254,0.000899,0.001364,0.001066,0.000338,0.000309,0.000121,0.000993,0.000338,0.000292,0.000745,0.000187,0.000277,0.000201,0.000254,0.000434,0.000876,0.000715,0.0,...,0.0,0.000268,0.000338,0.0,0.000179,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.000295,0.000177,0.000657,0.000703,0.000442,0.000157,0.002714,0.000950,0.001088,0.001039,0.001328,0.002619,0.000315,0.000495,0.000408,0.000543,0.000227,0.0,0.001915,0.0,0.000408,0.001444,0.002192,0.001711,0.000543,0.000495,0.000194,0.001594,0.000543,0.000469,0.001198,0.000300,0.000445,0.000322,0.000408,0.000698,0.001408,0.001149,0.0,...,0.0,0.000431,0.000543,0.0,0.000288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.000216,0.000129,0.000483,0.000518,0.000326,0.000115,0.001993,0.000699,0.000801,0.000765,0.000977,0.001925,0.000231,0.000364,0.000300,0.000399,0.000168,0.0,0.001407,0.0,0.000300,0.001063,0.001613,0.001261,0.000399,0.000364,0.000143,0.001171,0.000399,0.000346,0.000879,0.000221,0.000327,0.000237,0.000300,0.000513,0.001037,0.000844,0.0,...,0.0,0.000316,0.000399,0.0,0.000211,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.000458,0.000275,0.001020,0.001093,0.000686,0.000244,0.004212,0.001475,0.001689,0.001613,0.002062,0.004067,0.000488,0.000769,0.000633,0.000842,0.000353,0.0,0.002974,0.0,0.000633,0.002242,0.003405,0.002657,0.000842,0.000769,0.000301,0.002475,0.000842,0.000728,0.001859,0.000467,0.000691,0.000501,0.000633,0.001084,0.002186,0.001784,0.0,...,0.0,0.000669,0.000842,0.0,0.000448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.000298,0.000179,0.000665,0.000713,0.000448,0.000159,0.002748,0.000962,0.001102,0.001053,0.001344,0.002653,0.000318,0.000502,0.000413,0.000549,0.000230,0.0,0.001939,0.0,0.000413,0.001464,0.002221,0.001736,0.000549,0.000502,0.000197,0.001615,0.000549,0.000476,0.001212,0.000305,0.000451,0.000327,0.000413,0.000707,0.001427,0.001163,0.0,...,0.0,0.000436,0.000549,0.0,0.000291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.000374,0.000225,0.000833,0.000895,0.000563,0.000200,0.003441,0.001204,0.001381,0.001319,0.001685,0.003324,0.000399,0.000629,0.000517,0.000687,0.000289,0.0,0.002429,0.0,0.000517,0.001833,0.002784,0.002174,0.000687,0.000629,0.000247,0.002022,0.000687,0.000596,0.001518,0.000383,0.000564,0.000411,0.000517,0.000886,0.001789,0.001456,0.0,...,0.0,0.000546,0.000687,0.0,0.000366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.0,0.000253,0.000152,0.000565,0.000606,0.000380,0.000135,0.002331,0.000816,0.000935,0.000893,0.001141,0.002251,0.000270,0.000426,0.000350,0.000466,0.000196,0.0,0.001646,0.0,0.000350,0.001242,0.001885,0.001472,0.000466,0.000426,0.000167,0.001370,0.000466,0.000403,0.001029,0.000259,0.000383,0.000278,0.000350,0.000600,0.001211,0.000987,0.0,...,0.0,0.000370,0.000466,0.0,0.000248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# initialize new user-movie similarity dataframe
df_user_movie_sim_2 = pd.DataFrame(columns = df_user_items.columns)

# map user-movie similarity to dataframe
for i in range(1,n_users+1):
  df_user_movie_sim_2.loc[i] = recommender_df.mul(df_user_tag.loc[i]).sum(axis=1)

df_user_movie_sim_2

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,39,40,41,42,43,...,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188751,188797,188833,189043,189111,189333,189381,189547,189713,190183,190207,190209,190213,190215,190219,190221,191005,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,0.000936,0.001099,0.000218,,0.002672,,0.002719,,,,0.002172,,,0.002172,,0.003929,0.001497,,,,0.001931,0.002236,,,0.001635,0.004258,,0.020119,0.001742,,0.003701,0.004951,0.001789,0.001960,0.002358,0.001005,0.020536,0.004258,,0.002341,...,,,,,,,,0.000341,0.000396,,,,,,,,,,,,,,,,,,,,,,0.002571,,,,,,,,,
2,0.000656,0.000770,0.000152,,0.001873,,0.001906,,,,0.001522,,,0.001522,,0.002755,0.001050,,,,0.001354,0.001567,,,0.001146,0.002985,,0.014102,0.001222,,0.002593,0.003471,0.001254,0.001374,0.001652,0.000705,0.014394,0.002985,,0.001642,...,,,,,,,,0.000239,0.000278,,,,,,,,,,,,,,,,,,,,,,0.001802,,,,,,,,,
3,0.000474,0.000556,0.000110,,0.001351,,0.001375,,,,0.001097,,,0.001097,,0.001989,0.000758,,,,0.000977,0.001131,,,0.000826,0.002154,,0.010175,0.000882,,0.001871,0.002505,0.000904,0.000992,0.001192,0.000509,0.010387,0.002154,,0.001185,...,,,,,,,,0.000172,0.000200,,,,,,,,,,,,,,,,,,,,,,0.001301,,,,,,,,,
4,0.000761,0.000893,0.000177,,0.002171,,0.002209,,,,0.001764,,,0.001764,,0.003194,0.001217,,,,0.001569,0.001817,,,0.001328,0.003460,,0.016349,0.001417,,0.003008,0.004024,0.001453,0.001593,0.001915,0.000817,0.016688,0.003460,,0.001903,...,,,,,,,,0.000277,0.000322,,,,,,,,,,,,,,,,,,,,,,0.002090,,,,,,,,,
5,0.000559,0.000655,0.000129,,0.001595,,0.001624,,,,0.001297,,,0.001297,,0.002346,0.000895,,,,0.001154,0.001336,,,0.000977,0.002543,,0.012013,0.001041,,0.002208,0.002956,0.001069,0.001170,0.001407,0.000600,0.012262,0.002543,,0.001399,...,,,,,,,,0.000203,0.000237,,,,,,,,,,,,,,,,,,,,,,0.001535,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.001181,0.001386,0.000275,,0.003371,,0.003430,,,,0.002740,,,0.002740,,0.004957,0.001889,,,,0.002436,0.002820,,,0.002062,0.005372,,0.025381,0.002199,,0.004669,0.006247,0.002257,0.002473,0.002974,0.001269,0.025908,0.005372,,0.002954,...,,,,,,,,0.000430,0.000500,,,,,,,,,,,,,,,,,,,,,,0.003244,,,,,,,,,
607,0.000770,0.000904,0.000179,,0.002199,,0.002238,,,,0.001786,,,0.001786,,0.003235,0.001233,,,,0.001590,0.001840,,,0.001344,0.003504,,0.016553,0.001435,,0.003043,0.004075,0.001472,0.001613,0.001939,0.000828,0.016896,0.003504,,0.001927,...,,,,,,,,0.000280,0.000326,,,,,,,,,,,,,,,,,,,,,,0.002116,,,,,,,,,
608,0.000965,0.001132,0.000225,,0.002755,,0.002805,,,,0.002239,,,0.002239,,0.004050,0.001543,,,,0.001991,0.002304,,,0.001685,0.004390,,0.020735,0.001796,,0.003811,0.005102,0.001844,0.002020,0.002429,0.001036,0.021164,0.004390,,0.002414,...,,,,,,,,0.000351,0.000408,,,,,,,,,,,,,,,,,,,,,,0.002650,,,,,,,,,
609,0.000654,0.000767,0.000152,,0.001866,,0.001899,,,,0.001516,,,0.001516,,0.002744,0.001046,,,,0.001349,0.001561,,,0.001141,0.002974,,0.014046,0.001217,,0.002582,0.003457,0.001249,0.001369,0.001646,0.000702,0.014337,0.002974,,0.001635,...,,,,,,,,0.000238,0.000276,,,,,,,,,,,,,,,,,,,,,,0.001795,,,,,,,,,


In [None]:
# convert all nan values with 0
df_user_movie_sim_2 = df_user_movie_sim_2.fillna(0)
df_user_movie_sim_2

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,39,40,41,42,43,...,185135,185435,185473,185585,186587,187031,187541,187593,187595,187717,188189,188301,188675,188751,188797,188833,189043,189111,189333,189381,189547,189713,190183,190207,190209,190213,190215,190219,190221,191005,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,0.000936,0.001099,0.000218,0.0,0.002672,0.0,0.002719,0.0,0.0,0.0,0.002172,0.0,0.0,0.002172,0.0,0.003929,0.001497,0.0,0.0,0.0,0.001931,0.002236,0.0,0.0,0.001635,0.004258,0.0,0.020119,0.001742,0.0,0.003701,0.004951,0.001789,0.001960,0.002358,0.001005,0.020536,0.004258,0.0,0.002341,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000341,0.000396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000656,0.000770,0.000152,0.0,0.001873,0.0,0.001906,0.0,0.0,0.0,0.001522,0.0,0.0,0.001522,0.0,0.002755,0.001050,0.0,0.0,0.0,0.001354,0.001567,0.0,0.0,0.001146,0.002985,0.0,0.014102,0.001222,0.0,0.002593,0.003471,0.001254,0.001374,0.001652,0.000705,0.014394,0.002985,0.0,0.001642,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000239,0.000278,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000474,0.000556,0.000110,0.0,0.001351,0.0,0.001375,0.0,0.0,0.0,0.001097,0.0,0.0,0.001097,0.0,0.001989,0.000758,0.0,0.0,0.0,0.000977,0.001131,0.0,0.0,0.000826,0.002154,0.0,0.010175,0.000882,0.0,0.001871,0.002505,0.000904,0.000992,0.001192,0.000509,0.010387,0.002154,0.0,0.001185,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000172,0.000200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001301,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000761,0.000893,0.000177,0.0,0.002171,0.0,0.002209,0.0,0.0,0.0,0.001764,0.0,0.0,0.001764,0.0,0.003194,0.001217,0.0,0.0,0.0,0.001569,0.001817,0.0,0.0,0.001328,0.003460,0.0,0.016349,0.001417,0.0,0.003008,0.004024,0.001453,0.001593,0.001915,0.000817,0.016688,0.003460,0.0,0.001903,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000277,0.000322,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002090,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.000559,0.000655,0.000129,0.0,0.001595,0.0,0.001624,0.0,0.0,0.0,0.001297,0.0,0.0,0.001297,0.0,0.002346,0.000895,0.0,0.0,0.0,0.001154,0.001336,0.0,0.0,0.000977,0.002543,0.0,0.012013,0.001041,0.0,0.002208,0.002956,0.001069,0.001170,0.001407,0.000600,0.012262,0.002543,0.0,0.001399,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000203,0.000237,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.001181,0.001386,0.000275,0.0,0.003371,0.0,0.003430,0.0,0.0,0.0,0.002740,0.0,0.0,0.002740,0.0,0.004957,0.001889,0.0,0.0,0.0,0.002436,0.002820,0.0,0.0,0.002062,0.005372,0.0,0.025381,0.002199,0.0,0.004669,0.006247,0.002257,0.002473,0.002974,0.001269,0.025908,0.005372,0.0,0.002954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000430,0.000500,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.000770,0.000904,0.000179,0.0,0.002199,0.0,0.002238,0.0,0.0,0.0,0.001786,0.0,0.0,0.001786,0.0,0.003235,0.001233,0.0,0.0,0.0,0.001590,0.001840,0.0,0.0,0.001344,0.003504,0.0,0.016553,0.001435,0.0,0.003043,0.004075,0.001472,0.001613,0.001939,0.000828,0.016896,0.003504,0.0,0.001927,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000280,0.000326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002116,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.000965,0.001132,0.000225,0.0,0.002755,0.0,0.002805,0.0,0.0,0.0,0.002239,0.0,0.0,0.002239,0.0,0.004050,0.001543,0.0,0.0,0.0,0.001991,0.002304,0.0,0.0,0.001685,0.004390,0.0,0.020735,0.001796,0.0,0.003811,0.005102,0.001844,0.002020,0.002429,0.001036,0.021164,0.004390,0.0,0.002414,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000351,0.000408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002650,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.000654,0.000767,0.000152,0.0,0.001866,0.0,0.001899,0.0,0.0,0.0,0.001516,0.0,0.0,0.001516,0.0,0.002744,0.001046,0.0,0.0,0.0,0.001349,0.001561,0.0,0.0,0.001141,0.002974,0.0,0.014046,0.001217,0.0,0.002582,0.003457,0.001249,0.001369,0.001646,0.000702,0.014337,0.002974,0.0,0.001635,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000238,0.000276,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# get top-20 recommended movies for every user
recommendation(df_user_movie_sim_2, dataset_rating, dataset_movies)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
16 . My Left Foot (1989)
17 . Buena Vista Social Club (1999)
18 . Little Voice (1998)
19 . Frankenstein (1931)
20 . Purple Rose of Cairo, The (1985)


User id =  403
Recommended movies: 
1 . Himalaya (Himalaya - l'enfance d'un chef) (1999)
2 . Battle of Algiers, The (La battaglia di Algeri) (1966)
3 . Letter to Three Wives, A (1949)
4 . Letter, The (1940)
5 . Alive (1993)
6 . My Family (1995)
7 . River's Edge (1986)
8 . Terrorist, The (a.k.a. Malli) (Theeviravaathi) (1998)
9 . Rainmaker, The (1997)
10 . That's Entertainment (1974)
11 . Gentlemen Prefer Blondes (1953)
12 . Faust (1926)
13 . Alice Doesn't Live Here Anymore (1974)
14 . Brassed Off (1996)
15 . Our Hospitality (1923)
16 . Tokyo Godfathers (2003)
17 . Cold Comfort Farm (1995)
18 . Dresser, The (1983)
19 . Lone Star (1996)
20 . Black Robe (1991)


User id =  404
Recommended movies: 
1 . How to Steal a Million (1966)
2 . Limelight (1952)
3 . Pride & Prejudice (20