# Collaborative Filtering

* Not all Users have rated every movie (This is logical, and not really a missing data)
* 

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.metrics.pairwise import pairwise_distances

In [3]:
r_cols = ["user_id", "movie_id", "rating", "unix_timestamp"]
ratings = pd.read_csv("data/ml-100k/u.data", names=r_cols, sep="\t", encoding="latin-1")
ratings.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [26]:
ratings.isna().sum()

user_id           0
movie_id          0
rating            0
unix_timestamp    0
dtype: int64

In [4]:
X = ratings.pivot_table(values='rating', index='user_id', columns='movie_id')

In [5]:
X.shape

(943, 1682)

In [6]:
n_users, n_items = X.shape

In [7]:
user_sim = np.zeros((n_users, n_users))
item_sim = np.zeros((n_items, n_items))

In [8]:
X_isna = X.isna()

In [9]:
def calc_dist_nan(curr_id, other_id, calc, metric):
    if calc == "user":
        curr = X.loc[curr_id, :]
        other = X.loc[other_id, :]
        curr_isna = X_isna.loc[curr_id, :]
        other_isna = X_isna.loc[other_id, :]
        
    elif calc == "item":
        curr = X.loc[:, curr_id]
        other = X.loc[:, other_id]
        curr_isna = X_isna.loc[:, curr_id]
        other_isna = X_isna.loc[:, other_id]
        
        
    valid_idx = ~(curr_isna | other_isna)
    other = other.loc[valid_idx].values.reshape(1, -1)
    curr = curr.loc[valid_idx].values.reshape(1, -1)
    
    try:
        res = pairwise_distances(curr, other, metric=metric)[0, 0]
    except ValueError:
        res = np.nan
    
    return res

In [10]:
calc_dist_nan(1, 5, "user", "correlation")

0.5791913817512466

In [11]:
# for user_id in user_ids:
#     for other_id in user_ids:
#         user_sim[user_id-1, other_id-1] = calc_dist_nan(user_id, other_id, "user", "correlation")

* Cosine Similarity is not affected by ZERO padding (X -> X_zf (zero-fill))
* Pearson Correlation is not affected by MEAN padding (X -> X_umf, X_imf (user and item mean-fill))

In [12]:
X_zf = X.fillna(0)
X_umf = X.T.fillna(X.mean(axis=1), axis=0).T
X_imf = X.fillna(X.mean(axis=0), axis=0)

Here I choose the pearson correlation similarity

In [18]:
user_sim = 1 - pairwise_distances(X_umf, X_umf, metric="correlation")

In [19]:
item_sim = 1 - pairwise_distances(X_imf.T, X_imf.T, metric="correlation")

In [20]:
user_sim = pd.DataFrame(user_sim, index=X.index, columns=X.index)
item_sim = pd.DataFrame(item_sim, index=X.columns, columns=X.columns)

Item similarity has NaN values as some of the columns had the same entries for each movie 

In [21]:
item_sim.fillna(0, inplace=True)

In [22]:
user_sim.head()

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.043411,0.011051,0.059303,0.134514,0.103373,0.110556,0.180891,0.012253,-0.000621,...,0.025835,-0.047952,0.087224,0.007718,0.074378,0.078714,0.067433,0.02879,-0.03127,0.032123
2,0.043411,1.0,0.013658,-0.017016,0.03577,0.094503,0.089408,0.05564,0.027294,0.097846,...,0.012853,-0.028798,0.056659,0.197835,0.090009,0.032505,0.015053,-0.017344,0.012068,0.039173
3,0.011051,0.013658,1.0,-0.059638,0.016037,-0.017158,0.016141,0.041177,-0.010093,0.023856,...,0.001615,0.000658,-0.006888,0.036157,-0.018513,-0.00624,-0.023907,0.034414,-0.009187,0.001489
4,0.059303,-0.017016,-0.059638,1.0,0.007373,-0.053929,-0.025604,0.136046,0.016082,-0.013588,...,0.011895,0.002174,-0.028,-0.025021,0.022882,-0.00596,0.279818,0.258594,0.064504,-0.019222
5,0.134514,0.03577,0.016037,0.007373,1.0,0.038484,0.067874,0.140106,0.010195,0.014335,...,0.070014,-0.070821,0.024278,0.038672,0.093567,0.051782,0.02954,0.036234,0.043318,0.099324


In [23]:
item_sim.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.091331,0.074554,0.051377,0.114383,0.073914,0.109929,0.11684,0.055881,0.068413,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.091331,1.0,0.073814,0.136619,0.061872,-0.007447,0.070359,0.13144,-0.060372,0.04421,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.074554,0.073814,1.0,-0.05746,0.027472,0.118627,0.023926,-0.016949,0.003838,0.025616,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.051377,0.136619,-0.05746,1.0,-0.106871,0.007538,0.072354,0.145265,0.087573,0.050025,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.114383,0.061872,0.027472,-0.106871,1.0,-0.01728,0.073628,0.058144,0.019236,-0.028606,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## User-User Collaborative filtering

In [41]:
X.shape

(943, 1682)

In [43]:
user_sim.shape

(943, 943)

In [60]:
abs(user_sim).sum(axis=1).values.reshape(-1, 1)

array([[56.13453344],
       [53.62953355],
       [36.78857378],
       [38.4154689 ],
       [42.19835711],
       [54.01155478],
       [46.7017578 ],
       [54.81784314],
       [32.20056127],
       [45.78174319],
       [43.17620915],
       [29.14143259],
       [51.62787931],
       [39.74530558],
       [42.67077857],
       [44.31878931],
       [46.95286984],
       [40.15706092],
       [45.54480418],
       [36.97779127],
       [44.79639597],
       [52.0145942 ],
       [53.05736904],
       [49.64881511],
       [41.80696091],
       [71.7161227 ],
       [36.78610031],
       [51.49038508],
       [52.63811199],
       [48.25512184],
       [30.08964823],
       [46.42728497],
       [44.36937465],
       [53.1887937 ],
       [40.15271008],
       [31.04003688],
       [42.95514754],
       [28.80683684],
       [37.32122741],
       [36.15513581],
       [44.51019862],
       [42.47406016],
       [49.4639602 ],
       [46.97878446],
       [48.74746208],
       [54

In [61]:
user_sim.dot(X_umf)/abs(user_sim).sum(axis=1).values.reshape(-1, 1)

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.367443,3.050156,3.062636,3.138132,3.063545,3.140949,3.317156,3.166494,3.320031,3.131295,...,3.117925,3.117871,3.117319,3.116092,3.117508,3.116702,3.117814,3.117258,3.117933,3.116667
2,2.829395,2.698213,2.685199,2.729978,2.712074,2.740429,2.801614,2.785215,2.847168,2.679894,...,2.729620,2.731041,2.730528,2.729381,2.730714,2.731522,2.731082,2.731302,2.731072,2.730731
3,0.841239,0.788052,0.792466,0.809196,0.784121,0.806564,0.824980,0.831088,0.861476,0.798661,...,0.803095,0.803975,0.802722,0.800010,0.804083,0.804415,0.803994,0.804205,0.803969,0.803878
4,1.362202,1.291815,1.276135,1.285394,1.289735,1.300162,1.332971,1.345606,1.302443,1.314609,...,1.297848,1.295785,1.295315,1.294086,1.295298,1.296374,1.295908,1.296141,1.295935,1.295792
5,3.025851,2.687103,2.664768,2.772461,2.702478,2.736314,2.957923,2.850895,2.823500,2.747223,...,2.737413,2.736412,2.735702,2.734220,2.736154,2.735610,2.736338,2.735974,2.736433,2.735931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,1.796197,1.610830,1.566916,1.644655,1.616724,1.625843,1.724781,1.642666,1.772313,1.614328,...,1.622947,1.622901,1.621806,1.619863,1.622981,1.622037,1.622660,1.622348,1.622696,1.622085
940,2.674363,2.513265,2.507046,2.482143,2.533126,2.541270,2.694532,2.702369,2.570070,2.554114,...,2.544097,2.542958,2.542332,2.541087,2.542416,2.541659,2.542832,2.542245,2.542945,2.542792
941,2.249569,1.914620,1.907002,1.939932,1.932045,1.949071,2.018211,2.016364,2.011199,1.966351,...,1.938171,1.937393,1.937271,1.937347,1.937046,1.937433,1.937247,1.937340,1.937257,1.936839
942,1.821803,1.655311,1.657911,1.694862,1.679812,1.684175,1.676933,1.771825,1.749000,1.686660,...,1.683364,1.683558,1.682296,1.679815,1.683272,1.682454,1.683379,1.682916,1.683471,1.683075


In [53]:
user_sim.dot(X_umf)/abs(user_sim).sum(axis=)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.367443,3.192627,4.673180,4.585589,4.075293,3.264407,3.987152,3.242551,5.787737,3.839385,...,,,,,,,,,,
2,2.703133,2.698213,3.914422,3.811159,3.446752,2.721046,3.217207,2.724839,4.741913,3.139275,...,,,,,,,,,,
3,0.551318,0.540585,0.792466,0.774927,0.683598,0.549370,0.649865,0.557748,0.984221,0.641775,...,,,,,,,,,,
4,0.932218,0.925342,1.332569,1.285394,1.174117,0.924734,1.096462,0.942979,1.553822,1.103089,...,,,,,,,,,,
5,2.274642,2.114345,3.056624,3.045473,2.702478,2.137838,2.672694,2.194597,3.700155,2.532195,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.931353,0.874251,1.239718,1.246118,1.115143,0.876158,1.074956,0.872203,1.602018,1.026336,...,,,,,,,,,,
940,2.179144,2.143532,3.117059,2.955401,2.745724,2.152087,2.639039,2.254854,3.650708,2.551782,...,,,,,,,,,,
941,1.526039,1.359487,1.973944,1.922990,1.743486,1.374161,1.645622,1.400695,2.378418,1.635555,...,,,,,,,,,,
942,1.395777,1.327457,1.938175,1.897462,1.712025,1.341052,1.544285,1.390093,2.335991,1.584456,...,,,,,,,,,,


In [47]:
X

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [46]:
X.T @ user_sim

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,,,,,,,,,,,...,,,,,,,,,,
1679,,,,,,,,,,,...,,,,,,,,,,
1680,,,,,,,,,,,...,,,,,,,,,,
1681,,,,,,,,,,,...,,,,,,,,,,


In [None]:
def predict_rating(ratings, similarity, method):
    if method == "user-user":
        
    elif method == "item-item":
        pass
    
    return None

In [33]:
from sklearn.base import BaseEstimator, ClassifierMixin

In [183]:
class Recommender(BaseEstimator, ClassifierMixin):
    def __init__(self, engine, method):
        '''
        Parameters
        ----------
        engine: str, ['user', 'item']
            user or item based collaborative filtering approach
            
        method: str, ['correlation', 'cosine']
            Similarity metric to use
            'correlation': pearson correlation,
            'cosine': cosine similarity
        '''
        self.engine = engine
        self.method = method
        
    
    @staticmethod
    def _impute(X, engine, method):

        if engine == 'user':
            if method == 'correlation':
                X = X.T.fillna(X.mean(axis=1), axis=0).T
            elif method == 'cosine':
                X = X.fillna(0)
                
        elif engine == 'item':
            if method == 'correlation':
                X = X.fillna(X.mean(axis=0), axis=0).T
            elif method == 'cosine':
                X = X.fillna(0).T
                
                
        return X
        
    def fit(self, user_item_ids, ratings):
        ''' fit method
        
        Parameters
        ----------
        user_item_ids: np.array
        ratings: np.array
            ratings
        '''
        
        user_ids, item_ids = user_item_ids[:, 0], user_item_ids[:, 1]
        X = (pd.DataFrame(
                {'user_id': user_ids, 'item_id': item_ids, 'rating': ratings}).
                 pivot(values='rating', index='user_id', columns='item_id')
                )
        X = self._impute(X, self.engine, self.method)
        self.ratings = X
        
        self.similarity = pd.DataFrame(1 - pairwise_distances(X, X, metric=self.method),
                                       index=X.index, columns=X.index).fillna(0)
        
        return self
    
    def predict(self, user_item_ids):
#         if self.engine == 'user':
#             similarity = self.similarity.loc[user_ids, :]
#             ratings = self.ratings.loc[user_ids, :]
#         elif self.engine == 'item':
#             similarity = self.similarity.loc[item_ids, :]
#             ratings = self.ratings.loc[item_ids, :]
            
            
        pred = self.similarity.dot(self.ratings)/abs(self.similarity).sum(axis=1).values.reshape(-1, 1)
        
        if self.engine == "item":
            pred = pred.T
            
        return pred
    


In [187]:
train_df.iloc[:, 0]

53532    115
10504    372
89718    763
51836    757
66257    673
        ... 
30475    526
21208    244
41739    217
21228    303
25716      5
Name: user_id, Length: 70000, dtype: int64

In [184]:
from sklearn.model_selection import cross_val_score

In [None]:
cross_val_score()

In [128]:
rcmdr = Recommender("user", "cosine")
rcmdr.fit(ratings.user_id, ratings.movie_id, ratings.rating)
pred = rcmdr.predict(ratings.user_id, ratings.movie_id)

In [188]:
pred.loc

item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.424921,0.758241,0.444433,1.299298,0.458865,0.139783,2.147416,1.405618,1.581953,0.509051,...,0.003467,0.006575,0.003651,0.002434,0.005605,0.000269,0.000806,0.000538,0.006510,0.007361
2,1.882536,0.325506,0.302482,0.641897,0.240190,0.117447,1.559090,0.768078,1.388130,0.400763,...,0.002868,0.002305,0.003679,0.002453,0.004346,0.001684,0.005052,0.003368,0.002770,0.001912
3,1.328492,0.290918,0.236176,0.536108,0.228197,0.070488,1.244122,0.602516,0.965598,0.269873,...,0.001396,0.001058,0.006671,0.004447,0.003134,0.004178,0.012534,0.008356,0.002619,0.001182
4,1.603710,0.350917,0.268652,0.623992,0.260725,0.071486,1.416678,0.691470,1.049920,0.255325,...,0.002610,0.002723,0.003532,0.002355,0.004192,0.002945,0.008836,0.005891,0.002817,0.001429
5,2.627383,0.873003,0.450599,1.371067,0.493523,0.104848,2.200992,1.489508,1.436195,0.468190,...,0.003604,0.006660,0.002835,0.001890,0.003849,0.000004,0.000012,0.000008,0.006884,0.006284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,2.465487,0.468914,0.403929,0.795590,0.341403,0.095980,1.956634,0.936914,1.718421,0.387813,...,0.002582,0.002406,0.005511,0.003674,0.006229,0.000354,0.001062,0.000708,0.004352,0.002503
940,2.017521,0.569425,0.336205,1.060893,0.360052,0.108366,1.830259,1.213920,1.405653,0.405918,...,0.003367,0.005870,0.003157,0.002105,0.004671,0.001117,0.003350,0.002233,0.004769,0.004245
941,2.453219,0.448412,0.396939,0.743403,0.287704,0.089838,2.117515,0.883792,1.421974,0.357328,...,0.001604,0.001828,0.002207,0.001471,0.004642,0.000612,0.001836,0.001224,0.004006,0.003029
942,2.039942,0.587604,0.303291,1.024146,0.343023,0.100319,1.663655,1.250355,1.324813,0.397533,...,0.004830,0.008097,0.004170,0.002780,0.004204,0.001042,0.003125,0.002083,0.005335,0.003315


In [161]:
def rmse(actual, pred):
    size = (~actual.isna()).sum().sum()
    return (((pred - actual)**(2)).sum().sum()/size)**(1/2)

In [166]:
from sklearn.model_selection import train_test_split

In [167]:
train_df, test_df = train_test_split(ratings, test_size=0.3)

In [178]:
train_df.pivot(values="rating", index="user_id", columns="movie_id")

movie_id,1,2,3,4,5,6,7,8,9,10,...,1669,1672,1673,1674,1675,1676,1677,1678,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,,4.0,3.0,,5.0,,1.0,5.0,3.0,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,,5.0,,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [180]:
rcmdr = Recommender("user", "cosine")
rcmdr.fit(train_df["user_id"], train_df["movie_id"], train_df["rating"])

Recommender(engine='user', method='cosine')

In [189]:
pred = rcmdr.predict(test_df["user_id"], test_df["movie_id"])

In [199]:
pred.melt()

Unnamed: 0,item_id,value
0,1,1.836429
1,1,1.252478
2,1,0.862163
3,1,1.158935
4,1,1.923743
...,...,...
1535199,1681,0.004873
1535200,1681,0.003970
1535201,1681,0.003974
1535202,1681,0.005912


In [198]:
pred.melt(id_vars=["item_id", "user_id"])

KeyError: "The following 'id_vars' are not present in the DataFrame: ['item_id', 'user_id']"