In [2]:
import pandas as pd
import numpy as np

In [3]:
ratings_df = pd.read_csv('../data/ml-latest-small/ratings.csv')
user_item_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating')
user_item_matrix.tail()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,
610,5.0,,,,,5.0,,,,,...,,,,,,,,,,


In [4]:
user_correlation_matrix = user_item_matrix.T.corr()
user_correlation_matrix.tail()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
606,0.066378,0.583333,-0.791334,0.144603,0.244321,-0.049192,0.137771,0.253582,0.5727,-0.382955,...,0.29049,0.140613,0.318473,0.682949,0.167062,1.0,0.114191,0.240842,0.533002,0.389185
607,0.174557,,-0.333333,0.116518,0.23108,0.255639,0.402792,0.25128,,-0.241121,...,0.698241,0.21721,0.192787,0.035806,-0.299641,0.114191,1.0,0.200814,0.190117,0.106605
608,0.26807,-0.125,-0.395092,-0.170501,-0.020546,0.125428,0.008081,0.434423,0.336625,-0.571043,...,0.473967,0.297646,0.086423,0.053986,-0.075673,0.240842,0.200814,1.0,0.488929,0.147606
609,-0.175412,,,-0.27735,0.384111,0.193649,0.420288,0.14186,,,...,1.0,0.188512,0.343303,0.641624,-0.55,0.533002,0.190117,0.488929,1.0,-0.521773
610,-0.032086,0.623288,0.569562,-0.043786,0.040582,0.11558,0.341233,0.167931,0.615638,-0.205081,...,0.007025,0.049263,0.270908,0.310611,0.462274,0.389185,0.106605,0.147606,-0.521773,1.0


In [5]:
users_seen_films = user_item_matrix.T.notna()

In [9]:
class User:
    '''
    User in a streaming site.
    
    Params
    ------
    userId:      Id of the user
    '''
    
    def __init__(self, user_id):
        self.user_id = user_id
        #self.new_films = []
        #self.recommender = recommender

        
    def get_user_information(self):
        
        self.user_seen_boolean_series = users_seen_films[[self.user_id]]
        self.seen_films = self.user_seen_boolean_series.index[self.user_seen_boolean_series[self.user_id]].tolist()  
        
        self.sorted_films = user_item_matrix.loc[[self.user_id]].T.sort_values(by=self.user_id, \
                                                                              ascending=False).dropna().index
        
        self.user_correlation = user_correlation_matrix.iloc[user_correlation_matrix.index==self.user_id].T \
                                        .sort_values(by=self.user_id, ascending=False).dropna()
        self.best_similar_users = self.user_correlation[self.user_correlation>0.7].dropna()[1:].index
        
        
    def update_seen_films(self, suggested_seen_film):
        self.seen_films = self.seen_films.append(suggested_seen_film)
        
    def update_new_seen_films(self, suggested_seen_film):
        self.new_films = self.new_films.append(suggested_seen_film)
        
    def recommend(self):
        u = User(user_id)
        u.get_user_information()
        
        next_user_id = u.best_similar_users[0]
        v = User(next_user_id)
        v.get_user_information()
        
        suggestion = v.sorted_films[0]    
        
        i = 0
        j = 0 
    
        while suggestion in u.seen_films:
            i+=1
            suggestion = v.sorted_films[i]
            
            if i == len(v.sorted_films) -1 :            
                j+=1
                next_user_id = u.best_similar_users[j]
                v = User(next_user_id)
                v.get_user_information()
                i = -1
        u.update_new_seen_films(suggestion)
        return u.new_films, suggestion

In [10]:
u = User(1)

In [11]:
u.get_user_information()

In [12]:
u.seen_films

[1,
 3,
 6,
 47,
 50,
 70,
 101,
 110,
 151,
 157,
 163,
 216,
 223,
 231,
 235,
 260,
 296,
 316,
 333,
 349,
 356,
 362,
 367,
 423,
 441,
 457,
 480,
 500,
 527,
 543,
 552,
 553,
 590,
 592,
 593,
 596,
 608,
 648,
 661,
 673,
 733,
 736,
 780,
 804,
 919,
 923,
 940,
 943,
 954,
 1009,
 1023,
 1024,
 1025,
 1029,
 1030,
 1031,
 1032,
 1042,
 1049,
 1060,
 1073,
 1080,
 1089,
 1090,
 1092,
 1097,
 1127,
 1136,
 1196,
 1197,
 1198,
 1206,
 1208,
 1210,
 1213,
 1214,
 1219,
 1220,
 1222,
 1224,
 1226,
 1240,
 1256,
 1258,
 1265,
 1270,
 1275,
 1278,
 1282,
 1291,
 1298,
 1348,
 1377,
 1396,
 1408,
 1445,
 1473,
 1500,
 1517,
 1552,
 1573,
 1580,
 1587,
 1617,
 1620,
 1625,
 1644,
 1676,
 1732,
 1777,
 1793,
 1804,
 1805,
 1920,
 1927,
 1954,
 1967,
 2000,
 2005,
 2012,
 2018,
 2028,
 2033,
 2046,
 2048,
 2054,
 2058,
 2078,
 2090,
 2093,
 2094,
 2096,
 2099,
 2105,
 2115,
 2116,
 2137,
 2139,
 2141,
 2143,
 2161,
 2174,
 2193,
 2253,
 2268,
 2273,
 2291,
 2329,
 2338,
 2353,
 2366,
 

In [14]:
u.recommend()

NameError: name 'user_id' is not defined

In [106]:
u.best_similar_users

Int64Index([146, 550, 106, 333, 598, 473, 511,   9,  13, 366, 401, 535, 481,
            154,  90, 499, 157, 139, 476, 487, 210, 114, 530,  49, 162, 297,
            207,  44, 430, 394, 574, 421, 248, 173,  65, 505, 369, 435, 375,
            431, 351],
           dtype='int64', name='userId')

In [107]:
class Recommender:
    
    def __init__(self):
        ...
        
    def recommend(self,user_id):
        u = User(user_id)
        u.get_user_information()
        
        next_user_id = u.best_similar_users[0]
        v = User(next_user_id)
        v.get_user_information()
        
        suggestion = v.sorted_films[0]    
        
        i = 0
        j = 0 
    
        while suggestion in u.seen_films:
            i+=1
            suggestion = v.sorted_films[i]
            
            if i == len(v.sorted_films) -1 :            
                j+=1
                next_user_id = u.best_similar_users[j]
                v = User(next_user_id)
                v.get_user_information()
                i = -1
        u.update_new_seen_films(suggestion)
        return u.new_films, suggestion

In [121]:
r = Recommender()

In [122]:
r.recommend(199)

(None, 2949)