Preparing Data

In [1]:
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [2]:
rating = pd.read_csv("C:\\Users\\TBONG KHMOM\\Desktop\\ratings_small.csv")
movies_df = pd.read_csv("C:\\Users\\TBONG KHMOM\\Desktop\\movies_metadata.csv")

In [3]:
movies_df = movies_df[["id","overview","release_date","title","vote_average","vote_count"]]

In [4]:
rating["date"] = pd.to_datetime(rating["timestamp"],unit="s")

In [5]:
rating = rating.drop("timestamp",axis=1)

In [6]:
movies_df = movies_df.rename(columns={"id":"movieId"})

In [7]:
movies_df = movies_df[pd.to_numeric(movies_df['movieId'], errors='coerce').notna()]

In [8]:
print(movies_df['movieId'].dtype)
print(rating['movieId'].dtype)

object
int64


In [9]:
rating['movieId'] = pd.to_numeric(rating['movieId'], errors='coerce')

In [10]:
rating = rating.dropna(subset=['movieId'])

In [11]:
movies_df['movieId'] = pd.to_numeric(movies_df['movieId'], errors='coerce')
rating['movieId'] = pd.to_numeric(rating['movieId'], errors='coerce')

In [12]:
concatenated_df = pd.concat([movies_df, rating], axis=0, ignore_index=True)

In [13]:
df = pd.merge(movies_df, rating, how='inner', on='movieId')

In [14]:
df.head()

Unnamed: 0,movieId,overview,release_date,title,vote_average,vote_count,userId,rating,date
0,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,23,3.5,2006-05-27 09:11:32
1,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,102,4.0,2000-04-24 17:55:42
2,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,232,2.0,2000-04-07 07:31:37
3,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,242,5.0,2000-04-25 18:53:45
4,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,263,3.0,2005-06-04 00:56:15


Creating User-Movie DataFrame

In [15]:
df.shape

(44994, 9)

In [16]:
df["title"].nunique()

2794

In [17]:
values_pd = df["title"].value_counts()
values_pd

title
Terminator 3: Rise of the Machines          324
The Million Dollar Hotel                    311
Solaris                                     305
The 39 Steps                                291
Monsoon Wedding                             274
                                           ... 
Once                                          1
Pirates of the Caribbean: At World's End      1
Mr. Brooks                                    1
Paranoid Park                                 1
The One-Man Band                              1
Name: count, Length: 2794, dtype: int64

In [18]:
rare_movies = values_pd[values_pd < 5].index

rare_movies

Index(['Hotel Rwanda', 'Cruel Intentions 3', 'Tokyo Sonata', 'Jinxed!',
       'Aparajito', 'Jekyll and Hyde ... Together Again',
       'Frankenstein and the Monster from Hell', 'Enigma', 'Double Trouble',
       'Maradona by Kusturica',
       ...
       'Fighting Elegy', 'The Canterbury Tales', 'Omagh', 'Spider-Man 3',
       'Knocked Up', 'Once', 'Pirates of the Caribbean: At World's End',
       'Mr. Brooks', 'Paranoid Park', 'The One-Man Band'],
      dtype='object', name='title', length=1445)

In [19]:
df_ = df[~df["title"].isin(rare_movies)]

In [20]:
df_.head()

Unnamed: 0,movieId,overview,release_date,title,vote_average,vote_count,userId,rating,date
0,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,23,3.5,2006-05-27 09:11:32
1,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,102,4.0,2000-04-24 17:55:42
2,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,232,2.0,2000-04-07 07:31:37
3,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,242,5.0,2000-04-25 18:53:45
4,949,"Obsessive master thief, Neil McCauley leads a ...",1995-12-15,Heat,7.7,1886.0,263,3.0,2005-06-04 00:56:15


In [21]:
user_title_df = df_.groupby(["userId","title"])["rating"].mean().unstack().notnull()

In [22]:
user_title_df.shape

(671, 1349)

In [23]:
user_title_df.head()

title,10 Items or Less,10 Things I Hate About You,15 Minutes,1984,2 Days in Paris,"20,000 Leagues Under the Sea",2001: A Space Odyssey,24 Hour Party People,25th Hour,28 Days Later,...,Young Adam,Young Frankenstein,Young and Innocent,Z,Zatoichi,Zazie dans le métro,Zodiac,eXistenZ,xXx,À nos amours
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,True,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False


In [24]:
user_title_df.columns

Index(['10 Items or Less', '10 Things I Hate About You', '15 Minutes', '1984',
       '2 Days in Paris', '20,000 Leagues Under the Sea',
       '2001: A Space Odyssey', '24 Hour Party People', '25th Hour',
       '28 Days Later',
       ...
       'Young Adam', 'Young Frankenstein', 'Young and Innocent', 'Z',
       'Zatoichi', 'Zazie dans le métro', 'Zodiac', 'eXistenZ', 'xXx',
       'À nos amours'],
      dtype='object', name='title', length=1349)

In [25]:
sample_guy = user_title_df.sample(1,random_state=45).index[0]

In [26]:
random_user_df = user_title_df[user_title_df.index == sample_guy]

In [27]:
movies_watched = random_user_df.dropna(axis=1).columns.tolist() 

In [28]:
movies_watched_df = user_title_df[movies_watched]

In [29]:
user_movie_count = movies_watched_df.notnull().sum(axis=1) 

In [30]:
user_movie_count.max()

1349

In [31]:
users_same_movies = user_movie_count[user_movie_count > (movies_watched_df.shape[1] * 60 ) / 100].index

users_same_movies

Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
       ...
       662, 663, 664, 665, 666, 667, 668, 669, 670, 671],
      dtype='int64', name='userId', length=671)

Determination of Similarity

In [32]:
filted_df = movies_watched_df[movies_watched_df.index.isin(users_same_movies)]

filted_df

title,10 Items or Less,10 Things I Hate About You,15 Minutes,1984,2 Days in Paris,"20,000 Leagues Under the Sea",2001: A Space Odyssey,24 Hour Party People,25th Hour,28 Days Later,...,Young Adam,Young Frankenstein,Young and Innocent,Z,Zatoichi,Zazie dans le métro,Zodiac,eXistenZ,xXx,À nos amours
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,True,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
668,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
669,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
670,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [33]:
corr_df = filted_df.T.corr().unstack().drop_duplicates()

In [34]:
corr_df.sort_values(ascending=False).head(20)

userId  userId
1       1         1.000000
151     369       0.856042
191     513       0.840495
151     279       0.827968
191     317       0.818695
151     400       0.806348
375     568       0.802295
279     400       0.797441
317     415       0.796990
279     369       0.796423
151     535       0.780618
47      568       0.775797
82      400       0.774960
191     415       0.772295
369     400       0.772045
225     375       0.768959
191     568       0.767086
145     400       0.761718
317     556       0.759037
        375       0.759037
dtype: float64

In [35]:
corr_df

userId  userId
1       1         1.000000
        2        -0.014167
        3        -0.009185
        4         0.101010
        5         0.043823
                    ...   
667     670       0.174478
        671       0.080663
668     670       0.235638
        671       0.183059
670     671       0.195972
Length: 184753, dtype: float64

In [36]:
corr_df.loc[(1, 3)]

-0.009184671558107973

In [37]:
corr_df[sample_guy].sort_values(ascending=False)

userId
541    0.233658
476    0.141424
525    0.132188
222    0.120666
592    0.110954
         ...   
391   -0.037683
559   -0.040722
287   -0.041016
396   -0.046860
434   -0.049462
Length: 364, dtype: float64

Score Calculation

In [38]:
top_users = pd.DataFrame(corr_df[sample_guy][corr_df[sample_guy] > 0.10], columns=["corr"])

top_users

Unnamed: 0_level_0,corr
userId,Unnamed: 1_level_1
222,0.120666
294,0.103268
412,0.100172
463,0.104341
476,0.141424
525,0.132188
541,0.233658
575,0.102029
592,0.110954


In [39]:
top_users_ratings = pd.merge(top_users, rating[["userId", "movieId", "rating"]], how='inner', on="userId")

top_users_ratings

Unnamed: 0,userId,corr,movieId,rating
0,222,0.120666,229,4.0
1,222,0.120666,899,5.0
2,222,0.120666,900,5.0
3,222,0.120666,901,5.0
4,222,0.120666,912,5.0
...,...,...,...,...
2625,592,0.110954,4299,4.0
2626,592,0.110954,4340,3.0
2627,592,0.110954,4344,5.0
2628,592,0.110954,4369,5.0


In [40]:
top_users_ratings['weighted_rating'] = top_users_ratings['corr'] * top_users_ratings['rating']

In [41]:
recommendation_df = top_users_ratings.pivot_table(values="weighted_rating", index="movieId", aggfunc="mean")

recommendation_df

Unnamed: 0_level_0,weighted_rating
movieId,Unnamed: 1_level_1
1,0.339449
2,0.336245
5,0.361438
6,0.528752
7,0.447273
...,...
42738,0.413072
44613,0.361438
45028,0.413072
45499,0.413072


In [42]:
recommendation_df.sort_values(by= "weighted_rating" , ascending=False).head(20)

Unnamed: 0_level_0,weighted_rating
movieId,Unnamed: 1_level_1
2841,0.934631
2693,0.934631
3219,0.934631
1093,0.820876
2692,0.750164
2707,0.750164
2959,0.707718
3160,0.707121
16,0.707121
1918,0.66094


In [43]:
movies_to_be_recommend = recommendation_df[recommendation_df["weighted_rating"] > 0.7].sort_values(by="weighted_rating", ascending=False).head(10)

In [44]:
movies_df["title"][movies_df["movieId"].isin(movies_to_be_recommend.index)]

3786                  Dancer in the Dark
9399              A Very Long Engagement
9805             Elevator to the Gallows
11922                     License to Wed
21863    Frankenstein Conquers the World
24036                      The Red Elvis
Name: title, dtype: object

In [45]:
df["movieId"].nunique()

2830

In [46]:
user_movie_df = df.groupby(["userId","movieId"])["rating"].mean().unstack().notnull()

In [47]:
user_title_df.head()

title,10 Items or Less,10 Things I Hate About You,15 Minutes,1984,2 Days in Paris,"20,000 Leagues Under the Sea",2001: A Space Odyssey,24 Hour Party People,25th Hour,28 Days Later,...,Young Adam,Young Frankenstein,Young and Innocent,Z,Zatoichi,Zazie dans le métro,Zodiac,eXistenZ,xXx,À nos amours
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,True,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False


In [48]:
sample_movie = user_movie_df.sample(1,random_state=45).index[0]

sample_movie

196

In [49]:
filtered = user_movie_df[sample_movie]

In [50]:
user_movie_df_wo = user_movie_df.drop(sample_movie,axis=1)

In [51]:
movies_similarity = user_movie_df_wo.corrwith(filtered)

In [52]:
movies_similarity.sort_values(ascending=False).head(20)

movieId
160    0.388101
172    0.359755
435    0.346429
173    0.342352
316    0.331515
253    0.331465
22     0.326370
317    0.324579
165    0.324017
592    0.321798
145    0.321120
198    0.321120
587    0.315399
329    0.313288
292    0.307931
204    0.296570
153    0.295394
344    0.292703
379    0.289515
426    0.287628
dtype: float64

In [53]:
movies_similarity = movies_similarity.sort_values(ascending=False).reset_index()
movies_similarity.columns = ["movieId","movies_similarity"]

In [54]:
movies_similarity.head()

Unnamed: 0,movieId,movies_similarity
0,160,0.388101
1,172,0.359755
2,435,0.346429
3,173,0.342352
4,316,0.331515


In [55]:
filtered_movies = df[df['movieId'].isin([160, 172, 435, 173, 316])]

filtered_movies

Unnamed: 0,movieId,overview,release_date,title,vote_average,vote_count,userId,rating,date
3585,173,A ship sent to investigate a wave of mysteriou...,1954-12-23,"20,000 Leagues Under the Sea",6.9,160.0,4,3.0,2000-02-06 04:29:06
3586,173,A ship sent to investigate a wave of mysteriou...,1954-12-23,"20,000 Leagues Under the Sea",6.9,160.0,6,2.0,2005-02-24 15:17:08
3587,173,A ship sent to investigate a wave of mysteriou...,1954-12-23,"20,000 Leagues Under the Sea",6.9,160.0,22,1.5,2005-11-10 22:48:50
3588,173,A ship sent to investigate a wave of mysteriou...,1954-12-23,"20,000 Leagues Under the Sea",6.9,160.0,32,3.0,1996-06-15 08:43:47
3589,173,A ship sent to investigate a wave of mysteriou...,1954-12-23,"20,000 Leagues Under the Sea",6.9,160.0,41,4.0,2004-08-30 17:45:39
...,...,...,...,...,...,...,...,...,...
40247,160,A group of people are standing along the platf...,1896-01-25,The Arrival of a Train at La Ciotat,6.9,87.0,597,3.0,1999-10-23 20:01:58
40248,160,A group of people are standing along the platf...,1896-01-25,The Arrival of a Train at La Ciotat,6.9,87.0,619,3.0,1996-05-12 17:54:39
40249,160,A group of people are standing along the platf...,1896-01-25,The Arrival of a Train at La Ciotat,6.9,87.0,639,2.0,1996-06-05 16:26:26
40250,160,A group of people are standing along the platf...,1896-01-25,The Arrival of a Train at La Ciotat,6.9,87.0,649,1.0,1996-06-10 16:40:54


In [56]:
filtered_movies['title'].value_counts()

title
Grill Point                            145
20,000 Leagues Under the Sea            70
The Arrival of a Train at La Ciotat     63
The Day After Tomorrow                  55
Star Trek V: The Final Frontier         48
Name: count, dtype: int64

✨Model-Based Recommender System✨

Data Preparing

In [57]:
import pandas as pd
from surprise import Reader, SVD, Dataset, accuracy
from surprise.model_selection import GridSearchCV, train_test_split, cross_validate

In [58]:
movie = pd.read_csv("C:\\Users\\TBONG KHMOM\\Desktop\\movie.csv")
rating = pd.read_csv("C:\\Users\\TBONG KHMOM\\Desktop\\rating.csv")

In [59]:
df = pd.merge(movie,rating, how="inner", on="movieId")
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3,4.0,1999-12-11 13:36:47
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6,5.0,1997-03-13 17:50:52
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,4.0,1996-06-05 13:37:51
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10,4.0,1999-11-25 02:44:47
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.5,2009-01-02 01:13:41


In [60]:
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3,4.0,1999-12-11 13:36:47
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6,5.0,1997-03-13 17:50:52
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,4.0,1996-06-05 13:37:51
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10,4.0,1999-11-25 02:44:47
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.5,2009-01-02 01:13:41


In [61]:
movie_ids = [130219, 356, 4422, 541]

movies = ["The Dark Knight (2011)",
          "Cries and Whispers (Viskningar och rop) (1972)",
          "Forrest Gump (1994)",
          "Blade Runner (1982)"]

In [62]:
sample_df = df[df.movieId.isin(movie_ids)]

sample_df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
2457839,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4,4.0,1996-08-24 09:28:42
2457840,356,Forrest Gump (1994),Comedy|Drama|Romance|War,7,4.0,2002-01-16 19:02:55
2457841,356,Forrest Gump (1994),Comedy|Drama|Romance|War,8,5.0,1996-06-05 13:44:19
2457842,356,Forrest Gump (1994),Comedy|Drama|Romance|War,9,4.0,2001-07-01 20:26:38
2457843,356,Forrest Gump (1994),Comedy|Drama|Romance|War,10,3.0,1999-11-25 02:32:02


In [63]:
sample_df.shape

(97343, 6)

In [64]:
user_movie_df = sample_df.pivot_table(index=["userId"],
                                      columns=["title"],
                                      values="rating")


In [65]:
user_movie_df.shape

(76918, 4)

In [66]:
reader = Reader(rating_scale=(1, 5))

In [67]:
data = Dataset.load_from_df(sample_df[['userId',
                                       'movieId',
                                       'rating']], reader)

Modelling

In [68]:
trainset, testset = train_test_split(data, test_size=.25)
svd_model = SVD()
svd_model.fit(trainset)
predictions = svd_model.test(testset)

In [69]:
accuracy.rmse(predictions)

RMSE: 0.9399


0.9398844030904142

In [70]:
svd_model.predict(uid=1.0, iid=541, verbose=True)

user: 1.0        item: 541        r_ui = None   est = 4.16   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=4.163595706436809, details={'was_impossible': False})

In [71]:
svd_model.predict(uid=1.0, iid=356, verbose=True)

user: 1.0        item: 356        r_ui = None   est = 3.90   {'was_impossible': False}


Prediction(uid=1.0, iid=356, r_ui=None, est=3.897333439850477, details={'was_impossible': False})

In [72]:
sample_df[sample_df["userId"] == 1]

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
3612352,541,Blade Runner (1982),Action|Sci-Fi|Thriller,1,4.0,2005-04-02 23:30:03


Model Tuning

In [73]:
param_grid = {'n_epochs': [5, 10, 20],
              'lr_all': [0.002, 0.005, 0.007]}

In [74]:
gs = GridSearchCV(SVD,
                  param_grid,
                  measures=['rmse', 'mae'],
                  cv=3,
                  n_jobs=-1,
                  joblib_verbose=True)

In [75]:
gs.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:   31.6s finished


In [76]:
gs.best_score['rmse']

0.9338307488395663

In [77]:
gs.best_params['rmse']

{'n_epochs': 10, 'lr_all': 0.002}

Predict

In [78]:
svd_model.n_epochs

20

In [79]:
svd_model = SVD(**gs.best_params['rmse'])

In [80]:
data = data.build_full_trainset()

In [81]:
svd_model.fit(data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2867330f6a0>

In [82]:
svd_model.predict(uid=1.0, iid=541, verbose=True)

user: 1.0        item: 541        r_ui = None   est = 4.21   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=4.211488322546722, details={'was_impossible': False})

In [83]:
print(type(movie_ids))

<class 'list'>


In [84]:
movie_ids

[130219, 356, 4422, 541]

In [85]:
movie_ids = [(1, 'Title1'), (2, 'Title2'), (3, 'Title3'), (4, 'Title4')]

In [86]:
movie_ids_df = pd.DataFrame(movie_ids, columns=["movieId", "title"])

In [87]:
movie_ids = [[1, 'Title1'], [2, 'Title2'], [3, 'Title3'], [4, 'Title4']]
movie_ids_df = pd.DataFrame(movie_ids, columns=["movieId", "title"])

In [88]:
def suggest(df,user_id,sug):
    
    didnt_watch = df["movieId"][~(df["userId"] == user_id)].drop_duplicates().values.tolist()
    temp_dict={}
    
    for i in didnt_watch:
        
        temp_dict[i] = svd_model.predict(uid=user_id, iid=i)[3]
        
    suggestions = pd.DataFrame(temp_dict.items(),columns=["movieId",'possible_rate']).sort_values(by="possible_rate", ascending=False).head(sug)
    merged = pd.merge(suggestions,movies_df[["movieId","title"]], how="inner", on="movieId")
    
    return merged


In [89]:
suggest(df,21,15).sort_values(by="title", ascending=False)

Unnamed: 0,movieId,possible_rate,title
1,89722,4.078319,Visions of Suffering
0,541,4.283344,The Man with the Golden Arm
2,89720,4.078319,Kiss of the Tarantula
