In [48]:
import pandas as pd
import numpy as np
from ast import literal_eval

In [49]:
df = pd.read_csv('/home/Machine Learning/projects/developia-movie-recommendation/dataset/movie_dataset.csv')

In [50]:
df.columns

Index(['_id', 'url', 'title', 'description', 'genres', 'runtime',
       'runtime_str', 'release_date', 'langs', 'origins',
       'production_companies', 'top_cast', 'director_creator', 'rate', 'type',
       'img_url', 'keywords'],
      dtype='object')

In [51]:
#insert user_id column to the df and fill it with random values between 1 and 300
df.insert(1, 'user_id', np.random.randint(1, 300, df.shape[0]))
df.columns

Index(['_id', 'user_id', 'url', 'title', 'description', 'genres', 'runtime',
       'runtime_str', 'release_date', 'langs', 'origins',
       'production_companies', 'top_cast', 'director_creator', 'rate', 'type',
       'img_url', 'keywords'],
      dtype='object')

In [52]:
df.head(3)

Unnamed: 0,_id,user_id,url,title,description,genres,runtime,runtime_str,release_date,langs,origins,production_companies,top_cast,director_creator,rate,type,img_url,keywords
0,tt5315210,145,https://www.imdb.com/title/tt5315210,Lou,A storm rages. A young girl is kidnapped. Her ...,"['Action', 'Crime', 'Drama']",107.0,1 hour 47 minutes,23-09-22,"['English', 'Ukrainian']",['United States'],['Bad Robot'],"[{'actor': 'Allison Janney', 'character': 'Lou...",['Anna Foerster'],6.1,movie,https://m.media-amazon.com/images/M/MV5BYzdjMD...,
1,tt12593682,11,https://www.imdb.com/title/tt12593682,Bullet Train,Five assassins aboard a swiftly-moving bullet ...,"['Action', 'Comedy', 'Thriller']",127.0,2 hours 7 minutes,05-08-22,"['English', 'Japanese', 'Spanish', 'Russian']","['United States', 'Japan']","['87North', 'CTB Inc.', 'Hill District Media']","[{'actor': 'Brad Pitt', 'character': 'Ladybug'...",['David Leitch'],7.4,movie,https://m.media-amazon.com/images/M/MV5BZTNmYj...,"['train', 'japan', 'assassin', 'revenge', 'mou..."
2,tt10648342,259,https://www.imdb.com/title/tt10648342,Thor: Love and Thunder,"Thor enlists the help of Valkyrie, Korg and ex...","['Action', 'Adventure', 'Comedy']",118.0,1 hour 58 minutes,08-07-22,['English'],"['Australia', 'United States']","['Marvel Studios', 'Fox Studios Australia', 'W...","[{'actor': 'Chris Hemsworth', 'character': 'Th...",['Taika Waititi'],6.4,movie,https://m.media-amazon.com/images/M/MV5BYmQ3MW...,"['superhero action', 'superhero', 'marvel comi..."


In [53]:
#find the number of duplicate users
df['user_id'].value_counts()

33     39
219    36
178    36
108    36
25     35
       ..
78     16
286    16
90     15
156    15
50     13
Name: user_id, Length: 299, dtype: int64

In [54]:
#create pivot table with column movie_id and index is user_id and fill NAs with 0
pt_df = df.pivot_table(index='user_id', columns='_id', values='rate').fillna(0)

In [55]:
#convert pivot table to CSR matrix
from scipy.sparse import csr_matrix # converted to csr matrix because of using making matrix calculations easily
csr_df = csr_matrix(pt_df.values)

In [56]:
#import svds
from locale import normalize
from scipy.sparse.linalg import svds

def normalize(X_pred):
    #normalize the prediction ratings
    X_pred = (X_pred - X_pred.min()) / (X_pred.max() - X_pred.min())
    return X_pred

def generate_prediction_df(csr_df, pt_df, n_factors):

    #matrix factorization from csr_df
    u, s, vt = svds(csr_df, k = n_factors)
    s_diag_matrix = np.diag(s)

    #calculate prediction ratings
    X_pred = np.dot(np.dot(u, s_diag_matrix), vt)
    X_pred = normalize(X_pred)

    #convert prediction ratings to dataframe
    pred_df = pd.DataFrame(X_pred, columns = pt_df.columns, index = list(pt_df.index)).transpose()
    
    return pred_df


def recommend_movies(pred_df, user_id, n_recs):
    user_preds = pred_df[user_id].sort_values(ascending=False).reset_index().rename(columns={user_id: 'similarity'})
    recommended_df = user_preds.sort_values(by='similarity', ascending=False).head(n_recs)

In [57]:
pred_df = generate_prediction_df(csr_df, pt_df, 10)

pred_df.head(3)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,290,291,292,293,294,295,296,297,298,299
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0003740,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,...,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14
tt0004972,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,...,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14
tt0006864,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,...,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14,8.999482e-14


In [58]:
print(recommend_movies(pred_df, user_id=94, n_recs=10))

None


# Collaborative Filtering

In [59]:
df.head(3)

Unnamed: 0,_id,user_id,url,title,description,genres,runtime,runtime_str,release_date,langs,origins,production_companies,top_cast,director_creator,rate,type,img_url,keywords
0,tt5315210,145,https://www.imdb.com/title/tt5315210,Lou,A storm rages. A young girl is kidnapped. Her ...,"['Action', 'Crime', 'Drama']",107.0,1 hour 47 minutes,23-09-22,"['English', 'Ukrainian']",['United States'],['Bad Robot'],"[{'actor': 'Allison Janney', 'character': 'Lou...",['Anna Foerster'],6.1,movie,https://m.media-amazon.com/images/M/MV5BYzdjMD...,
1,tt12593682,11,https://www.imdb.com/title/tt12593682,Bullet Train,Five assassins aboard a swiftly-moving bullet ...,"['Action', 'Comedy', 'Thriller']",127.0,2 hours 7 minutes,05-08-22,"['English', 'Japanese', 'Spanish', 'Russian']","['United States', 'Japan']","['87North', 'CTB Inc.', 'Hill District Media']","[{'actor': 'Brad Pitt', 'character': 'Ladybug'...",['David Leitch'],7.4,movie,https://m.media-amazon.com/images/M/MV5BZTNmYj...,"['train', 'japan', 'assassin', 'revenge', 'mou..."
2,tt10648342,259,https://www.imdb.com/title/tt10648342,Thor: Love and Thunder,"Thor enlists the help of Valkyrie, Korg and ex...","['Action', 'Adventure', 'Comedy']",118.0,1 hour 58 minutes,08-07-22,['English'],"['Australia', 'United States']","['Marvel Studios', 'Fox Studios Australia', 'W...","[{'actor': 'Chris Hemsworth', 'character': 'Th...",['Taika Waititi'],6.4,movie,https://m.media-amazon.com/images/M/MV5BYmQ3MW...,"['superhero action', 'superhero', 'marvel comi..."


In [60]:
from surprise import Reader

reader = Reader()

In [61]:
c_movies = df[['user_id', '_id', 'rate']]
# sort the dataset by user_id and copy
c_movies = c_movies.sort_values(by='user_id').copy()
c_movies.head(3)

Unnamed: 0,user_id,_id,rate
1174,1,tt0947798,8.0
7311,1,tt13446128,6.9
2335,1,tt0036775,8.3


In [62]:
#import Dataset and KFold
from surprise import Dataset
from surprise.model_selection import cross_validate, KFold

data = Dataset.load_from_df(c_movies[['_id', 'user_id', 'rate']], reader)
#split data into 5 folds for cross validation
kf = KFold(n_splits=5)
kf.split(data)


print(kf)

<surprise.model_selection.split.KFold object at 0x7f37d91a0670>


In [63]:
#import SVD
from surprise import SVD

svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    nan     nan     nan     nan     nan     nan     nan     
MAE (testset)     nan     nan     nan     nan     nan     nan     nan     
Fit time          0.53    0.51    0.63    0.50    0.45    0.52    0.06    
Test time         0.06    0.08    0.08    0.06    0.06    0.07    0.01    


{'test_rmse': array([nan, nan, nan, nan, nan]),
 'test_mae': array([nan, nan, nan, nan, nan]),
 'fit_time': (0.5284154415130615,
  0.509331226348877,
  0.6255173683166504,
  0.5022017955780029,
  0.45191311836242676),
 'test_time': (0.06365442276000977,
  0.08333444595336914,
  0.08081531524658203,
  0.05887889862060547,
  0.05623960494995117)}

In [64]:
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f37d91a1730>

In [65]:
#get the greatest rate value for user 1
#df[df['user_id'] == 1]['rate'].max()
df[df['user_id'] == 1]

Unnamed: 0,_id,user_id,url,title,description,genres,runtime,runtime_str,release_date,langs,origins,production_companies,top_cast,director_creator,rate,type,img_url,keywords
157,tt0416449,1,https://www.imdb.com/title/tt0416449,300,King Leonidas of Sparta and a force of 300 men...,"['Action', 'Drama']",117.0,1 hour 57 minutes,16-03-07,['English'],"['United States', 'Canada', 'Bulgaria']","['Warner Bros.', 'Legendary Entertainment', 'V...","[{'actor': 'Gerard Butler', 'character': 'King...",['Zack Snyder'],7.6,movie,https://m.media-amazon.com/images/M/MV5BNWMxYT...,"['spartan', 'battle', 'warrior', 'greece', 'ba..."
338,tt0117060,1,https://www.imdb.com/title/tt0117060,Mission: Impossible,"An American agent, under false suspicion of di...","['Action', 'Adventure', 'Thriller']",110.0,1 hour 50 minutes,20-09-96,"['English', 'French', 'Czech']",['United States'],"['Paramount Pictures', 'Cruise/Wagner Producti...","[{'actor': 'Tom Cruise', 'character': 'Ethan H...",['Brian De Palma'],7.1,movie,https://m.media-amazon.com/images/M/MV5BMTc3Nj...,"['train', 'betrayal', 'murder', 'based on tv s..."
790,tt0089489,1,https://www.imdb.com/title/tt0089489,Lifeforce,A race of space vampires arrives in London and...,"['Action', 'Horror', 'Mystery']",101.0,1 hour 41 minutes,,['English'],['United Kingdom'],"['Easedram', 'London-Cannon Films']","[{'actor': 'Steve Railsback', 'character': 'Ca...",['Tobe Hooper'],6.1,movie,https://m.media-amazon.com/images/M/MV5BMTU4MT...,"['alien', 'space vampire', 'zombie', 'alien in..."
1174,tt0947798,1,https://www.imdb.com/title/tt0947798,Black Swan,A committed dancer struggles to maintain her s...,"['Drama', 'Thriller']",108.0,1 hour 48 minutes,25-02-11,"['English', 'French', 'Italian']",['United States'],"['Searchlight Pictures', 'Cross Creek Pictures...","[{'actor': 'Natalie Portman', 'character': 'Ni...",['Darren Aronofsky'],8.0,movie,https://m.media-amazon.com/images/M/MV5BNzY2Nz...,"['ballerina', 'ballet', 'female protagonist', ..."
1379,tt11847410,1,https://www.imdb.com/title/tt11847410,The Fallout,High schooler Vada navigates the emotional fal...,['Drama'],96.0,1 hour 36 minutes,27-01-22,['English'],['United States'],"['SSS Entertainment', 'Good Pals', 'SSS Film C...","[{'actor': 'Jenna Ortega', 'character': 'Vada ...",['Megan Park'],7.0,movie,https://m.media-amazon.com/images/M/MV5BMjM2MT...,"['high school', 'lesbian', 'friendship', 'frie..."
1680,tt9357050,1,https://www.imdb.com/title/tt9357050,Dear Evan Hansen,Film adaptation of the Tony and Grammy Award-w...,"['Drama', 'Musical']",137.0,2 hours 17 minutes,24-09-21,['English'],"['United States', 'Japan', 'China']","['Marc Platt Productions', 'Perfect World Pict...","[{'actor': 'Ben Platt', 'character': 'Evan Han...",['Stephen Chbosky'],6.1,movie,https://m.media-amazon.com/images/M/MV5BYzhhM2...,"['based on broadway musical', 'social anxiety ..."
1786,tt1282140,1,https://www.imdb.com/title/tt1282140,Easy A,A clean-cut high school student relies on the ...,"['Comedy', 'Drama', 'Romance']",92.0,1 hour 32 minutes,17-09-21,['English'],['United States'],"['Screen Gems', 'Olive Bridge Entertainment']","[{'actor': 'Emma Stone', 'character': 'Olive',...",['Will Gluck'],7.0,movie,https://m.media-amazon.com/images/M/MV5BMjE1Mz...,"['high school', 'christian fanatic', 'jealousy..."
2335,tt0036775,1,https://www.imdb.com/title/tt0036775,Double Indemnity,A Los Angeles insurance representative lets an...,"['Crime', 'Drama', 'Film-Noir']",107.0,1 hour 47 minutes,06-09-44,['English'],['United States'],['Paramount Pictures'],"[{'actor': 'Fred MacMurray', 'character': 'Wal...",['Billy Wilder'],8.3,movie,https://m.media-amazon.com/images/M/MV5BOTdlNj...,"['insurance investigation', 'murder disguised ..."
2459,tt5710514,1,https://www.imdb.com/title/tt5710514,I don't feel at home in this world anymore.,"When a depressed woman is burgled, she finds a...","['Comedy', 'Crime', 'Drama']",93.0,1 hour 33 minutes,24-02-17,['English'],['United States'],"['Film Science', 'XYZ Films']","[{'actor': 'Melanie Lynskey', 'character': 'Ru...",['Macon Blair'],6.9,movie,https://m.media-amazon.com/images/M/MV5BYmM5Zj...,"['revenge', 'vigilante', 'burglary', 'depressi..."
2525,tt3647498,1,https://www.imdb.com/title/tt3647498,Blood Father,An ex-con reunites with his estranged wayward ...,"['Crime', 'Drama', 'Thriller']",88.0,1 hour 28 minutes,02-09-16,"['English', 'Spanish']",['France'],"['Lionsgate', 'Why Not Productions', 'Wild Bun...","[{'actor': 'Mel Gibson', 'character': 'Link', ...",['Jean-François Richet'],6.4,movie,https://m.media-amazon.com/images/M/MV5BODQwYW...,"['father daughter relationship', 'ex convict',..."


In [66]:
#predict the rating for user_id 94 for all movies
svd.predict(1, "tt5315210", 5)

Prediction(uid=1, iid='tt5315210', r_ui=5, est=5, details={'was_impossible': False})

In [67]:
#get the all predicted for user_id 94
for movie_id in df['_id'].unique():
    print(svd.predict(1, movie_id, 3))

user: 1          item: tt5315210  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt12593682 r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt10648342 r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt1745960  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt1630029  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt8093700  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt15445056 r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt0499549  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt14641788 r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt8912936  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt6710474  r_ui = 3.00   est = 5.00   {'was_impossible': False}
user: 1          item: tt13131350 r_ui = 3.

In [68]:
#ge the movie tt5315210
df[df['_id'] == "tt5315210"]

Unnamed: 0,_id,user_id,url,title,description,genres,runtime,runtime_str,release_date,langs,origins,production_companies,top_cast,director_creator,rate,type,img_url,keywords
0,tt5315210,145,https://www.imdb.com/title/tt5315210,Lou,A storm rages. A young girl is kidnapped. Her ...,"['Action', 'Crime', 'Drama']",107.0,1 hour 47 minutes,23-09-22,"['English', 'Ukrainian']",['United States'],['Bad Robot'],"[{'actor': 'Allison Janney', 'character': 'Lou...",['Anna Foerster'],6.1,movie,https://m.media-amazon.com/images/M/MV5BYzdjMD...,


In [69]:
#get the most rated movies for user_id 94 after prediction
df[df['user_id'] == 94].sort_values(by='rate', ascending=False).head(10)

Unnamed: 0,_id,user_id,url,title,description,genres,runtime,runtime_str,release_date,langs,origins,production_companies,top_cast,director_creator,rate,type,img_url,keywords
3809,tt0046250,94,https://www.imdb.com/title/tt0046250,Roman Holiday,A bored and sheltered princess escapes her gua...,"['Comedy', 'Romance']",118.0,1 hour 58 minutes,02-12-54,"['English', 'Italian', 'German']",['United States'],['Paramount Pictures'],"[{'actor': 'Gregory Peck', 'character': 'Joe B...",['William Wyler'],8.0,movie,https://m.media-amazon.com/images/M/MV5BMTE2MD...,"['doctor', 'taxi', 'italy', 'american in italy..."
2011,tt0315733,94,https://www.imdb.com/title/tt0315733,21 Grams,A freak accident brings together a critically ...,"['Crime', 'Drama', 'Thriller']",124.0,2 hours 4 minutes,21-05-04,['English'],['United States'],"['This Is That Productions', 'Y Productions', ...","[{'actor': 'Sean Penn', 'character': 'Paul Riv...",['Alejandro G. Iñárritu'],7.6,movie,https://m.media-amazon.com/images/M/MV5BMjA4Mj...,"['sex in bed', 'heart donor', 'unfaithfulness'..."
5778,tt0039941,94,https://www.imdb.com/title/tt0039941,The Unsuspected,The secretary of an affably suave radio myster...,"['Drama', 'Film-Noir', 'Mystery']",103.0,1 hour 43 minutes,03-11-49,['English'],['United States'],"['Michael Curtiz Productions', 'Warner Bros.']","[{'actor': 'Claude Rains', 'character': 'Victo...",['Michael Curtiz'],7.2,movie,https://m.media-amazon.com/images/M/MV5BMDY0MT...,
462,tt1228705,94,https://www.imdb.com/title/tt1228705,Iron Man 2,With the world now aware of his identity as Ir...,"['Action', 'Adventure', 'Sci-Fi']",124.0,2 hours 4 minutes,07-05-10,"['English', 'French', 'Russian']",['United States'],"['Paramount Pictures', 'Marvel Entertainment',...","[{'actor': 'Robert Downey Jr.', 'character': '...",['Jon Favreau'],6.9,movie,https://m.media-amazon.com/images/M/MV5BMTM0MD...,"['superhero', 'billionaire', 'power suit', 'ba..."
3174,tt0104070,94,https://www.imdb.com/title/tt0104070,Death Becomes Her,When a fading actress learns of an immortality...,"['Comedy', 'Fantasy', 'Horror']",104.0,1 hour 44 minutes,25-12-92,['English'],['United States'],['Universal Pictures'],"[{'actor': 'Meryl Streep', 'character': 'Madel...",['Robert Zemeckis'],6.6,movie,https://m.media-amazon.com/images/M/MV5BNzM2Yj...,"['egocentric woman', 'frenemy', 'betrayal', 'e..."
7054,tt7868082,94,https://www.imdb.com/title/tt7868082,Charming the Hearts of Men,A romantic drama set during the politically ch...,"['Drama', 'History', 'Romance']",107.0,1 hour 47 minutes,13-08-21,['English'],['United States'],['High Hopes Productions'],"[{'actor': 'Anna Friel', 'character': 'Grace G...",['S.E. DeRose'],6.4,movie,https://m.media-amazon.com/images/M/MV5BNmI2NT...,
3013,tt3874544,94,https://www.imdb.com/title/tt3874544,The Boss Baby,"A suit-wearing, briefcase-carrying baby pairs ...","['Animation', 'Adventure', 'Comedy']",97.0,1 hour 37 minutes,31-03-17,"['English', 'Spanish']",['United States'],['DreamWorks Animation'],"[{'actor': 'Alec Baldwin', 'character': 'Boss ...",['Tom McGrath'],6.3,movie,https://m.media-amazon.com/images/M/MV5BYTkxZm...,"['baby', 'brother brother relationship', 'talk..."
4835,tt0086508,94,https://www.imdb.com/title/tt0086508,Uncommon Valor,Ten years after his son went M.I.A. in Vietnam...,"['Action', 'Drama', 'Thriller']",105.0,1 hour 45 minutes,16-12-83,['English'],['United States'],"['Paramount Pictures', 'Milius-Feitshans']","[{'actor': 'Gene Hackman', 'character': 'Col. ...",['Ted Kotcheff'],6.3,movie,https://m.media-amazon.com/images/M/MV5BNzE4Zj...,"['post traumatic stress disorder', 'vietnam', ..."
4060,tt3850590,94,https://www.imdb.com/title/tt3850590,Krampus,A boy who has a bad Christmas accidentally sum...,"['Comedy', 'Drama', 'Fantasy']",98.0,1 hour 38 minutes,04-12-15,"['English', 'German']",['United States'],"['Legendary Entertainment', 'Universal Picture...","[{'actor': 'Adam Scott', 'character': 'Tom', '...",['Michael Dougherty'],6.2,movie,https://m.media-amazon.com/images/M/MV5BMjk0Mj...,"['christmas', 'folklore', 'christmas horror', ..."
4038,tt0101775,94,https://www.imdb.com/title/tt0101775,Drop Dead Fred,A young woman finds her already unstable life ...,"['Comedy', 'Drama', 'Fantasy']",103.0,1 hour 43 minutes,19-03-93,['English'],"['United States', 'United Kingdom']","['PolyGram Filmed Entertainment', 'Working Tit...","[{'actor': 'Phoebe Cates', 'character': 'Eliza...",['Ate de Jong'],5.9,movie,https://m.media-amazon.com/images/M/MV5BMDNkZW...,"['imaginary friend', 'emotional manipulation',..."
