In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
netflix = pd.read_csv("/content/drive/MyDrive/info 577/Data/netflix/netflix_titles.csv")
netflix.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...


# **Recommendation System(based content)**

In [3]:
tf_idf = TfidfVectorizer(stop_words='english')
netflix['description'] = netflix['description'].fillna('')
tfidf_mat = tf_idf.fit_transform(netflix['description'])
tfidf_mat.shape

(7787, 17905)

In [4]:
cos_sim = linear_kernel(tfidf_mat,tfidf_mat)
indice = pd.Series(netflix.index, index = netflix['title']).drop_duplicates()

In [5]:
indice

title
3%                                            0
7:19                                          1
23:59                                         2
9                                             3
21                                            4
                                           ... 
Zozo                                       7782
Zubaan                                     7783
Zulu Man in Japan                          7784
Zumbo's Just Desserts                      7785
ZZ TOP: THAT LITTLE OL' BAND FROM TEXAS    7786
Length: 7787, dtype: int64

In [6]:
def get_recommendations(title, cos_sim=cos_sim):
    idx = indice[title]
    sim_scores = list(enumerate(cos_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return netflix['title'].iloc[movie_indices]

In [7]:
get_recommendations('War')

5651                            Skiptrace
7280                           Twin Peaks
202                      A Man Called God
4636                           One 2 Ka 4
6293                     The Devil Inside
6204                             The Cell
2594    Handsome: A Netflix Mystery Movie
6035                                 Tezz
2295                 From Paris with Love
3542                         Last Knights
Name: title, dtype: object

In [8]:
get_recommendations('Peaky Blinders')

4692                    Our Godfather
4358                   My Stupid Boss
1807                              Don
6344                         The Fear
3219    Jonathan Strange & Mr Norrell
4953                Power Rangers Zeo
6783                       The Prison
6950                       The Tudors
6236                    The Con Is On
6585     The Legend of Michael Mishra
Name: title, dtype: object

In [9]:
get_recommendations('PK')

7321                    Unbroken
4045       Merku Thodarchi Malai
3164              Jhansi Ki Rani
165           A Clockwork Orange
5261                        ROMA
2627    Harishchandrachi Factory
1940          Ek Main Aur Ekk Tu
888      Bhavesh Joshi Superhero
6412                The Governor
6377           The Frozen Ground
Name: title, dtype: object

# **Recommendation System(Collaborative Filtering(model based))**

In [10]:
pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 292kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617561 sha256=0d0721425dae22d2cba2b40db55e88306abdfcf06d9326460ff5a649a0288f50
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1


In [11]:
from surprise import Reader, Dataset, SVD, KNNBasic, NMF
from surprise.model_selection import cross_validate

In [12]:
ratings = pd.read_csv('/content/drive/MyDrive/info 577/Data/ratings/ratings_small.csv', usecols=[0,1,2])
ratings.head(10)

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
5,1,1263,2.0
6,1,1287,2.0
7,1,1293,2.0
8,1,1339,3.5
9,1,1343,2.0


In [13]:
movies = pd.read_csv('/content/drive/MyDrive/info 577/Data/ratings/movie.csv')
movies.set_index('movieId', inplace = True)
movies.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [14]:
# getting full dataset
reader = Reader()
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [15]:
#SVD
svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8968  0.9020  0.8940  0.9014  0.8882  0.8965  0.0051  
MAE (testset)     0.6899  0.6937  0.6887  0.6901  0.6869  0.6899  0.0022  
Fit time          5.13    5.06    5.01    4.98    5.08    5.05    0.05    
Test time         0.20    0.29    0.25    0.28    0.18    0.24    0.04    


{'fit_time': (5.133920192718506,
  5.062976360321045,
  5.010285139083862,
  4.98326849937439,
  5.080903053283691),
 'test_mae': array([0.68989997, 0.69365375, 0.68872322, 0.69012475, 0.68692912]),
 'test_rmse': array([0.89679549, 0.90198554, 0.89396138, 0.90136434, 0.88816386]),
 'test_time': (0.19567012786865234,
  0.292844295501709,
  0.25478434562683105,
  0.27516770362854004,
  0.1817314624786377)}

In [None]:
#KNN
knn = KNNBasic()
cross_validate(knn, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9632  0.9669  0.9613  0.9688  0.9664  0.9653  0.0027  
MAE (testset)     0.7390  0.7402  0.7414  0.7452  0.7441  0.7420  0.0023  
Fit time          0.15    0.18    0.18    0.20    0.18    0.18    0.02    
Test time         1.88    1.83    1.77    1.89    1.78    1.83    0.05    


{'fit_time': (0.1516118049621582,
  0.18146896362304688,
  0.17958903312683105,
  0.20258569717407227,
  0.18217182159423828),
 'test_mae': array([0.73900752, 0.74015861, 0.74137091, 0.74523777, 0.7440768 ]),
 'test_rmse': array([0.96324073, 0.96692464, 0.96132247, 0.9687639 , 0.96638192]),
 'test_time': (1.8784379959106445,
  1.8264453411102295,
  1.7747266292572021,
  1.8938186168670654,
  1.7836592197418213)}

In [None]:
#NMF
nmf = NMF()
cross_validate(nmf, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9511  0.9407  0.9501  0.9466  0.9395  0.9456  0.0048  
MAE (testset)     0.7333  0.7229  0.7286  0.7256  0.7208  0.7262  0.0044  
Fit time          6.59    6.69    6.57    6.70    6.66    6.64    0.05    
Test time         0.25    0.13    0.15    0.30    0.13    0.19    0.07    


{'fit_time': (6.58836030960083,
  6.690783739089966,
  6.574781179428101,
  6.702477216720581,
  6.661552429199219),
 'test_mae': array([0.7332946 , 0.72287657, 0.72863819, 0.72561564, 0.72079004]),
 'test_rmse': array([0.95114848, 0.94066318, 0.95014025, 0.94661104, 0.9394931 ]),
 'test_time': (0.24692034721374512,
  0.1340622901916504,
  0.14641165733337402,
  0.2990994453430176,
  0.13310003280639648)}

In [16]:
df_1 = ratings[(ratings['userId'] == 1) & (ratings['rating'] == 4)]
df_1 = df_1.set_index('movieId')
df_1 = df_1.join(movies)['title']
df_1

movieId
1172    Cinema Paradiso (Nuovo cinema Paradiso) (1989)
1953                     French Connection, The (1971)
2105                                       Tron (1982)
Name: title, dtype: object

In [17]:
user_1 = movies.copy()
user_1 = user_1.reset_index()

trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f94cc504f90>

In [18]:
user_1['Estimate_Score'] = user_1['movieId'].apply(lambda x: svd.predict(1, x).est)
user_1 = user_1.drop('movieId', axis = 1)
user_1 = user_1.sort_values('Estimate_Score', ascending=False)
print(user_1.head(10))

                                            title  ... Estimate_Score
843                         Godfather, The (1972)  ...       3.617659
3373                          Modern Times (1936)  ...       3.598688
3001                                Harvey (1950)  ...       3.585312
1195               Godfather: Part II, The (1974)  ...       3.571528
952                     African Queen, The (1951)  ...       3.549166
909                          All About Eve (1950)  ...       3.549065
2233                             Happiness (1998)  ...       3.541815
229   Eat Drink Man Woman (Yin shi nan nu) (1994)  ...       3.522430
1248                        Cool Hand Luke (1967)  ...       3.495233
1172                   Princess Bride, The (1987)  ...       3.480248

[10 rows x 3 columns]
