# ***Movie Recommender System***

It combines Content-Based Filtering with Collaborative Filtering.

This is the ***consolidated form*** of all the EDA done for our recommendation system.

***`This is how it will work:`***

    Input: 
Pass any combination of the parameters below to get recommendations:

1.   User ID
2.   The Genre of Movie
3.   The Title of a Movie
4.   Top N number of Movies



    Output: 
Similar movies sorted on the basis of expected ratings by that particular user.

In [None]:
# importing basic packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
! pip install numpy
! pip install scikit-surprise
#Simple Python RecommendatIon System Engine (SURPRISE)
from surprise import Reader, SVD, Dataset
from surprise.model_selection import cross_validate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# ***Cosine Similarity***

In [None]:
def cos_sim(vct_list):
  """Build the pairwise cosine-similarity table for a collection of documents.

  Every entry of ``vct_list`` is turned into a TF-IDF vector over unigrams
  and bigrams (English stop words removed); each document is then compared
  with every other one.

  Returns a square ``DataFrame`` with default positional labels, where cell
  ``[i, j]`` holds the similarity between document ``i`` and document ``j``.
  """
  # stop_words='english' discards very common words such as 'this', 'is', 'are'.
  vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
  tfidf_matrix = vectorizer.fit_transform(vct_list)

  similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
  return pd.DataFrame(data=similarity)

# ***Content Based Recommendation System***

In [None]:
def cbr_recommender(title, df, cos_sim_df, top_k=25):
  """Return the movies whose metadata is most similar to ``title``.

  Parameters
  ----------
  title : title of the reference movie; must occur in ``df['title']``.
  df : movie table providing at least the columns 'id', 'movieId', 'title',
    'crew', 'cast', 'genres', 'year', 'vote_count' and 'vote_average'.
  cos_sim_df : square similarity table whose row/column ``i`` refers to the
    ``i``-th row of ``df`` (positional, as produced by ``cos_sim``).
  top_k : number of neighbours to return (default 25).

  Returns
  -------
  DataFrame with the ``top_k`` most similar movies, most similar first,
  indexed by their position in ``df``.

  Raises
  ------
  KeyError : if ``title`` is not present in ``df``.
  """
  # reset_index() returns a new frame, so the caller's ``df`` is never touched
  # and row labels become positions that line up with ``cos_sim_df``.
  catalog = df.reset_index()

  matches = np.flatnonzero((catalog['title'] == title).to_numpy())
  if len(matches) == 0:
    raise KeyError(title)
  # Several movies can share one title; use the first so that ``idx`` is
  # always a single position instead of a Series of positions.
  idx = int(matches[0])

  # Drop the query movie by position rather than assuming it sorts first:
  # another movie with identical features would tie with it at 1.0.
  ranked = sorted(
      ((pos, score) for pos, score in enumerate(cos_sim_df[idx]) if pos != idx),
      key=lambda pair: pair[1], reverse=True)
  neighbour_pos = [pos for pos, _ in ranked[:top_k]]

  columns = ['id', 'movieId', 'title', 'crew', 'cast', 'genres', 'year', 'vote_count', 'vote_average']
  # Explicit copy: the assignments below must not target a view of ``catalog``
  # (this is what triggered pandas' SettingWithCopyWarning).
  movies_sim = catalog.iloc[neighbour_pos][columns].copy()

  # Collapse repeated crew names; dict.fromkeys keeps first-seen order, so the
  # result is deterministic (unlike list(set(...))).
  movies_sim['crew'] = movies_sim['crew'].apply(lambda names: list(dict.fromkeys(names)))

  # Cast the known vote figures to int; rows with a missing value stay NaN.
  for col in ('vote_count', 'vote_average'):
    known = movies_sim[col].notnull()
    movies_sim[col] = movies_sim.loc[known, col].astype('int')

  return movies_sim

# ***Collaborative Filtering: User-Based Recommendation System***

In [None]:
def type_convertor(x):
  """Convert ``x`` to ``int``, returning ``np.nan`` when that is not possible.

  Only the errors ``int()`` raises for unconvertible input are handled
  (wrong type, unparsable text, NaN, infinity); anything else, including
  ``KeyboardInterrupt``, propagates instead of being silently swallowed.
  """
  try:
    return int(x)
  except (TypeError, ValueError, OverflowError):
    return np.nan


In [None]:
def collaberative_filter(userid, dataframe, random_param=40, k_folds=5):
  """Rank the movies in ``dataframe`` by the rating predicted for ``userid``.

  A model is trained (and cross-validated) by ``filtering_model`` on every
  call; the cross-validation report is printed.

  Parameters
  ----------
  userid : raw user id to predict ratings for.
  dataframe : candidate movies; must contain a 'movieId' column holding the
    ids used in the ratings data.
  random_param : random seed forwarded to ``filtering_model``.
  k_folds : number of cross-validation folds forwarded to ``filtering_model``.

  Returns
  -------
  A copy of ``dataframe`` with an extra 'est_ratings' column, sorted by that
  column in descending order. The input frame is left unmodified.
  """
  # Forward the tuning knobs; previously they were accepted but ignored.
  validation, algo = filtering_model(random_param=random_param, k_folds=k_folds)
  print(validation)

  # Work on a copy so the caller's frame (often a slice) is not mutated.
  ranked = dataframe.copy()

  # Each row already carries its own movieId, so predict from it directly
  # instead of looking it up again through 'id' (which breaks when an id
  # occurs more than once).
  ranked['est_ratings'] = [
      algo.predict(uid=userid, iid=movie_id).est for movie_id in ranked['movieId']
  ]

  return ranked.sort_values('est_ratings', ascending=False)

# ***Hybrid Recommendation System***

In [None]:
def hybrid_recommender(userid, hybd_title, hybd_df, hybd_cos_sim_df):
  """Content-based shortlist re-ranked by the user's predicted ratings.

  First collects the movies most similar to ``hybd_title`` (via
  ``cbr_recommender`` on ``hybd_df`` / ``hybd_cos_sim_df``), then hands that
  shortlist to ``collaberative_filter`` for ``userid`` and returns its result.
  """
  similar_movies = cbr_recommender(title=hybd_title, df=hybd_df, cos_sim_df=hybd_cos_sim_df)
  return collaberative_filter(userid=userid, dataframe=similar_movies)
  

# ***Word Stem Convertor***

In [None]:
def stem_convertor(words_list, word_freq_list=None):
  """Stem and normalise every word list held in ``words_list``.

  Parameters
  ----------
  words_list : pandas Series whose elements are lists of words.
  word_freq_list : optional container of accepted words. When given, a word
    is kept only if ``word in word_freq_list`` holds. Note that for a pandas
    Series this membership test looks at the index labels, not the values.

  Returns
  -------
  Series of lists: each kept word is stemmed with the English Snowball
  stemmer, stripped of spaces and lower-cased.
  """
  from nltk.stem.snowball import SnowballStemmer
  snowball = SnowballStemmer('english')

  def normalise(words):
    # Filter first (if requested), then stem and tidy in a single pass.
    if word_freq_list is not None:
      words = [word for word in words if word in word_freq_list]
    return [snowball.stem(word).replace(' ', '').lower() for word in words]

  return words_list.apply(normalise)

# ***Weighted Ratings Based on Watch Performance***

In [None]:
def wr(dataframe, percentile=0.95):
  """Score movies with a vote-count-weighted rating and rank them.

  The rating is ``WR = v/(v+m) * R + m/(v+m) * C`` where ``v`` is the
  movie's vote count, ``R`` its average vote, ``m`` the ``percentile``
  quantile of all vote counts (the minimum needed to qualify) and ``C`` the
  mean average vote over the whole ``dataframe``.

  Parameters
  ----------
  dataframe : table with 'vote_count' and 'vote_average' columns.
  percentile : quantile of 'vote_count' used as the qualification threshold
    (default 0.95, i.e. only the top 5% most-voted movies qualify).

  Returns
  -------
  A new DataFrame holding the qualifying rows plus a 'wr' column, sorted by
  'wr' in descending order. ``dataframe`` itself is not modified.
  """
  print(percentile)
  m = dataframe['vote_count'].quantile(percentile)
  C = dataframe['vote_average'].mean()

  has_votes = dataframe['vote_count'].notnull() & dataframe['vote_average'].notnull()
  # Explicit copy: adding the 'wr' column below must not target a slice of
  # the caller's frame (the source of pandas' SettingWithCopyWarning).
  qualified = dataframe[(dataframe['vote_count'] >= m) & has_votes].copy()

  v = qualified['vote_count']
  R = qualified['vote_average']
  qualified['wr'] = (v / (v + m)) * R + (m / (v + m)) * C

  return qualified.sort_values('wr', ascending=False)

# ***Pre-Processing and Data Wrangling***

In [None]:
def cleaned_data(data_dir=r'/content/drive/MyDrive/DATA SCIENCE/Take Aways/7. Data Analysis And ML/Movie Recommender System'):
  """Load the raw CSV files and derive every table the recommenders use.

  Parameters
  ----------
  data_dir : directory containing movies_metadata.csv, links_small.csv,
    credits.csv, keywords.csv and ratings_small.csv. Defaults to the
    original Google Drive location.

  Returns
  -------
  dict with the keys
    'metadata'        : cleaned movie metadata (before the credits/keywords merge)
    'topmovies'       : id/title/genres/year/popularity/votes per movie
    'topgenres'       : 'topmovies' with one row per (movie, genre)
    'small_metadata'  : metadata + cast/crew/keywords for movies in links_small
    'feature_df'      : 'small_metadata' plus the 'feature_classification' text
    'user_ratings'    : the user ratings table
    'user_top_movies' : links_small joined with the movie metadata
    'user_top_genres' : 'user_top_movies' with one row per (movie, genre)
    'user_top_titles' : feature table restricted to the columns the
                        title-based recommenders need, plus 'movieId'
  """
  def _read(file_name):
    # All source files live side by side in ``data_dir``.
    return pd.read_csv(data_dir + '/' + file_name)

  md = _read('movies_metadata.csv')

  # Reorganise the table columns and eliminate 'original_title'. The copy
  # makes the column assignments below operate on an independent frame.
  md = md[['id','imdb_id','title','original_language','spoken_languages','tagline','homepage','genres','overview','belongs_to_collection',
           'adult','budget','status', 'release_date','revenue','runtime','popularity','poster_path','production_companies',
           'production_countries','video','vote_average','vote_count']].copy()
  # Convert the stringified list of genre dicts into a plain list of names.
  md['genres'] = md['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x])
  # NOTE(review): hard-coded row labels, presumably malformed records of the
  # original dataset -- confirm they still apply if the CSV changes.
  md = md.drop([19730, 29503, 35587])
  md['id'] = md[md.id.notnull()]['id'].astype('int')
  md['vote_count'] = md[md.vote_count.notnull()].vote_count.astype('int')
  # NOTE(review): casting the average vote to int truncates it (7.9 -> 7),
  # which coarsens every ranking built on it -- confirm this is intended.
  md['vote_average'] = md[md.vote_average.notnull()].vote_average.astype('int')
  # Unparsable dates become NaT, whose string form yields the year 'NaT'.
  md['year'] = pd.to_datetime(md.release_date, errors='coerce').apply(lambda x: str(x).split('-')[0])

  meta_data = md

  #####################     Top Movies     #####################
  top = md[['id','title', 'genres', 'year', 'popularity', 'vote_average', 'vote_count']]

  top_movies = top

  #####################     Top Genres     #####################
  # One row per (movie, genre); the series keeps the movie's row label.
  s = top.apply(lambda x: pd.Series(x['genres']), axis=1).stack().reset_index(level=1, drop=True)
  s.name = 'genres'
  top_genres = top.drop(['genres'], axis=1).join(s)

  #####################     Top Similar Movies ---- Content based Recommender Or Metadata based Recommender     #####################
  links_sml = _read('links_small.csv')
  credits = _read('credits.csv')
  keywords = _read('keywords.csv')

  links_sml['movieId'] = links_sml.movieId.astype('int')
  links_sml['tmdbId'] = links_sml[links_sml.tmdbId.notnull()]['tmdbId'].astype('int')
  keywords['id'] = keywords.id.astype('int')
  credits['id'] = credits.id.astype('int')

  md = md.merge(credits, on='id').merge(keywords, on='id')
  # Reduce the number of records for compute efficiency; copy so that the
  # column rewrites below do not act on a slice of ``md``.
  smd = md[md.id.isin(links_sml.tmdbId)].copy()
  # Extract cast (top 3), crew (directors only) and keywords.
  smd['cast'] = smd['cast'].apply(literal_eval).apply(lambda x: [cast['name'] for cast in x[:3]])
  smd['crew'] = smd['crew'].apply(literal_eval).apply(lambda x: [crew['name'] for crew in x if crew['job']=='Director'])
  smd['keywords'] = smd['keywords'].apply(literal_eval).apply(lambda x: [key['name'] for key in x])

  temp_smd = smd.copy()
  # NOTE: directors are repeated four times to give them more weight in TF-IDF.
  temp_smd['crew'] = temp_smd['crew'].apply(lambda x: x*4)
  # Remove white space so that a full name becomes a single token.
  temp_smd['cast'] = temp_smd['cast'].apply(lambda x: [cast.replace(' ','') for cast in x])
  temp_smd['crew'] = temp_smd['crew'].apply(lambda x: [crew.replace(' ','') for crew in x])

  # Keyword frequencies: explode to one keyword per row and count them
  # (avoids building a movies x keywords intermediate frame).
  freq = temp_smd['keywords'].explode().value_counts()
  freq = freq[freq > 2]  # keep only keywords that occur more than twice

  # Drop rare keywords and reduce the remaining ones to their stem.
  temp_smd['keywords'] = stem_convertor(temp_smd['keywords'], freq)

  # Concatenate all features into one text per movie for vectorisation.
  temp_smd['feature_classification'] = temp_smd.crew + temp_smd.cast + temp_smd.keywords + temp_smd.genres
  temp_smd['feature_classification'] = temp_smd.feature_classification.apply(lambda x: ' '.join(x))

  #####################     collaberative filter based on user's ratings     #####################
  us_ratings = _read('ratings_small.csv')
  us_ratings['userId'] = us_ratings.userId.astype('int')
  us_ratings['movieId'] = us_ratings.movieId.astype('int')

  # Top movies for users (with no specific content interest).
  user_top_movies = top[['id','title', 'genres', 'year', 'popularity', 'vote_average', 'vote_count']]
  user_top_movies = links_sml.merge(user_top_movies, left_on='tmdbId', right_on='id', how='left')
  user_top_movies = user_top_movies[['id','movieId','title', 'genres', 'year','vote_count', 'vote_average']]

  # Top movies for users (with a specific genre interest). The genres must be
  # exploded from this frame itself: its row labels come from the merge above
  # and do not match those of ``s``, so joining ``s`` here would attach
  # genres to the wrong movies.
  user_genres = user_top_movies['genres'].explode()
  user_top_genres = user_top_movies.drop(['genres'], axis=1).join(user_genres)

  # Top movies for users (with a specific title interest).
  user_top_titles = temp_smd[['id','title', 'genres', 'cast', 'crew', 'year', 'popularity', 'vote_average', 'vote_count','feature_classification']].copy()
  # tmdbId -> movieId lookup; keeping one entry per tmdbId guarantees every
  # movie receives a single scalar movieId.
  tmdb_to_movie = links_sml.dropna(subset=['tmdbId']).drop_duplicates(subset='tmdbId')
  tmdb_to_movie = tmdb_to_movie.set_index('tmdbId')['movieId']
  user_top_titles['movieId'] = user_top_titles['id'].map(tmdb_to_movie)

  return {'metadata': meta_data, 'topmovies':top_movies, 'topgenres':top_genres, 'small_metadata':smd,
          'feature_df': temp_smd, 'user_ratings':us_ratings, 'user_top_movies':user_top_movies, 'user_top_genres': user_top_genres,
          'user_top_titles': user_top_titles
          }


#cleaned_data()

In [None]:
new_data = cleaned_data() 

  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation:

# ***Model Training***

***Model Used :*** *Singular Value Decomposition (SVD)*

In [None]:
def filtering_model(dataframe=None, random_param=40, k_folds=5):
  """Cross-validate and then fit an SVD rating model.

  Parameters
  ----------
  dataframe : ratings table with 'userId', 'movieId' and 'rating' columns.
    When omitted, ``new_data['user_ratings']`` is used and looked up at call
    time, so a rebuilt ``new_data`` is picked up without redefining this
    function.
  random_param : random seed for the SVD model.
  k_folds : number of cross-validation folds.

  Returns
  -------
  ``[validation, algo]``: the cross-validation report (RMSE and MAE per
  fold) and the model fitted on the full rating set.
  """
  if dataframe is None:
    dataframe = new_data['user_ratings']

  # NOTE(review): Reader() is used with its default rating scale -- confirm it
  # matches the range of the 'rating' column.
  data = Dataset.load_from_df(dataframe[['userId', 'movieId', 'rating']], Reader())
  algo = SVD(random_state=random_param)
  validation = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=k_folds, verbose=True)

  # Cross-validation only evaluates; the returned model is trained on all data.
  trainset = data.build_full_trainset()
  algo.fit(trainset)
  return [validation, algo]

# ***Final Search Based Recommendation System***

In [None]:
'''
There are 8 cases:
0 ---> Everything is None ---> Top Movies Recommend. System (i.e., qualified movies) ======>>>>> Achieved
1 ---> genre is given & the rest is None ---> Top Genre Based Recommend. System ======>>>>> Achieved
2 ---> title is given & the rest is None ---> Content Based Recommend. System (i.e., Metadata based Recommendation) ======>>>>> Achieved
3 ---> user_id is given & the rest is None ---> SVD on 0 (i.e., CBF where the df is top_movies with us_ratings) ======>>>>> Achieved
4 ---> user_id & genre are given & the rest is None ---> SVD on 1 (i.e., CBF where the df is top_genres with us_ratings) ======>>>>> Achieved
5 ---> user_id & title are given & the rest is None ---> SVD on 2 (i.e., CBF + CBR = Hybrid Recommender) ======>>>>> Achieved
6 ---> genre & title are given & the rest is None ---> (1 & 2) (i.e., restrict the titles to the movies of the genre and pass them to CBR; fall back to all titles if that fails) ======>>>>> Achieved
7 ---> Everything is given ---> SVD on (1 & 2) (i.e., restrict the titles to the movies of the genre and pass them to the Hybrid Recommender; fall back to all titles if that fails) ======>>>>> Achieved
'''

def Recommendation_system(user_id=None, movie_genre=None, movie_title= None, top_n= 10, data= None):
  """Single entry point: recommend movies from any mix of user, genre and title.

  Parameters
  ----------
  user_id : raw user id; when given, results are ranked by the rating
    predicted for that user ('est_ratings'), otherwise by weighted rating ('wr').
  movie_genre : genre name used to restrict the candidates.
  movie_title : title whose metadata-similar movies are wanted.
  top_n : number of rows to return.
  data : dict of prepared tables as returned by ``cleaned_data``. When
    omitted, the module-level ``new_data`` is used (looked up at call time).

  Cases
  -----
  0  nothing given        -> top movies by weighted rating (95th percentile)
  1  genre                -> top movies of that genre (85th percentile)
  2  title                -> content-based neighbours, weighted rating (60th)
  3  user                 -> top movies (85th percentile) ranked for the user
  4  user + genre         -> genre movies (60th percentile) ranked for the user
  5  user + title         -> hybrid: content-based neighbours ranked for the user
  6  genre + title        -> case 2 restricted to the genre (50th percentile)
  7  user + genre + title -> case 5 restricted to the genre

  In cases 6 and 7 the search falls back to the whole catalogue when it
  cannot be carried out inside the genre (e.g. the title is not part of it).

  Returns
  -------
  DataFrame with 'title', 'genres', 'year', 'vote_average', 'vote_count' and
  either 'wr' or 'est_ratings', limited to ``top_n`` rows.
  """
  if data is None:
    data = new_data

  has_user = user_id is not None
  has_genre = movie_genre is not None
  has_title = movie_title is not None
  base_cols = ['title', 'genres', 'year', 'vote_average', 'vote_count']

  def only_genre(frame):
    # Rows of ``frame`` tagged with the requested genre.
    return frame[frame['genres'] == movie_genre]

  if has_title:
    catalogue = data['user_top_titles']

    def similar_to_title(frame):
      # Neighbours of the title inside ``frame``; re-ranked per user if known.
      similarity = cos_sim(frame['feature_classification'])
      if has_user:
        return hybrid_recommender(userid=user_id, hybd_title=movie_title, hybd_df=frame,
                                  hybd_cos_sim_df=similarity)
      return cbr_recommender(title=movie_title, df=frame, cos_sim_df=similarity)

    if has_genre:
      ####### cases 6 and 7: title search restricted to the genre #######
      genre_rows = only_genre(data['user_top_genres'] if has_user else data['topgenres'])
      in_genre = catalogue[catalogue.id.isin(genre_rows.id)]
      try:
        content = similar_to_title(in_genre)
      except Exception:
        # Deliberate best effort: search the whole catalogue instead.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt through.
        content = similar_to_title(catalogue)
    else:
      ####### cases 2 and 5: title search over the whole catalogue #######
      content = similar_to_title(catalogue)

    if has_user:
      recommendation = content[base_cols + ['est_ratings']]
    else:
      recommendation = wr(dataframe=content, percentile=0.50 if has_genre else 0.60)
      recommendation = recommendation[base_cols + ['wr']]

  elif has_user:
    if has_genre:
      ####### case 4: genre movies ranked for the user #######
      shortlist = wr(dataframe=only_genre(data['user_top_genres']), percentile=0.60)
      ranked = collaberative_filter(userid=user_id, dataframe=shortlist)
      recommendation = ranked[base_cols + ['est_ratings']].drop_duplicates(['title'], keep='last')
    else:
      ####### case 3: top movies ranked for the user #######
      shortlist = wr(data['user_top_movies'], 0.85)
      ranked = collaberative_filter(userid=user_id, dataframe=shortlist)
      recommendation = ranked[base_cols + ['est_ratings']]

  elif has_genre:
    ####### case 1: top movies of the genre #######
    recommendation = wr(dataframe=only_genre(data['topgenres']), percentile=0.85)
    recommendation = recommendation[base_cols + ['wr']]

  else:
    ####### case 0: overall top movies #######
    recommendation = wr(data['topmovies'])
    recommendation = recommendation[base_cols + ['wr']]

  return recommendation.head(top_n)
  

#***Testing Recommendation System***

Here, we are **testing** all of our cases with sample values and **expecting** up to 15 recommended movies from our Recommendation System, ranked by their ratings.

    Search Cases:
*0 ---> Everything is None ---> Top Movies Recommendation System* 

*1 ---> genre is given & else is None ---> Top Genre Based Recommendation System*

*2 ---> title is given & else is None ---> Content Based Recommendation System*

*3 ---> user_id is given & else is None ---> Top Movies Recommendation for New User*

*4 ---> user_id & genre given & else is None ---> Top Movies Recommendation based on User searched Genre* 

*5 ---> user_id & title is given & else is None ---> Top Movies Recommendation based on User searched Movie Title*

*6 ---> genre & title is given & else is None ---> Top Movies Recommendation based on searched Genre and Title* 

*7 ---> Everything is given ---> Top Movies Recommendation based on User searched Genre and Movie Title* 


In [None]:
#Everything is None ---> Top Movies Recommendation System
user_op=pd.DataFrame(Recommendation_system(user_id= None, movie_genre= None, movie_title= None, top_n=15)) 
user_op

0.95


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,title,genres,year,vote_average,vote_count,wr
15480,Inception,"[Action, Thriller, Science Fiction, Mystery, A...",2010,8.0,14075.0,7.917588
12481,The Dark Knight,"[Drama, Action, Crime, Thriller]",2008,8.0,12269.0,7.905871
22879,Interstellar,"[Adventure, Drama, Science Fiction]",2014,8.0,11187.0,7.897107
2843,Fight Club,[Drama],1999,8.0,9678.0,7.881753
4863,The Lord of the Rings: The Fellowship of the Ring,"[Adventure, Fantasy, Action]",2001,8.0,8892.0,7.871787
292,Pulp Fiction,"[Thriller, Crime]",1994,8.0,8670.0,7.86866
314,The Shawshank Redemption,"[Drama, Crime]",1994,8.0,8358.0,7.864
7000,The Lord of the Rings: The Return of the King,"[Adventure, Fantasy, Action]",2003,8.0,8226.0,7.861927
351,Forrest Gump,"[Comedy, Drama, Romance]",1994,8.0,8147.0,7.860656
5814,The Lord of the Rings: The Two Towers,"[Adventure, Fantasy, Action]",2002,8.0,7641.0,7.851924


In [None]:
#Genre is given & else is None ---> Top Genre Based Recommendation System
user_op=pd.DataFrame(Recommendation_system(user_id= None, movie_genre= 'Fantasy', movie_title= None, top_n=15)) 
user_op

0.85


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,title,genres,year,vote_average,vote_count,wr
4863,The Lord of the Rings: The Fellowship of the Ring,Fantasy,2001,8.0,8892.0,7.888126
7000,The Lord of the Rings: The Return of the King,Fantasy,2003,8.0,8226.0,7.879484
5814,The Lord of the Rings: The Two Towers,Fantasy,2002,8.0,7641.0,7.870711
3030,The Green Mile,Fantasy,1999,8.0,4166.0,7.772216
5481,Spirited Away,Fantasy,2001,8.0,3968.0,7.76188
9698,Howl's Moving Castle,Fantasy,2004,8.0,2049.0,7.574941
2884,Princess Mononoke,Fantasy,1997,8.0,2041.0,7.573545
5833,My Neighbor Totoro,Fantasy,1988,8.0,1730.0,7.511144
926,It's a Wonderful Life,Fantasy,1946,8.0,1103.0,7.306584
14551,Avatar,Fantasy,2009,7.0,12114.0,6.94855


In [None]:
#Title is given & else is None ---> Content Based Recommendation System
user_op=pd.DataFrame(Recommendation_system(user_id= None, movie_genre= None, movie_title= 'Avatar', top_n=15)) 
user_op

0.6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,title,genres,year,vote_average,vote_count,wr
8865,Star Wars: The Force Awakens,"[Action, Adventure, Science Fiction, Fantasy]",2015,7,7993,6.822522
1376,Titanic,"[Drama, Romance, Thriller]",1997,7,7770,6.81871
987,Alien,"[Horror, Action, Thriller, Science Fiction]",1979,7,4564,6.737748
8401,Star Trek Into Darkness,"[Action, Adventure, Science Fiction]",2013,7,4479,6.734605
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]",1991,7,4274,6.726708
1011,The Terminator,"[Action, Thriller, Science Fiction]",1984,7,4208,6.724064
1241,The Fifth Element,"[Adventure, Fantasy, Action, Thriller, Science...",1997,7,3962,6.713743
974,Aliens,"[Horror, Action, Thriller, Science Fiction]",1986,7,3282,6.680733
8521,Thor: The Dark World,"[Action, Adventure, Fantasy]",2013,6,4873,6.097778
7878,Pirates of the Caribbean: On Stranger Tides,"[Adventure, Action, Fantasy]",2011,6,5068,6.095297


In [None]:
#user_id is given & else is None ---> Top Movies Recommendation for New User
user_op=pd.DataFrame(Recommendation_system(user_id= 437, movie_genre= None, movie_title= None, top_n=15)) 
user_op

0.85


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8958  0.8976  0.8944  0.8960  0.8999  0.8967  0.0019  
MAE (testset)     0.6922  0.6923  0.6881  0.6897  0.6918  0.6908  0.0016  
Fit time          11.99   16.91   7.09    5.15    5.25    9.28    4.55    
Test time         0.89    0.44    0.38    0.17    0.16    0.41    0.26    
{'test_rmse': array([0.89575878, 0.89757715, 0.89441684, 0.89599097, 0.89987419]), 'test_mae': array([0.69216459, 0.69226462, 0.68812082, 0.68972175, 0.69178598]), 'fit_time': (11.988518714904785, 16.911083698272705, 7.091065168380737, 5.150552272796631, 5.247168302536011), 'test_time': (0.8858435153961182, 0.4444851875305176, 0.3806736469268799, 0.17292070388793945, 0.1550149917602539)}


Unnamed: 0,title,genres,year,vote_average,vote_count,est_ratings
525,The Silence of the Lambs,"[Crime, Drama, Thriller]",1991,8.0,4549.0,4.567696
48,The Usual Suspects,"[Drama, Crime, Thriller]",1995,8.0,3334.0,4.56746
101,Taxi Driver,"[Crime, Drama]",1976,8.0,2632.0,4.508224
696,The Godfather,"[Drama, Crime]",1972,8.0,6024.0,4.478932
284,The Shawshank Redemption,"[Drama, Crime]",1994,8.0,8358.0,4.456483
472,Schindler's List,"[Drama, History, War]",1993,8.0,4436.0,4.45214
266,Pulp Fiction,"[Thriller, Crime]",1994,8.0,8670.0,4.450789
978,Psycho,"[Drama, Horror, Thriller]",1960,8.0,2405.0,4.437119
987,Raging Bull,[Drama],1980,7.0,968.0,4.417649
1009,Chinatown,"[Crime, Drama, Mystery, Thriller]",1974,7.0,939.0,4.393129


In [None]:
#user_id & genre given & else is None ---> Top Movies Recommendation based on User searched Genre
user_op=pd.DataFrame(Recommendation_system(user_id= 437, movie_genre= 'Crime', movie_title= None, top_n=15)) 
user_op

0.6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9046  0.9012  0.8964  0.8916  0.8999  0.8988  0.0044  
MAE (testset)     0.6989  0.6944  0.6896  0.6871  0.6914  0.6923  0.0041  
Fit time          5.42    8.46    5.26    5.15    5.30    5.92    1.27    
Test time         0.32    0.15    0.20    0.14    0.16    0.19    0.07    
{'test_rmse': array([0.90460001, 0.90123427, 0.89642555, 0.89157327, 0.89992399]), 'test_mae': array([0.69887288, 0.69442466, 0.68962297, 0.68710774, 0.69135701]), 'fit_time': (5.416647911071777, 8.457805156707764, 5.264984130859375, 5.153661251068115, 5.30324649810791), 'test_time': (0.32223033905029297, 0.1465623378753662, 0.19619369506835938, 0.13752055168151855, 0.15751194953918457)}


Unnamed: 0,title,genres,year,vote_average,vote_count,est_ratings
284,The Shawshank Redemption,Crime,1994,8.0,8358.0,4.456483
971,The Third Man,Crime,1949,7.0,431.0,4.369547
2377,Fight Club,Crime,1999,8.0,9678.0,4.343555
728,It Happened One Night,Crime,1934,7.0,283.0,4.286026
4617,Finding Nemo,Crime,2003,7.0,6292.0,4.264072
4816,All the President's Men,Crime,1976,7.0,424.0,4.199633
1693,Who's Afraid of Virginia Woolf?,Crime,1966,7.0,199.0,4.182275
8735,The Imitation Game,Crime,2014,8.0,5895.0,4.177078
967,Apocalypse Now,Crime,1979,8.0,2112.0,4.164218
8477,The Wolf of Wall Street,Crime,2013,7.0,6768.0,4.15451


In [None]:
#user_id & title is given & else is None ---> Top Movies Recommendation based on User searched Movie Title
user_op=pd.DataFrame(Recommendation_system(user_id= 437, movie_genre= None, movie_title= 'Avatar', top_n=15)) 
user_op

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8872  0.8934  0.8966  0.8997  0.9071  0.8968  0.0066  
MAE (testset)     0.6821  0.6914  0.6899  0.6929  0.6997  0.6912  0.0057  
Fit time          7.53    6.19    5.22    5.17    5.72    5.97    0.86    
Test time         0.14    0.14    0.33    0.15    0.15    0.18    0.08    
{'test_rmse': array([0.88720875, 0.89339247, 0.89660598, 0.8997471 , 0.90714051]), 'test_mae': array([0.68206639, 0.6914215 , 0.68990632, 0.69292748, 0.69971443]), 'fit_time': (7.530407667160034, 6.191559076309204, 5.222915887832642, 5.171071290969849, 5.72374963760376), 'test_time': (0.14157772064208984, 0.14461565017700195, 0.33451199531555176, 0.145066499710083, 0.14887285232543945)}


Unnamed: 0,title,genres,year,vote_average,vote_count,est_ratings
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]",1991,7,4274,4.248028
1011,The Terminator,"[Action, Thriller, Science Fiction]",1984,7,4208,4.127063
987,Alien,"[Horror, Action, Thriller, Science Fiction]",1979,7,4564,4.104533
974,Aliens,"[Horror, Action, Thriller, Science Fiction]",1986,7,3282,3.920539
8231,Side by Side,[Documentary],2012,7,111,3.871174
2014,Fantastic Planet,"[Animation, Fantasy, Science Fiction]",1973,7,140,3.843639
5301,Cypher,"[Thriller, Science Fiction, Mystery]",2002,6,196,3.710854
8401,Star Trek Into Darkness,"[Action, Adventure, Science Fiction]",2013,7,4479,3.699559
344,True Lies,"[Action, Thriller]",1994,6,1138,3.687984
922,The Abyss,"[Adventure, Action, Thriller, Science Fiction]",1989,7,822,3.637243


In [None]:
#genre & title is given & else is None ---> Top Movies Recommendation based on searched Genre and Title
user_op=pd.DataFrame(Recommendation_system(user_id= None, movie_genre= 'Fantasy', movie_title= 'Avatar', top_n=15)) 
user_op

0.5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,title,genres,year,vote_average,vote_count,wr
741,Star Wars: The Force Awakens,"[Action, Adventure, Science Fiction, Fantasy]",2015,7,7993,6.90452
737,The Hobbit: The Battle of the Five Armies,"[Action, Adventure, Fantasy]",2014,7,4884,6.852368
104,The Fifth Element,"[Adventure, Fantasy, Action, Thriller, Science...",1997,7,3962,6.823832
370,Treasure Planet,"[Adventure, Animation, Family, Fantasy, Scienc...",2002,7,980,6.530022
705,Man of Steel,"[Action, Adventure, Fantasy, Science Fiction]",2013,6,6462,5.995554
663,Pirates of the Caribbean: On Stranger Tides,"[Adventure, Action, Fantasy]",2011,6,5068,5.9945
714,Thor: The Dark World,"[Action, Adventure, Fantasy]",2013,6,4873,5.994311
748,Home,"[Fantasy, Comedy, Animation, Science Fiction, ...",2015,6,1539,5.986229
598,Underworld: Rise of the Lycans,"[Fantasy, Action, Adventure, Science Fiction, ...",2009,6,1447,5.985667
512,Zathura: A Space Adventure,"[Family, Fantasy, Science Fiction, Adventure]",2005,6,808,5.98


In [None]:
#genre & title are given & the rest is None ---> Top Movies Recommendation based on searched Genre and Title (when the Title does not belong to the searched Genre)
user_op=pd.DataFrame(Recommendation_system(user_id= None, movie_genre= 'Crime', movie_title= 'Avatar', top_n=15)) 
user_op

0.5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,title,genres,year,vote_average,vote_count,wr
8865,Star Wars: The Force Awakens,"[Action, Adventure, Science Fiction, Fantasy]",2015,7,7993,6.883752
1376,Titanic,"[Drama, Romance, Thriller]",1997,7,7770,6.880967
987,Alien,"[Horror, Action, Thriller, Science Fiction]",1979,7,4564,6.818437
8401,Star Trek Into Darkness,"[Action, Adventure, Science Fiction]",2013,7,4479,6.815872
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]",1991,7,4274,6.809379
1011,The Terminator,"[Action, Thriller, Science Fiction]",1984,7,4208,6.80719
1241,The Fifth Element,"[Adventure, Fantasy, Action, Thriller, Science...",1997,7,3962,6.798568
974,Aliens,"[Horror, Action, Thriller, Science Fiction]",1986,7,3282,6.770156
8934,Home,"[Fantasy, Comedy, Animation, Science Fiction, ...",2015,6,1539,6.14
1082,Alien³,"[Science Fiction, Action, Horror]",1992,6,1664,6.134536


In [None]:
# Scenario: every argument is supplied (user ID, genre, title and top_n).
# Per the original note, recommendations are based on the user, the searched genre and the title.
user_op = pd.DataFrame(
    Recommendation_system(
        user_id=437,
        movie_genre='Fantasy',
        movie_title='Avatar',
        top_n=15,
    )
)
# Bare expression so the notebook renders the frame as the cell output.
user_op

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9006  0.8931  0.9046  0.8924  0.8976  0.8977  0.0046  
MAE (testset)     0.6936  0.6886  0.6956  0.6854  0.6923  0.6911  0.0036  
Fit time          5.28    6.30    6.06    5.24    7.14    6.00    0.70    
Test time         0.18    0.27    0.17    0.32    0.17    0.22    0.06    
{'test_rmse': array([0.90062983, 0.89307474, 0.90455581, 0.89244636, 0.89759554]), 'test_mae': array([0.69361602, 0.68859755, 0.69559091, 0.68543018, 0.69228852]), 'fit_time': (5.2799437046051025, 6.302292346954346, 6.064363241195679, 5.2389771938323975, 7.135181665420532), 'test_time': (0.1791388988494873, 0.2748229503631592, 0.16643619537353516, 0.31722211837768555, 0.16720223426818848)}


Unnamed: 0,title,genres,year,vote_average,vote_count,est_ratings
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]",1991,7,4274,4.248028
1011,The Terminator,"[Action, Thriller, Science Fiction]",1984,7,4208,4.127063
987,Alien,"[Horror, Action, Thriller, Science Fiction]",1979,7,4564,4.104533
974,Aliens,"[Horror, Action, Thriller, Science Fiction]",1986,7,3282,3.920539
8231,Side by Side,[Documentary],2012,7,111,3.871174
2014,Fantastic Planet,"[Animation, Fantasy, Science Fiction]",1973,7,140,3.843639
5301,Cypher,"[Thriller, Science Fiction, Mystery]",2002,6,196,3.710854
8401,Star Trek Into Darkness,"[Action, Adventure, Science Fiction]",2013,7,4479,3.699559
344,True Lies,"[Action, Thriller]",1994,6,1138,3.687984
922,The Abyss,"[Adventure, Action, Thriller, Science Fiction]",1989,7,822,3.637243


In [None]:
# Scenario: every argument is supplied (user ID, genre, title and top_n).
# Per the original note, this covers a title that does not carry the searched genre.
user_op = pd.DataFrame(
    Recommendation_system(
        user_id=437,
        movie_genre='Crime',
        movie_title='Avatar',
        top_n=15,
    )
)
# Bare expression so the notebook renders the frame as the cell output.
user_op

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8977  0.8941  0.8983  0.8962  0.8972  0.8967  0.0015  
MAE (testset)     0.6885  0.6898  0.6943  0.6891  0.6899  0.6903  0.0021  
Fit time          5.25    5.16    5.38    7.09    5.22    5.62    0.74    
Test time         0.14    0.33    0.25    0.14    0.17    0.21    0.07    
{'test_rmse': array([0.89768795, 0.89405447, 0.8983001 , 0.89620543, 0.89716941]), 'test_mae': array([0.6884769 , 0.68980851, 0.6942877 , 0.68910294, 0.68993553]), 'fit_time': (5.24828839302063, 5.158446788787842, 5.3757476806640625, 7.088292360305786, 5.220248460769653), 'test_time': (0.1426854133605957, 0.32657885551452637, 0.24917936325073242, 0.13924932479858398, 0.17156314849853516)}


Unnamed: 0,title,genres,year,vote_average,vote_count,est_ratings
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]",1991,7,4274,4.248028
1011,The Terminator,"[Action, Thriller, Science Fiction]",1984,7,4208,4.127063
987,Alien,"[Horror, Action, Thriller, Science Fiction]",1979,7,4564,4.104533
974,Aliens,"[Horror, Action, Thriller, Science Fiction]",1986,7,3282,3.920539
8231,Side by Side,[Documentary],2012,7,111,3.871174
2014,Fantastic Planet,"[Animation, Fantasy, Science Fiction]",1973,7,140,3.843639
5301,Cypher,"[Thriller, Science Fiction, Mystery]",2002,6,196,3.710854
8401,Star Trek Into Darkness,"[Action, Adventure, Science Fiction]",2013,7,4479,3.699559
344,True Lies,"[Action, Thriller]",1994,6,1138,3.687984
922,The Abyss,"[Adventure, Action, Thriller, Science Fiction]",1989,7,822,3.637243
