## Recommendation System Based on Favorite Director (Director viewed most by User)

### Importing the Required Libraries & Files

In [18]:
import pandas as pd
import numpy as np
import sys

# --- Movie catalogue -------------------------------------------------------
# Read the tab-separated movie table and discard the columns that the rest of
# the notebook never reads.
movies = pd.read_csv("movies.dat", encoding='latin-1', sep='\t')
movies = movies.drop(
    ['imdbID', 'spanishTitle', 'rtID',
     'rtAllCriticsRating', 'rtAllCriticsNumReviews',
     'rtAllCriticsNumFresh', 'rtAllCriticsNumRotten', 'rtAllCriticsScore',
     'rtTopCriticsRating', 'rtTopCriticsNumReviews', 'rtTopCriticsNumFresh',
     'rtTopCriticsNumRotten', 'rtTopCriticsScore',
     'rtAudienceNumRatings', 'rtAudienceScore', 'rtPictureURL'],
    axis=1,
)
# Overwrite the title of the row labelled 0. A single .loc[row, column] call
# writes into `movies` itself; the chained form movies['title'].loc[0] = ...
# may write into a temporary copy and emits SettingWithCopyWarning.
movies.loc[0, 'title'] = 'Toy Story'

# --- Genres ----------------------------------------------------------------
# Collapse the genre rows of each movie into one comma-separated string so
# that every movieID appears exactly once.
movie_genre = pd.read_csv("movie_genres.dat", encoding='latin-1', sep='\t')
movie_genre = movie_genre.groupby('movieID').agg({'genre': ', '.join}).reset_index()

# --- Directors and actors --------------------------------------------------
movie_director = pd.read_csv('movie_directors.dat', encoding='latin-1', sep='\t')
movie_director = movie_director.drop(['directorID'], axis=1)

movie_actor = pd.read_csv('movie_actors.dat', encoding='latin-1', sep='\t')
movie_actor = movie_actor.drop(['actorID'], axis=1)

# --- Combine the movie-level tables ----------------------------------------
merge1 = pd.merge(movies, movie_genre, left_on='id', right_on='movieID')
# With no `on=` argument pd.merge joins on the column names both frames share.
# NOTE(review): presumably that is only 'movieID' here - confirm against the files.
merge2 = pd.merge(merge1, movie_director)
# 'movieID' duplicates 'id' after the joins, so it is dropped.
merge3 = pd.merge(merge2, movie_actor).drop(['movieID'], axis=1)

# --- User ratings ----------------------------------------------------------
# The timestamp components are not used by the recommender.
user_ratedmovies = pd.read_csv('user_ratedmovies.dat', encoding='latin-1', sep='\t')
user_ratedmovies = user_ratedmovies.drop(
    ['date_day', 'date_month', 'date_year',
     'date_hour', 'date_minute', 'date_second'],
    axis=1,
)

# One row per combination of movie-level row in merge3 and matching user rating.
final_data = pd.merge(merge3, user_ratedmovies, left_on='id', right_on='movieID')

### Identifying Favorite Director of Every User

In [19]:
# Number of DISTINCT movies each user has rated for each director.
# final_data was built by joining the actor table, so a rated movie is repeated
# once per matching movie_actor row; a plain row count would therefore favour
# directors whose films list more actors. nunique() counts each movie once.
fav_dir1 = (
    final_data.groupby(['userID', 'directorName'])['id']
    .nunique()
    .reset_index()
)

# Keep, for every user, the single row with the highest movie count.
# idxmax returns the first maximum, so ties go to the first director in the
# group order.
fav_dir1 = fav_dir1.loc[fav_dir1.groupby('userID')['id'].idxmax()]

# Index by userID so that a user's favourite director can be looked up by label.
# Remaining columns: 'directorName' and 'id' (the movie count).
fav_dir1 = fav_dir1.set_index('userID')

### Taking User ID as an input from User

In [20]:
# Prompt for the user ID on standard input and convert it to an integer.
# int() raises ValueError if the entered text is not a valid integer.
_user_id_text = input("Please enter the User ID: ")
user = int(_user_id_text)

Please enter the User ID: 75


In [21]:
### Finding all the movies which have been rated by the User
# (The previous groupby(...).count() call here discarded its result and has
# been removed; it had no effect on any variable.)
subset_user_ratedmovies = user_ratedmovies[user_ratedmovies['userID'] == user]

### Checking if User is present in the database or not
# An empty subset means no rating row carries this userID.
if not subset_user_ratedmovies.empty:
    print("User Found")
else:
    print("Invalid User ID, please re-check")

User Found


In [22]:
### Identifying the Favourite Director of the User from User ID entered

# fav_dir1 is indexed by userID, so a direct label lookup replaces the loop over
# every index entry. A user with ratings can still be missing from fav_dir1,
# because final_data is built from inner joins; fail with a clear message
# instead of an opaque Series-comparison error further down.
if user not in fav_dir1.index:
    raise ValueError(
        "No favourite director could be determined for user {}".format(user)
    )
dir_needed = fav_dir1.loc[user, 'directorName']

### Finding all the Movies directed by the user's favorite Director

dir_mov = movie_director[movie_director['directorName'] == dir_needed]


### Recommending Movies of Favourite Director which have not been rated by the User yet

# Left-join the director's movies onto the user's ratings; indicator=True adds a
# '_merge' column whose value 'left_only' marks movies with no rating from this user.
df_all = dir_mov.merge(
    subset_user_ratedmovies.drop_duplicates(),
    on=['movieID'],
    how='left',
    indicator=True,
)
recc1 = df_all[df_all['_merge'] == 'left_only']
recc_movie = recc1[['movieID', 'directorName']]

# Attach the movie details (title, year, audience rating, ...) to each candidate.
final_recc = pd.merge(recc_movie, movies, left_on='movieID', right_on='id', how='left')
# 'id' duplicates 'movieID' after the join.
final_recc = final_recc.drop(['id'], axis=1)

### List of Movies of Favourite Director which have not been rated by the User yet

# Highest audience rating first.
# NOTE(review): assumes 'rtAudienceRating' was parsed as numeric - confirm the dtype.
final_recc = final_recc.sort_values(by='rtAudienceRating', ascending=False)
# Keep only the best-rated entry for each title.
final_recc = final_recc.drop_duplicates('title', keep='first')

### Top 5 Movie Recommendations

final_recc.head(5)

Unnamed: 0,movieID,directorName,title,imdbPictureURL,year,rtAudienceRating
1,1610,John McTiernan,The Hunt for Red October,http://ia.media-imdb.com/images/M/MV5BMTgxMTE3...,1990,3.6
6,3527,John McTiernan,Predator,http://ia.media-imdb.com/images/M/MV5BMTM1Njk0...,1987,3.5
2,2763,John McTiernan,The Thomas Crown Affair,http://ia.media-imdb.com/images/M/MV5BMjE3NzU4...,1999,3.3
5,2826,John McTiernan,The 13th Warrior,http://ia.media-imdb.com/images/M/MV5BMTczNjA5...,1999,3.2
8,6263,John McTiernan,Basic,http://ia.media-imdb.com/images/M/MV5BMTY5ODE5...,2003,3.2
