### Installing required modules

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_colwidth', 0)

### Reading csv file

In [2]:
# reading csv after downloading the file mentioned in README.md
# movies_df=pd.read_csv('MovRec_Dataset/movies_metadata.csv',low_memory=False)

# or directly reading csv files from S3 bucket
movies_df=pd.read_csv('https://movrec.s3.amazonaws.com/MovRec_Dataset/movies_metadata.csv',low_memory=False)

In [3]:
# checking shape means number of rows and columns in the movies_metadata.csv file or movies_df dataframe
movies_df.shape

(45466, 24)

In [4]:
movies_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  45466 non-null  object 
 1   belongs_to_collection  4494 non-null   object 
 2   budget                 45466 non-null  object 
 3   genres                 45466 non-null  object 
 4   homepage               7782 non-null   object 
 5   id                     45466 non-null  object 
 6   imdb_id                45449 non-null  object 
 7   original_language      45455 non-null  object 
 8   original_title         45466 non-null  object 
 9   overview               44512 non-null  object 
 10  popularity             45461 non-null  object 
 11  poster_path            45080 non-null  object 
 12  production_companies   45463 non-null  object 
 13  production_countries   45463 non-null  object 
 14  release_date           45379 non-null  object 
 15  re

In [5]:
movies_df.head(1)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0


In [6]:
ind=movies_df[movies_df['id'].isin(['1997-08-20','2012-09-29','2014-01-01'])].index
ind


Int64Index([19730, 29503, 35587], dtype='int64')

In [7]:
movies_df.shape

(45466, 24)

dropping the unwnated movie id, as they are actual ids


In [8]:
movies_df.drop(ind,inplace=True)
# movies_df[movies_df['id']=='1997-08-20'].index

In [9]:
movies_df=movies_df.astype({'id':'int64'})

In [10]:
movies_df.shape

(45463, 24)

In [11]:
movies_df.isnull().sum()

adult                    0    
belongs_to_collection    40972
budget                   0    
genres                   0    
homepage                 37684
id                       0    
imdb_id                  17   
original_language        11   
original_title           0    
overview                 954  
popularity               3    
poster_path              386  
production_companies     3    
production_countries     3    
release_date             87   
revenue                  3    
runtime                  260  
spoken_languages         3    
status                   84   
tagline                  25051
title                    3    
video                    3    
vote_average             3    
vote_count               3    
dtype: int64

In [12]:
# movies_df[movies_df['overview'].isnull()]

removing missing overviews and title

In [13]:
movies_df.dropna(subset=['overview','title'],inplace=True)

verifying if null values dropped successfully

In [14]:
movies_df.isnull().sum()

adult                    0    
belongs_to_collection    40075
budget                   0    
genres                   0    
homepage                 36745
id                       0    
imdb_id                  15   
original_language        10   
original_title           0    
overview                 0    
popularity               0    
poster_path              343  
production_companies     0    
production_countries     0    
release_date             71   
revenue                  0    
runtime                  0    
spoken_languages         0    
status                   65   
tagline                  24102
title                    0    
video                    0    
vote_average             0    
vote_count               0    
dtype: int64

In [15]:
movies_df.shape

(44506, 24)

checking duplicate rows

In [16]:
movies_df.duplicated().sum()

17

In [17]:
movies_df.duplicated(subset=['overview']).sum()

203

removing duplicate rows

In [18]:
movies_df.drop_duplicates(subset=['overview'],inplace=True)

verifying if duplicate rows for overview column has been removed

In [19]:
movies_df.duplicated(subset=['overview']).sum()

0

checking the revised shape of the dataframe

In [20]:
movies_df.shape

(44303, 24)

extracting year from the release_date column 

In [21]:
def year(date):
    L=[]
    date=str(date)
    L.append(date[:4])
    return L

In [22]:
movies_df['year']=movies_df['release_date'].apply(year)


In [23]:
# credits_df=pd.read_csv('MovRec_Dataset/credits.csv',low_memory=False)
credits_df=pd.read_csv('https://movrec.s3.amazonaws.com/MovRec_Dataset/credits.csv',low_memory=False)


In [24]:
credits_df.shape

(45476, 3)

In [25]:
credits_df.head(1)

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)', 'credit_id': '52fe4284c3a36847f8024f95', 'gender': 2, 'id': 31, 'name': 'Tom Hanks', 'order': 0, 'profile_path': '/pQFoyx7rp09CJTAb932F2g8Nlho.jpg'}, {'cast_id': 15, 'character': 'Buzz Lightyear (voice)', 'credit_id': '52fe4284c3a36847f8024f99', 'gender': 2, 'id': 12898, 'name': 'Tim Allen', 'order': 1, 'profile_path': '/uX2xVf6pMmPepxnvFWyBtjexzgY.jpg'}, {'cast_id': 16, 'character': 'Mr. Potato Head (voice)', 'credit_id': '52fe4284c3a36847f8024f9d', 'gender': 2, 'id': 7167, 'name': 'Don Rickles', 'order': 2, 'profile_path': '/h5BcaDMPRVLHLDzbQavec4xfSdt.jpg'}, {'cast_id': 17, 'character': 'Slinky Dog (voice)', 'credit_id': '52fe4284c3a36847f8024fa1', 'gender': 2, 'id': 12899, 'name': 'Jim Varney', 'order': 3, 'profile_path': '/eIo2jVVXYgjDtaHoF19Ll9vtW7h.jpg'}, {'cast_id': 18, 'character': 'Rex (voice)', 'credit_id': '52fe4284c3a36847f8024fa5', 'gender': 2, 'id': 12900, 'name': 'Wallace Shawn', 'order': 4, 'profile_path': '/oGE6JqPP2xH4tNORKNqxbNPYi7u.jpg'}, {'cast_id': 19, 'character': 'Hamm (voice)', 'credit_id': '52fe4284c3a36847f8024fa9', 'gender': 2, 'id': 7907, 'name': 'John Ratzenberger', 'order': 5, 'profile_path': '/yGechiKWL6TJDfVE2KPSJYqdMsY.jpg'}, {'cast_id': 20, 'character': 'Bo Peep (voice)', 'credit_id': '52fe4284c3a36847f8024fad', 'gender': 1, 'id': 8873, 'name': 'Annie Potts', 'order': 6, 'profile_path': '/eryXT84RL41jHSJcMy4kS3u9y6w.jpg'}, {'cast_id': 26, 'character': 'Andy (voice)', 'credit_id': '52fe4284c3a36847f8024fc1', 'gender': 0, 'id': 1116442, 'name': 'John Morris', 'order': 7, 'profile_path': '/vYGyvK4LzeaUCoNSHtsuqJUY15M.jpg'}, {'cast_id': 22, 'character': 'Sid (voice)', 'credit_id': '52fe4284c3a36847f8024fb1', 'gender': 2, 'id': 12901, 'name': 'Erik von Detten', 'order': 8, 'profile_path': '/twnF1ZaJ1FUNUuo6xLXwcxjayBE.jpg'}, {'cast_id': 23, 'character': 'Mrs. Davis (voice)', 'credit_id': '52fe4284c3a36847f8024fb5', 'gender': 1, 'id': 12133, 'name': 'Laurie Metcalf', 'order': 9, 'profile_path': '/unMMIT60eoBM2sN2nyR7EZ2BvvD.jpg'}, {'cast_id': 24, 'character': 'Sergeant (voice)', 'credit_id': '52fe4284c3a36847f8024fb9', 'gender': 2, 'id': 8655, 'name': 'R. Lee Ermey', 'order': 10, 'profile_path': '/r8GBqFBjypLUP9VVqDqfZ7wYbSs.jpg'}, {'cast_id': 25, 'character': 'Hannah (voice)', 'credit_id': '52fe4284c3a36847f8024fbd', 'gender': 1, 'id': 12903, 'name': 'Sarah Freeman', 'order': 11, 'profile_path': None}, {'cast_id': 27, 'character': 'TV Announcer (voice)', 'credit_id': '52fe4284c3a36847f8024fc5', 'gender': 2, 'id': 37221, 'name': 'Penn Jillette', 'order': 12, 'profile_path': '/zmAaXUdx12NRsssgHbk1T31j2x9.jpg'}]","[{'credit_id': '52fe4284c3a36847f8024f49', 'department': 'Directing', 'gender': 2, 'id': 7879, 'job': 'Director', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '52fe4284c3a36847f8024f4f', 'department': 'Writing', 'gender': 2, 'id': 12891, 'job': 'Screenplay', 'name': 'Joss Whedon', 'profile_path': '/dTiVsuaTVTeGmvkhcyJvKp2A5kr.jpg'}, {'credit_id': '52fe4284c3a36847f8024f55', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Screenplay', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '52fe4284c3a36847f8024f5b', 'department': 'Writing', 'gender': 2, 'id': 12892, 'job': 'Screenplay', 'name': 'Joel Cohen', 'profile_path': '/dAubAiZcvKFbboWlj7oXOkZnTSu.jpg'}, {'credit_id': '52fe4284c3a36847f8024f61', 'department': 'Writing', 'gender': 0, 'id': 12893, 'job': 'Screenplay', 'name': 'Alec Sokolow', 'profile_path': '/v79vlRYi94BZUQnkkyznbGUZLjT.jpg'}, {'credit_id': '52fe4284c3a36847f8024f67', 'department': 'Production', 'gender': 1, 'id': 12894, 'job': 'Producer', 'name': 'Bonnie Arnold', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f6d', 'department': 'Production', 'gender': 0, 'id': 12895, 'job': 'Executive Producer', 'name': 'Ed Catmull', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f73', 'department': 'Production', 'gender': 2, 'id': 12896, 'job': 'Producer', 'name': 'Ralph Guggenheim', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f79', 'department': 'Production', 'gender': 2, 'id': 12897, 'job': 'Executive Producer', 'name': 'Steve Jobs', 'profile_path': '/mOMP3SwD5qWQSR0ldCIByd3guTV.jpg'}, {'credit_id': '52fe4284c3a36847f8024f8b', 'department': 'Editing', 'gender': 2, 'id': 8, 'job': 'Editor', 'name': 'Lee Unkrich', 'profile_path': '/bdTCCXjgOV3YyaNmLGYGOxFQMOc.jpg'}, {'credit_id': '52fe4284c3a36847f8024f91', 'department': 'Art', 'gender': 2, 'id': 7883, 'job': 'Art Direction', 'name': 'Ralph Eggleston', 'profile_path': '/uUfcGKDsKO1aROMpXRs67Hn6RvR.jpg'}, {'credit_id': '598331bf925141421201044b', 'department': 'Editing', 'gender': 2, 'id': 1168870, 'job': 'Editor', 'name': 'Robert Gordon', 'profile_path': None}, {'credit_id': '5892168cc3a36809660095f9', 'department': 'Sound', 'gender': 0, 'id': 1552883, 'job': 'Foley Editor', 'name': 'Mary Helen Leasman', 'profile_path': None}, {'credit_id': '5531824d9251415289000945', 'department': 'Visual Effects', 'gender': 0, 'id': 1453514, 'job': 'Animation', 'name': 'Kim Blanchette', 'profile_path': None}, {'credit_id': '589215969251412dcb009bf6', 'department': 'Sound', 'gender': 0, 'id': 1414182, 'job': 'ADR Editor', 'name': 'Marilyn McCoppen', 'profile_path': None}, {'credit_id': '589217099251412dc500a018', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Orchestrator', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5693e6b29251417b0e0000e3', 'department': 'Editing', 'gender': 0, 'id': 1429549, 'job': 'Color Timer', 'name': 'Dale E. Grahn', 'profile_path': None}, {'credit_id': '572e2522c3a36869e6001a9c', 'department': 'Visual Effects', 'gender': 0, 'id': 7949, 'job': 'CG Painter', 'name': 'Robin Cooper', 'profile_path': None}, {'credit_id': '574f12309251415ca1000012', 'department': 'Writing', 'gender': 2, 'id': 7879, 'job': 'Original Story', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '574f1240c3a3682e7300001c', 'department': 'Writing', 'gender': 2, 'id': 12890, 'job': 'Original Story', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '574f12519251415c92000015', 'department': 'Writing', 'gender': 0, 'id': 7911, 'job': 'Original Story', 'name': 'Joe Ranft', 'profile_path': '/f1BoWC2JbCcfP1e5hKfGsxkHzVU.jpg'}, {'credit_id': '574f12cec3a3682e82000022', 'department': 'Crew', 'gender': 0, 'id': 1629419, 'job': 'Post Production Supervisor', 'name': 'Patsy Bouge', 'profile_path': None}, {'credit_id': '574f14f19251415ca1000082', 'department': 'Art', 'gender': 0, 'id': 7961, 'job': 'Sculptor', 'name': 'Norm DeCarlo', 'profile_path': None}, {'credit_id': '5751ae4bc3a3683772002b7f', 'department': 'Visual Effects', 'gender': 2, 'id': 12905, 'job': 'Animation Director', 'name': 'Ash Brannon', 'profile_path': '/6ueWgPEEBHvS3De2BHYQnYjRTig.jpg'}, {'credit_id': '5891edbe9251412dc5007cd6', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Music', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '589213d39251412dc8009832', 'department': 'Directing', 'gender': 0, 'id': 1748707, 'job': 'Layout', 'name': 'Roman Figun', 'profile_path': None}, {'credit_id': '5892173dc3a3680968009351', 'department': 'Sound', 'gender': 2, 'id': 4949, 'job': 'Orchestrator', 'name': 'Don Davis', 'profile_path': None}, {'credit_id': '589217cec3a3686b0a0052ba', 'department': 'Sound', 'gender': 0, 'id': 1372885, 'job': 'Music Editor', 'name': 'James Flamberg', 'profile_path': None}, {'credit_id': '58921831c3a3686348004a64', 'department': 'Editing', 'gender': 0, 'id': 1739962, 'job': 'Negative Cutter', 'name': 'Mary Beth Smith', 'profile_path': None}, {'credit_id': '58921838c3a36809700096c0', 'department': 'Editing', 'gender': 0, 'id': 1748513, 'job': 'Negative Cutter', 'name': 'Rick Mackay', 'profile_path': None}, {'credit_id': '589218429251412dd1009d1b', 'department': 'Art', 'gender': 0, 'id': 1458006, 'job': 'Title Designer', 'name': 'Susan Bradley', 'profile_path': None}, {'credit_id': '5891ed99c3a3680966007670', 'department': 'Crew', 'gender': 0, 'id': 1748557, 'job': 'Supervising Technical Director', 'name': 'William Reeves', 'profile_path': None}, {'credit_id': '5891edcec3a3686b0a002eb2', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Songs', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5891edf9c3a36809700075e6', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Original Story', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58920f0b9251412dd7009104', 'department': 'Crew', 'gender': 2, 'id': 12890, 'job': 'Supervising Animator', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '58920f1fc3a3680977009021', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Designer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '58920f389251412dd700912d', 'department': 'Production', 'gender': 0, 'id': 12909, 'job': 'Production Supervisor', 'name': 'Karen Robert Jackson', 'profile_path': None}, {'credit_id': '58920fbd9251412dcb00969c', 'department': 'Crew', 'gender': 0, 'id': 953331, 'job': 'Executive Music Producer', 'name': 'Chris Montan', 'profile_path': None}, {'credit_id': '589210069251412dd7009219', 'department': 'Visual Effects', 'gender': 0, 'id': 7893, 'job': 'Animation Director', 'name': 'Rich Quade', 'profile_path': None}, {'credit_id': '589210329251412dcd00943b', 'department': 'Visual Effects', 'gender': 0, 'id': 8025, 'job': 'Animation', 'name': 'Michael Berenstein', 'profile_path': None}, {'credit_id': '5892103bc3a368096a009180', 'department': 'Visual Effects', 'gender': 0, 'id': 78009, 'job': 'Animation', 'name': 'Colin Brady', 'profile_path': None}, {'credit_id': '5892105dc3a3680968008db2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748682, 'job': 'Animation', 'name': 'Davey Crockett Feiten', 'profile_path': None}, {'credit_id': '589210669251412dcd009466', 'department': 'Visual Effects', 'gender': 0, 'id': 1454030, 'job': 'Animation', 'name': 'Angie Glocka', 'profile_path': None}, {'credit_id': '5892107c9251412dd1009613', 'department': 'Visual Effects', 'gender': 0, 'id': 1748683, 'job': 'Animation', 'name': 'Rex Grignon', 'profile_path': None}, {'credit_id': '5892108ac3a3680973008d3f', 'department': 'Visual Effects', 'gender': 0, 'id': 1748684, 'job': 'Animation', 'name': 'Tom K. Gurney', 'profile_path': None}, {'credit_id': '58921093c3a3686348004477', 'department': 'Visual Effects', 'gender': 2, 'id': 8029, 'job': 'Animation', 'name': 'Jimmy Hayward', 'profile_path': '/lTDRpudEY7BDwTefXbXzMlmb0ui.jpg'}, {'credit_id': '5892109b9251412dcd0094b0', 'department': 'Visual Effects', 'gender': 0, 'id': 1426773, 'job': 'Animation', 'name': 'Hal T. Hickel', 'profile_path': None}, {'credit_id': '589210a29251412dc5009a29', 'department': 'Visual Effects', 'gender': 0, 'id': 8035, 'job': 'Animation', 'name': 'Karen Kiser', 'profile_path': None}, {'credit_id': '589210ccc3a3680977009191', 'department': 'Visual Effects', 'gender': 0, 'id': 1748688, 'job': 'Animation', 'name': 'Anthony B. LaMolinara', 'profile_path': None}, {'credit_id': '589210d7c3a3686b0a004c1f', 'department': 'Visual Effects', 'gender': 0, 'id': 587314, 'job': 'Animation', 'name': 'Guionne Leroy', 'profile_path': None}, {'credit_id': '589210e1c3a36809770091a7', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Animation', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '589210ee9251412dc200978a', 'department': 'Visual Effects', 'gender': 0, 'id': 1748689, 'job': 'Animation', 'name': 'Les Major', 'profile_path': None}, {'credit_id': '589210fa9251412dc8009595', 'department': 'Visual Effects', 'gender': 2, 'id': 7892, 'job': 'Animation', 'name': 'Glenn McQueen', 'profile_path': None}, {'credit_id': '589211029251412dc8009598', 'department': 'Visual Effects', 'gender': 0, 'id': 555795, 'job': 'Animation', 'name': 'Mark Oftedal', 'profile_path': None}, {'credit_id': '5892110b9251412dc800959d', 'department': 'Visual Effects', 'gender': 2, 'id': 7882, 'job': 'Animation', 'name': 'Jeff Pidgeon', 'profile_path': '/yLddkg5HcgbJg00cS13GVBnP0HY.jpg'}, {'credit_id': '58921113c3a36863480044e4', 'department': 'Visual Effects', 'gender': 0, 'id': 8017, 'job': 'Animation', 'name': 'Jeff Pratt', 'profile_path': None}, {'credit_id': '5892111c9251412dcb0097e9', 'department': 'Visual Effects', 'gender': 0, 'id': 1184140, 'job': 'Animation', 'name': 'Steve Rabatich', 'profile_path': None}, {'credit_id': '58921123c3a36809700090f6', 'department': 'Visual Effects', 'gender': 0, 'id': 8049, 'job': 'Animation', 'name': 'Roger Rose', 'profile_path': None}, {'credit_id': '5892112b9251412dcb0097fb', 'department': 'Visual Effects', 'gender': 0, 'id': 1509559, 'job': 'Animation', 'name': 'Steve Segal', 'profile_path': None}, {'credit_id': '589211349251412dc80095c3', 'department': 'Visual Effects', 'gender': 0, 'id': 1748691, 'job': 'Animation', 'name': 'Doug Sheppeck', 'profile_path': None}, {'credit_id': '5892113cc3a3680970009106', 'department': 'Visual Effects', 'gender': 0, 'id': 8050, 'job': 'Animation', 'name': 'Alan Sperling', 'profile_path': None}, {'credit_id': '58921148c3a3686b0a004c99', 'department': 'Visual Effects', 'gender': 0, 'id': 8010, 'job': 'Animation', 'name': 'Doug Sweetland', 'profile_path': None}, {'credit_id': '58921150c3a3680966009125', 'department': 'Visual Effects', 'gender': 0, 'id': 8044, 'job': 'Animation', 'name': 'David Tart', 'profile_path': None}, {'credit_id': '589211629251412dc5009b00', 'department': 'Visual Effects', 'gender': 0, 'id': 1454034, 'job': 'Animation', 'name': 'Ken Willard', 'profile_path': None}, {'credit_id': '589211c1c3a3686b0a004d28', 'department': 'Visual Effects', 'gender': 0, 'id': 7887, 'job': 'Visual Effects Supervisor', 'name': 'Thomas Porter', 'profile_path': None}, {'credit_id': '589211d4c3a3680968008ed9', 'department': 'Visual Effects', 'gender': 0, 'id': 1406878, 'job': 'Visual Effects', 'name': 'Mark Thomas Henne', 'profile_path': None}, {'credit_id': '589211f59251412dd4008e65', 'department': 'Visual Effects', 'gender': 0, 'id': 1748698, 'job': 'Visual Effects', 'name': 'Oren Jacob', 'profile_path': None}, {'credit_id': '58921242c3a368096a00939b', 'department': 'Visual Effects', 'gender': 0, 'id': 1748699, 'job': 'Visual Effects', 'name': 'Darwyn Peachey', 'profile_path': None}, {'credit_id': '5892124b9251412dc5009bd2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748701, 'job': 'Visual Effects', 'name': 'Mitch Prater', 'profile_path': None}, {'credit_id': '58921264c3a3686b0a004dbf', 'department': 'Visual Effects', 'gender': 0, 'id': 1748703, 'job': 'Visual Effects', 'name': 'Brian M. Rosen', 'profile_path': None}, {'credit_id': '589212709251412dcd009676', 'department': 'Lighting', 'gender': 1, 'id': 12912, 'job': 'Lighting Supervisor', 'name': 'Sharon Calahan', 'profile_path': None}, {'credit_id': '5892127fc3a3686b0a004de5', 'department': 'Lighting', 'gender': 0, 'id': 7899, 'job': 'Lighting Supervisor', 'name': 'Galyn Susman', 'profile_path': None}, {'credit_id': '589212cdc3a3680970009268', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'CG Painter', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892130f9251412dc8009791', 'department': 'Art', 'gender': 0, 'id': 1748705, 'job': 'Sculptor', 'name': 'Shelley Daniels Lekven', 'profile_path': None}, {'credit_id': '5892131c9251412dd4008f4c', 'department': 'Visual Effects', 'gender': 2, 'id': 7889, 'job': 'Character Designer', 'name': 'Bob Pauley', 'profile_path': None}, {'credit_id': '589213249251412dd100987b', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Character Designer', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '5892132b9251412dc80097b1', 'department': 'Visual Effects', 'gender': 2, 'id': 7, 'job': 'Character Designer', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58921332c3a368634800467b', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'Character Designer', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892135f9251412dd4008f90', 'department': 'Visual Effects', 'gender': 0, 'id': 1748706, 'job': 'Character Designer', 'name': 'Steve Johnson', 'profile_path': None}, {'credit_id': '58921384c3a3680973008fd4', 'department': 'Visual Effects', 'gender': 0, 'id': 1176752, 'job': 'Character Designer', 'name': 'Dan Haskett', 'profile_path': None}, {'credit_id': '5892138e9251412dc20099fc', 'department': 'Visual Effects', 'gender': 0, 'id': 1088034, 'job': 'Character Designer', 'name': 'Tom Holloway', 'profile_path': '/a0r0T2usTBpgMI5aZbRBDW1fTl8.jpg'}, {'credit_id': '58921395c3a368097700942f', 'department': 'Visual Effects', 'gender': 0, 'id': 1447465, 'job': 'Character Designer', 'name': 'Jean Gillmore', 'profile_path': None}, {'credit_id': '589213e2c3a3680973009026', 'department': 'Directing', 'gender': 0, 'id': 1748709, 'job': 'Layout', 'name': 'Desirée Mourad', 'profile_path': None}, {'credit_id': '589214099251412dc5009d57', 'department': 'Art', 'gender': 0, 'id': 1748710, 'job': 'Set Dresser', 'name': ""Kelly O'Connell"", 'profile_path': None}, {'credit_id': '58921411c3a3686b0a004f70', 'department': 'Art', 'gender': 0, 'id': 1443471, 'job': 'Set Dresser', 'name': 'Sonoko Konishi', 'profile_path': None}, {'credit_id': '58921434c3a368096a00956e', 'department': 'Art', 'gender': 0, 'id': 1748711, 'job': 'Set Dresser', 'name': 'Ann M. Rockwell', 'profile_path': None}, {'credit_id': '5892144ac3a36809680090de', 'department': 'Editing', 'gender': 0, 'id': 1748712, 'job': 'Editorial Manager', 'name': 'Julie M. McDonald', 'profile_path': None}, {'credit_id': '58921479c3a368096800910f', 'department': 'Editing', 'gender': 0, 'id': 1589729, 'job': 'Assistant Editor', 'name': 'Robin Lee', 'profile_path': None}, {'credit_id': '5892148b9251412dd10099cc', 'department': 'Editing', 'gender': 0, 'id': 1748716, 'job': 'Assistant Editor', 'name': 'Tom Freeman', 'profile_path': None}, {'credit_id': '589214959251412dcb009b1f', 'department': 'Editing', 'gender': 0, 'id': 1748717, 'job': 'Assistant Editor', 'name': 'Ada Cochavi', 'profile_path': None}, {'credit_id': '5892149ec3a3686348004798', 'department': 'Editing', 'gender': 0, 'id': 1336438, 'job': 'Assistant Editor', 'name': 'Dana Mulligan', 'profile_path': None}, {'credit_id': '589214adc3a368096a0095db', 'department': 'Editing', 'gender': 0, 'id': 1748718, 'job': 'Editorial Coordinator', 'name': 'Deirdre Morrison', 'profile_path': None}, {'credit_id': '589214c7c3a368097700952b', 'department': 'Production', 'gender': 0, 'id': 1748719, 'job': 'Production Coordinator', 'name': 'Lori Lombardo', 'profile_path': None}, {'credit_id': '589214cec3a368096a009603', 'department': 'Production', 'gender': 0, 'id': 1748720, 'job': 'Production Coordinator', 'name': 'Ellen Devine', 'profile_path': None}, {'credit_id': '589214e39251412dc8009904', 'department': 'Crew', 'gender': 0, 'id': 1468014, 'job': 'Unit Publicist', 'name': 'Lauren Beth Strogoff', 'profile_path': None}, {'credit_id': '58921544c3a3686b0a00507d', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '5892154c9251412dd1009a56', 'department': 'Sound', 'gender': 0, 'id': 1425978, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Summers', 'profile_path': None}, {'credit_id': '58921555c3a36809680091bd', 'department': 'Sound', 'gender': 2, 'id': 8276, 'job': 'Supervising Sound Editor', 'name': 'Tim Holland', 'profile_path': None}, {'credit_id': '589215c39251412dcb009c12', 'department': 'Sound', 'gender': 0, 'id': 7069, 'job': 'Sound Effects Editor', 'name': 'Pat Jackson', 'profile_path': None}, {'credit_id': '58921698c3a368096a009788', 'department': 'Crew', 'gender': 2, 'id': 15894, 'job': 'Sound Design Assistant', 'name': 'Tom Myers', 'profile_path': None}, {'credit_id': '589216a89251412dc2009ca4', 'department': 'Sound', 'gender': 0, 'id': 1414177, 'job': 'Assistant Sound Editor', 'name': 'J.R. Grubbs', 'profile_path': None}, {'credit_id': '589216c19251412dc2009cb9', 'department': 'Sound', 'gender': 1, 'id': 1748724, 'job': 'Assistant Sound Editor', 'name': 'Susan Sanford', 'profile_path': None}, {'credit_id': '589216ccc3a3680973009274', 'department': 'Sound', 'gender': 0, 'id': 1748725, 'job': 'Assistant Sound Editor', 'name': 'Susan Popovic', 'profile_path': None}, {'credit_id': '589216d79251412dc8009aa0', 'department': 'Sound', 'gender': 0, 'id': 8067, 'job': 'Assistant Sound Editor', 'name': 'Dan Engstrom', 'profile_path': None}, {'credit_id': '589216e49251412dcd009a4f', 'department': 'Production', 'gender': 1, 'id': 7902, 'job': 'Casting Consultant', 'name': 'Ruth Lambert', 'profile_path': None}, {'credit_id': '589216f39251412dc2009cf3', 'department': 'Production', 'gender': 0, 'id': 84493, 'job': 'ADR Voice Casting', 'name': 'Mickie McGowan', 'profile_path': '/k7TjJBfINsg8vLQxJwos6XObAD6.jpg'}]",862


In [26]:
# keywords_df=pd.read_csv('MovRec_Dataset/keywords.csv',low_memory=False)
keywords_df=pd.read_csv('https://movrec.s3.amazonaws.com/MovRec_Dataset/keywords.csv',low_memory=False)


In [27]:
keywords_df.shape

(46419, 2)

In [28]:
keywords_df.head(5)

Unnamed: 0,id,keywords
0,862,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290, 'name': 'toy'}, {'id': 5202, 'name': 'boy'}, {'id': 6054, 'name': 'friendship'}, {'id': 9713, 'name': 'friends'}, {'id': 9823, 'name': 'rivalry'}, {'id': 165503, 'name': 'boy next door'}, {'id': 170722, 'name': 'new toy'}, {'id': 187065, 'name': 'toy comes to life'}]"
1,8844,"[{'id': 10090, 'name': 'board game'}, {'id': 10941, 'name': 'disappearance'}, {'id': 15101, 'name': ""based on children's book""}, {'id': 33467, 'name': 'new home'}, {'id': 158086, 'name': 'recluse'}, {'id': 158091, 'name': 'giant insect'}]"
2,15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}]"
3,31357,"[{'id': 818, 'name': 'based on novel'}, {'id': 10131, 'name': 'interracial relationship'}, {'id': 14768, 'name': 'single mother'}, {'id': 15160, 'name': 'divorce'}, {'id': 33455, 'name': 'chick flick'}]"
4,11862,"[{'id': 1009, 'name': 'baby'}, {'id': 1599, 'name': 'midlife crisis'}, {'id': 2246, 'name': 'confidence'}, {'id': 4995, 'name': 'aging'}, {'id': 5600, 'name': 'daughter'}, {'id': 10707, 'name': 'mother daughter relationship'}, {'id': 13149, 'name': 'pregnancy'}, {'id': 33358, 'name': 'contraception'}, {'id': 170521, 'name': 'gynecologist'}]"


merging all the 3 dataframes:
1. movies_df
2. credits_df 
3. keywords_df 

on the common column 'id' to from a new dataframe movies_df_join

In [29]:
movies_df_join=movies_df.merge(credits_df, on='id').merge(keywords_df, on= 'id')
movies_df_join.shape

(45320, 28)

considering only 8 important columns for our model

In [30]:
movies_df_join = movies_df_join[['id','title','overview','genres','keywords','cast','crew','year']]

In [31]:
movies_df_join.head(1)

Unnamed: 0,id,title,overview,genres,keywords,cast,crew,year
0,862,Toy Story,"Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.","[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]","[{'id': 931, 'name': 'jealousy'}, {'id': 4290, 'name': 'toy'}, {'id': 5202, 'name': 'boy'}, {'id': 6054, 'name': 'friendship'}, {'id': 9713, 'name': 'friends'}, {'id': 9823, 'name': 'rivalry'}, {'id': 165503, 'name': 'boy next door'}, {'id': 170722, 'name': 'new toy'}, {'id': 187065, 'name': 'toy comes to life'}]","[{'cast_id': 14, 'character': 'Woody (voice)', 'credit_id': '52fe4284c3a36847f8024f95', 'gender': 2, 'id': 31, 'name': 'Tom Hanks', 'order': 0, 'profile_path': '/pQFoyx7rp09CJTAb932F2g8Nlho.jpg'}, {'cast_id': 15, 'character': 'Buzz Lightyear (voice)', 'credit_id': '52fe4284c3a36847f8024f99', 'gender': 2, 'id': 12898, 'name': 'Tim Allen', 'order': 1, 'profile_path': '/uX2xVf6pMmPepxnvFWyBtjexzgY.jpg'}, {'cast_id': 16, 'character': 'Mr. Potato Head (voice)', 'credit_id': '52fe4284c3a36847f8024f9d', 'gender': 2, 'id': 7167, 'name': 'Don Rickles', 'order': 2, 'profile_path': '/h5BcaDMPRVLHLDzbQavec4xfSdt.jpg'}, {'cast_id': 17, 'character': 'Slinky Dog (voice)', 'credit_id': '52fe4284c3a36847f8024fa1', 'gender': 2, 'id': 12899, 'name': 'Jim Varney', 'order': 3, 'profile_path': '/eIo2jVVXYgjDtaHoF19Ll9vtW7h.jpg'}, {'cast_id': 18, 'character': 'Rex (voice)', 'credit_id': '52fe4284c3a36847f8024fa5', 'gender': 2, 'id': 12900, 'name': 'Wallace Shawn', 'order': 4, 'profile_path': '/oGE6JqPP2xH4tNORKNqxbNPYi7u.jpg'}, {'cast_id': 19, 'character': 'Hamm (voice)', 'credit_id': '52fe4284c3a36847f8024fa9', 'gender': 2, 'id': 7907, 'name': 'John Ratzenberger', 'order': 5, 'profile_path': '/yGechiKWL6TJDfVE2KPSJYqdMsY.jpg'}, {'cast_id': 20, 'character': 'Bo Peep (voice)', 'credit_id': '52fe4284c3a36847f8024fad', 'gender': 1, 'id': 8873, 'name': 'Annie Potts', 'order': 6, 'profile_path': '/eryXT84RL41jHSJcMy4kS3u9y6w.jpg'}, {'cast_id': 26, 'character': 'Andy (voice)', 'credit_id': '52fe4284c3a36847f8024fc1', 'gender': 0, 'id': 1116442, 'name': 'John Morris', 'order': 7, 'profile_path': '/vYGyvK4LzeaUCoNSHtsuqJUY15M.jpg'}, {'cast_id': 22, 'character': 'Sid (voice)', 'credit_id': '52fe4284c3a36847f8024fb1', 'gender': 2, 'id': 12901, 'name': 'Erik von Detten', 'order': 8, 'profile_path': '/twnF1ZaJ1FUNUuo6xLXwcxjayBE.jpg'}, {'cast_id': 23, 'character': 'Mrs. Davis (voice)', 'credit_id': '52fe4284c3a36847f8024fb5', 'gender': 1, 'id': 12133, 'name': 'Laurie Metcalf', 'order': 9, 'profile_path': '/unMMIT60eoBM2sN2nyR7EZ2BvvD.jpg'}, {'cast_id': 24, 'character': 'Sergeant (voice)', 'credit_id': '52fe4284c3a36847f8024fb9', 'gender': 2, 'id': 8655, 'name': 'R. Lee Ermey', 'order': 10, 'profile_path': '/r8GBqFBjypLUP9VVqDqfZ7wYbSs.jpg'}, {'cast_id': 25, 'character': 'Hannah (voice)', 'credit_id': '52fe4284c3a36847f8024fbd', 'gender': 1, 'id': 12903, 'name': 'Sarah Freeman', 'order': 11, 'profile_path': None}, {'cast_id': 27, 'character': 'TV Announcer (voice)', 'credit_id': '52fe4284c3a36847f8024fc5', 'gender': 2, 'id': 37221, 'name': 'Penn Jillette', 'order': 12, 'profile_path': '/zmAaXUdx12NRsssgHbk1T31j2x9.jpg'}]","[{'credit_id': '52fe4284c3a36847f8024f49', 'department': 'Directing', 'gender': 2, 'id': 7879, 'job': 'Director', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '52fe4284c3a36847f8024f4f', 'department': 'Writing', 'gender': 2, 'id': 12891, 'job': 'Screenplay', 'name': 'Joss Whedon', 'profile_path': '/dTiVsuaTVTeGmvkhcyJvKp2A5kr.jpg'}, {'credit_id': '52fe4284c3a36847f8024f55', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Screenplay', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '52fe4284c3a36847f8024f5b', 'department': 'Writing', 'gender': 2, 'id': 12892, 'job': 'Screenplay', 'name': 'Joel Cohen', 'profile_path': '/dAubAiZcvKFbboWlj7oXOkZnTSu.jpg'}, {'credit_id': '52fe4284c3a36847f8024f61', 'department': 'Writing', 'gender': 0, 'id': 12893, 'job': 'Screenplay', 'name': 'Alec Sokolow', 'profile_path': '/v79vlRYi94BZUQnkkyznbGUZLjT.jpg'}, {'credit_id': '52fe4284c3a36847f8024f67', 'department': 'Production', 'gender': 1, 'id': 12894, 'job': 'Producer', 'name': 'Bonnie Arnold', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f6d', 'department': 'Production', 'gender': 0, 'id': 12895, 'job': 'Executive Producer', 'name': 'Ed Catmull', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f73', 'department': 'Production', 'gender': 2, 'id': 12896, 'job': 'Producer', 'name': 'Ralph Guggenheim', 'profile_path': None}, {'credit_id': '52fe4284c3a36847f8024f79', 'department': 'Production', 'gender': 2, 'id': 12897, 'job': 'Executive Producer', 'name': 'Steve Jobs', 'profile_path': '/mOMP3SwD5qWQSR0ldCIByd3guTV.jpg'}, {'credit_id': '52fe4284c3a36847f8024f8b', 'department': 'Editing', 'gender': 2, 'id': 8, 'job': 'Editor', 'name': 'Lee Unkrich', 'profile_path': '/bdTCCXjgOV3YyaNmLGYGOxFQMOc.jpg'}, {'credit_id': '52fe4284c3a36847f8024f91', 'department': 'Art', 'gender': 2, 'id': 7883, 'job': 'Art Direction', 'name': 'Ralph Eggleston', 'profile_path': '/uUfcGKDsKO1aROMpXRs67Hn6RvR.jpg'}, {'credit_id': '598331bf925141421201044b', 'department': 'Editing', 'gender': 2, 'id': 1168870, 'job': 'Editor', 'name': 'Robert Gordon', 'profile_path': None}, {'credit_id': '5892168cc3a36809660095f9', 'department': 'Sound', 'gender': 0, 'id': 1552883, 'job': 'Foley Editor', 'name': 'Mary Helen Leasman', 'profile_path': None}, {'credit_id': '5531824d9251415289000945', 'department': 'Visual Effects', 'gender': 0, 'id': 1453514, 'job': 'Animation', 'name': 'Kim Blanchette', 'profile_path': None}, {'credit_id': '589215969251412dcb009bf6', 'department': 'Sound', 'gender': 0, 'id': 1414182, 'job': 'ADR Editor', 'name': 'Marilyn McCoppen', 'profile_path': None}, {'credit_id': '589217099251412dc500a018', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Orchestrator', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5693e6b29251417b0e0000e3', 'department': 'Editing', 'gender': 0, 'id': 1429549, 'job': 'Color Timer', 'name': 'Dale E. Grahn', 'profile_path': None}, {'credit_id': '572e2522c3a36869e6001a9c', 'department': 'Visual Effects', 'gender': 0, 'id': 7949, 'job': 'CG Painter', 'name': 'Robin Cooper', 'profile_path': None}, {'credit_id': '574f12309251415ca1000012', 'department': 'Writing', 'gender': 2, 'id': 7879, 'job': 'Original Story', 'name': 'John Lasseter', 'profile_path': '/7EdqiNbr4FRjIhKHyPPdFfEEEFG.jpg'}, {'credit_id': '574f1240c3a3682e7300001c', 'department': 'Writing', 'gender': 2, 'id': 12890, 'job': 'Original Story', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '574f12519251415c92000015', 'department': 'Writing', 'gender': 0, 'id': 7911, 'job': 'Original Story', 'name': 'Joe Ranft', 'profile_path': '/f1BoWC2JbCcfP1e5hKfGsxkHzVU.jpg'}, {'credit_id': '574f12cec3a3682e82000022', 'department': 'Crew', 'gender': 0, 'id': 1629419, 'job': 'Post Production Supervisor', 'name': 'Patsy Bouge', 'profile_path': None}, {'credit_id': '574f14f19251415ca1000082', 'department': 'Art', 'gender': 0, 'id': 7961, 'job': 'Sculptor', 'name': 'Norm DeCarlo', 'profile_path': None}, {'credit_id': '5751ae4bc3a3683772002b7f', 'department': 'Visual Effects', 'gender': 2, 'id': 12905, 'job': 'Animation Director', 'name': 'Ash Brannon', 'profile_path': '/6ueWgPEEBHvS3De2BHYQnYjRTig.jpg'}, {'credit_id': '5891edbe9251412dc5007cd6', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Music', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '589213d39251412dc8009832', 'department': 'Directing', 'gender': 0, 'id': 1748707, 'job': 'Layout', 'name': 'Roman Figun', 'profile_path': None}, {'credit_id': '5892173dc3a3680968009351', 'department': 'Sound', 'gender': 2, 'id': 4949, 'job': 'Orchestrator', 'name': 'Don Davis', 'profile_path': None}, {'credit_id': '589217cec3a3686b0a0052ba', 'department': 'Sound', 'gender': 0, 'id': 1372885, 'job': 'Music Editor', 'name': 'James Flamberg', 'profile_path': None}, {'credit_id': '58921831c3a3686348004a64', 'department': 'Editing', 'gender': 0, 'id': 1739962, 'job': 'Negative Cutter', 'name': 'Mary Beth Smith', 'profile_path': None}, {'credit_id': '58921838c3a36809700096c0', 'department': 'Editing', 'gender': 0, 'id': 1748513, 'job': 'Negative Cutter', 'name': 'Rick Mackay', 'profile_path': None}, {'credit_id': '589218429251412dd1009d1b', 'department': 'Art', 'gender': 0, 'id': 1458006, 'job': 'Title Designer', 'name': 'Susan Bradley', 'profile_path': None}, {'credit_id': '5891ed99c3a3680966007670', 'department': 'Crew', 'gender': 0, 'id': 1748557, 'job': 'Supervising Technical Director', 'name': 'William Reeves', 'profile_path': None}, {'credit_id': '5891edcec3a3686b0a002eb2', 'department': 'Sound', 'gender': 2, 'id': 7885, 'job': 'Songs', 'name': 'Randy Newman', 'profile_path': '/w0JzfoiM25nrnxYOzosPHRq6mlE.jpg'}, {'credit_id': '5891edf9c3a36809700075e6', 'department': 'Writing', 'gender': 2, 'id': 7, 'job': 'Original Story', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58920f0b9251412dd7009104', 'department': 'Crew', 'gender': 2, 'id': 12890, 'job': 'Supervising Animator', 'name': 'Pete Docter', 'profile_path': '/r6ngPgnReA3RHmKjmSoVsc6Awjp.jpg'}, {'credit_id': '58920f1fc3a3680977009021', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Designer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '58920f389251412dd700912d', 'department': 'Production', 'gender': 0, 'id': 12909, 'job': 'Production Supervisor', 'name': 'Karen Robert Jackson', 'profile_path': None}, {'credit_id': '58920fbd9251412dcb00969c', 'department': 'Crew', 'gender': 0, 'id': 953331, 'job': 'Executive Music Producer', 'name': 'Chris Montan', 'profile_path': None}, {'credit_id': '589210069251412dd7009219', 'department': 'Visual Effects', 'gender': 0, 'id': 7893, 'job': 'Animation Director', 'name': 'Rich Quade', 'profile_path': None}, {'credit_id': '589210329251412dcd00943b', 'department': 'Visual Effects', 'gender': 0, 'id': 8025, 'job': 'Animation', 'name': 'Michael Berenstein', 'profile_path': None}, {'credit_id': '5892103bc3a368096a009180', 'department': 'Visual Effects', 'gender': 0, 'id': 78009, 'job': 'Animation', 'name': 'Colin Brady', 'profile_path': None}, {'credit_id': '5892105dc3a3680968008db2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748682, 'job': 'Animation', 'name': 'Davey Crockett Feiten', 'profile_path': None}, {'credit_id': '589210669251412dcd009466', 'department': 'Visual Effects', 'gender': 0, 'id': 1454030, 'job': 'Animation', 'name': 'Angie Glocka', 'profile_path': None}, {'credit_id': '5892107c9251412dd1009613', 'department': 'Visual Effects', 'gender': 0, 'id': 1748683, 'job': 'Animation', 'name': 'Rex Grignon', 'profile_path': None}, {'credit_id': '5892108ac3a3680973008d3f', 'department': 'Visual Effects', 'gender': 0, 'id': 1748684, 'job': 'Animation', 'name': 'Tom K. Gurney', 'profile_path': None}, {'credit_id': '58921093c3a3686348004477', 'department': 'Visual Effects', 'gender': 2, 'id': 8029, 'job': 'Animation', 'name': 'Jimmy Hayward', 'profile_path': '/lTDRpudEY7BDwTefXbXzMlmb0ui.jpg'}, {'credit_id': '5892109b9251412dcd0094b0', 'department': 'Visual Effects', 'gender': 0, 'id': 1426773, 'job': 'Animation', 'name': 'Hal T. Hickel', 'profile_path': None}, {'credit_id': '589210a29251412dc5009a29', 'department': 'Visual Effects', 'gender': 0, 'id': 8035, 'job': 'Animation', 'name': 'Karen Kiser', 'profile_path': None}, {'credit_id': '589210ccc3a3680977009191', 'department': 'Visual Effects', 'gender': 0, 'id': 1748688, 'job': 'Animation', 'name': 'Anthony B. LaMolinara', 'profile_path': None}, {'credit_id': '589210d7c3a3686b0a004c1f', 'department': 'Visual Effects', 'gender': 0, 'id': 587314, 'job': 'Animation', 'name': 'Guionne Leroy', 'profile_path': None}, {'credit_id': '589210e1c3a36809770091a7', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Animation', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '589210ee9251412dc200978a', 'department': 'Visual Effects', 'gender': 0, 'id': 1748689, 'job': 'Animation', 'name': 'Les Major', 'profile_path': None}, {'credit_id': '589210fa9251412dc8009595', 'department': 'Visual Effects', 'gender': 2, 'id': 7892, 'job': 'Animation', 'name': 'Glenn McQueen', 'profile_path': None}, {'credit_id': '589211029251412dc8009598', 'department': 'Visual Effects', 'gender': 0, 'id': 555795, 'job': 'Animation', 'name': 'Mark Oftedal', 'profile_path': None}, {'credit_id': '5892110b9251412dc800959d', 'department': 'Visual Effects', 'gender': 2, 'id': 7882, 'job': 'Animation', 'name': 'Jeff Pidgeon', 'profile_path': '/yLddkg5HcgbJg00cS13GVBnP0HY.jpg'}, {'credit_id': '58921113c3a36863480044e4', 'department': 'Visual Effects', 'gender': 0, 'id': 8017, 'job': 'Animation', 'name': 'Jeff Pratt', 'profile_path': None}, {'credit_id': '5892111c9251412dcb0097e9', 'department': 'Visual Effects', 'gender': 0, 'id': 1184140, 'job': 'Animation', 'name': 'Steve Rabatich', 'profile_path': None}, {'credit_id': '58921123c3a36809700090f6', 'department': 'Visual Effects', 'gender': 0, 'id': 8049, 'job': 'Animation', 'name': 'Roger Rose', 'profile_path': None}, {'credit_id': '5892112b9251412dcb0097fb', 'department': 'Visual Effects', 'gender': 0, 'id': 1509559, 'job': 'Animation', 'name': 'Steve Segal', 'profile_path': None}, {'credit_id': '589211349251412dc80095c3', 'department': 'Visual Effects', 'gender': 0, 'id': 1748691, 'job': 'Animation', 'name': 'Doug Sheppeck', 'profile_path': None}, {'credit_id': '5892113cc3a3680970009106', 'department': 'Visual Effects', 'gender': 0, 'id': 8050, 'job': 'Animation', 'name': 'Alan Sperling', 'profile_path': None}, {'credit_id': '58921148c3a3686b0a004c99', 'department': 'Visual Effects', 'gender': 0, 'id': 8010, 'job': 'Animation', 'name': 'Doug Sweetland', 'profile_path': None}, {'credit_id': '58921150c3a3680966009125', 'department': 'Visual Effects', 'gender': 0, 'id': 8044, 'job': 'Animation', 'name': 'David Tart', 'profile_path': None}, {'credit_id': '589211629251412dc5009b00', 'department': 'Visual Effects', 'gender': 0, 'id': 1454034, 'job': 'Animation', 'name': 'Ken Willard', 'profile_path': None}, {'credit_id': '589211c1c3a3686b0a004d28', 'department': 'Visual Effects', 'gender': 0, 'id': 7887, 'job': 'Visual Effects Supervisor', 'name': 'Thomas Porter', 'profile_path': None}, {'credit_id': '589211d4c3a3680968008ed9', 'department': 'Visual Effects', 'gender': 0, 'id': 1406878, 'job': 'Visual Effects', 'name': 'Mark Thomas Henne', 'profile_path': None}, {'credit_id': '589211f59251412dd4008e65', 'department': 'Visual Effects', 'gender': 0, 'id': 1748698, 'job': 'Visual Effects', 'name': 'Oren Jacob', 'profile_path': None}, {'credit_id': '58921242c3a368096a00939b', 'department': 'Visual Effects', 'gender': 0, 'id': 1748699, 'job': 'Visual Effects', 'name': 'Darwyn Peachey', 'profile_path': None}, {'credit_id': '5892124b9251412dc5009bd2', 'department': 'Visual Effects', 'gender': 0, 'id': 1748701, 'job': 'Visual Effects', 'name': 'Mitch Prater', 'profile_path': None}, {'credit_id': '58921264c3a3686b0a004dbf', 'department': 'Visual Effects', 'gender': 0, 'id': 1748703, 'job': 'Visual Effects', 'name': 'Brian M. Rosen', 'profile_path': None}, {'credit_id': '589212709251412dcd009676', 'department': 'Lighting', 'gender': 1, 'id': 12912, 'job': 'Lighting Supervisor', 'name': 'Sharon Calahan', 'profile_path': None}, {'credit_id': '5892127fc3a3686b0a004de5', 'department': 'Lighting', 'gender': 0, 'id': 7899, 'job': 'Lighting Supervisor', 'name': 'Galyn Susman', 'profile_path': None}, {'credit_id': '589212cdc3a3680970009268', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'CG Painter', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892130f9251412dc8009791', 'department': 'Art', 'gender': 0, 'id': 1748705, 'job': 'Sculptor', 'name': 'Shelley Daniels Lekven', 'profile_path': None}, {'credit_id': '5892131c9251412dd4008f4c', 'department': 'Visual Effects', 'gender': 2, 'id': 7889, 'job': 'Character Designer', 'name': 'Bob Pauley', 'profile_path': None}, {'credit_id': '589213249251412dd100987b', 'department': 'Visual Effects', 'gender': 2, 'id': 7918, 'job': 'Character Designer', 'name': 'Bud Luckey', 'profile_path': '/pcCh7G19FKMNijmPQg1PMH1btic.jpg'}, {'credit_id': '5892132b9251412dc80097b1', 'department': 'Visual Effects', 'gender': 2, 'id': 7, 'job': 'Character Designer', 'name': 'Andrew Stanton', 'profile_path': '/pvQWsu0qc8JFQhMVJkTHuexUAa1.jpg'}, {'credit_id': '58921332c3a368634800467b', 'department': 'Visual Effects', 'gender': 0, 'id': 12915, 'job': 'Character Designer', 'name': 'William Cone', 'profile_path': None}, {'credit_id': '5892135f9251412dd4008f90', 'department': 'Visual Effects', 'gender': 0, 'id': 1748706, 'job': 'Character Designer', 'name': 'Steve Johnson', 'profile_path': None}, {'credit_id': '58921384c3a3680973008fd4', 'department': 'Visual Effects', 'gender': 0, 'id': 1176752, 'job': 'Character Designer', 'name': 'Dan Haskett', 'profile_path': None}, {'credit_id': '5892138e9251412dc20099fc', 'department': 'Visual Effects', 'gender': 0, 'id': 1088034, 'job': 'Character Designer', 'name': 'Tom Holloway', 'profile_path': '/a0r0T2usTBpgMI5aZbRBDW1fTl8.jpg'}, {'credit_id': '58921395c3a368097700942f', 'department': 'Visual Effects', 'gender': 0, 'id': 1447465, 'job': 'Character Designer', 'name': 'Jean Gillmore', 'profile_path': None}, {'credit_id': '589213e2c3a3680973009026', 'department': 'Directing', 'gender': 0, 'id': 1748709, 'job': 'Layout', 'name': 'Desirée Mourad', 'profile_path': None}, {'credit_id': '589214099251412dc5009d57', 'department': 'Art', 'gender': 0, 'id': 1748710, 'job': 'Set Dresser', 'name': ""Kelly O'Connell"", 'profile_path': None}, {'credit_id': '58921411c3a3686b0a004f70', 'department': 'Art', 'gender': 0, 'id': 1443471, 'job': 'Set Dresser', 'name': 'Sonoko Konishi', 'profile_path': None}, {'credit_id': '58921434c3a368096a00956e', 'department': 'Art', 'gender': 0, 'id': 1748711, 'job': 'Set Dresser', 'name': 'Ann M. Rockwell', 'profile_path': None}, {'credit_id': '5892144ac3a36809680090de', 'department': 'Editing', 'gender': 0, 'id': 1748712, 'job': 'Editorial Manager', 'name': 'Julie M. McDonald', 'profile_path': None}, {'credit_id': '58921479c3a368096800910f', 'department': 'Editing', 'gender': 0, 'id': 1589729, 'job': 'Assistant Editor', 'name': 'Robin Lee', 'profile_path': None}, {'credit_id': '5892148b9251412dd10099cc', 'department': 'Editing', 'gender': 0, 'id': 1748716, 'job': 'Assistant Editor', 'name': 'Tom Freeman', 'profile_path': None}, {'credit_id': '589214959251412dcb009b1f', 'department': 'Editing', 'gender': 0, 'id': 1748717, 'job': 'Assistant Editor', 'name': 'Ada Cochavi', 'profile_path': None}, {'credit_id': '5892149ec3a3686348004798', 'department': 'Editing', 'gender': 0, 'id': 1336438, 'job': 'Assistant Editor', 'name': 'Dana Mulligan', 'profile_path': None}, {'credit_id': '589214adc3a368096a0095db', 'department': 'Editing', 'gender': 0, 'id': 1748718, 'job': 'Editorial Coordinator', 'name': 'Deirdre Morrison', 'profile_path': None}, {'credit_id': '589214c7c3a368097700952b', 'department': 'Production', 'gender': 0, 'id': 1748719, 'job': 'Production Coordinator', 'name': 'Lori Lombardo', 'profile_path': None}, {'credit_id': '589214cec3a368096a009603', 'department': 'Production', 'gender': 0, 'id': 1748720, 'job': 'Production Coordinator', 'name': 'Ellen Devine', 'profile_path': None}, {'credit_id': '589214e39251412dc8009904', 'department': 'Crew', 'gender': 0, 'id': 1468014, 'job': 'Unit Publicist', 'name': 'Lauren Beth Strogoff', 'profile_path': None}, {'credit_id': '58921544c3a3686b0a00507d', 'department': 'Sound', 'gender': 2, 'id': 2216, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Rydstrom', 'profile_path': '/jZpr1nVfO7lldWI0YtmP1FGw7Rj.jpg'}, {'credit_id': '5892154c9251412dd1009a56', 'department': 'Sound', 'gender': 0, 'id': 1425978, 'job': 'Sound Re-Recording Mixer', 'name': 'Gary Summers', 'profile_path': None}, {'credit_id': '58921555c3a36809680091bd', 'department': 'Sound', 'gender': 2, 'id': 8276, 'job': 'Supervising Sound Editor', 'name': 'Tim Holland', 'profile_path': None}, {'credit_id': '589215c39251412dcb009c12', 'department': 'Sound', 'gender': 0, 'id': 7069, 'job': 'Sound Effects Editor', 'name': 'Pat Jackson', 'profile_path': None}, {'credit_id': '58921698c3a368096a009788', 'department': 'Crew', 'gender': 2, 'id': 15894, 'job': 'Sound Design Assistant', 'name': 'Tom Myers', 'profile_path': None}, {'credit_id': '589216a89251412dc2009ca4', 'department': 'Sound', 'gender': 0, 'id': 1414177, 'job': 'Assistant Sound Editor', 'name': 'J.R. Grubbs', 'profile_path': None}, {'credit_id': '589216c19251412dc2009cb9', 'department': 'Sound', 'gender': 1, 'id': 1748724, 'job': 'Assistant Sound Editor', 'name': 'Susan Sanford', 'profile_path': None}, {'credit_id': '589216ccc3a3680973009274', 'department': 'Sound', 'gender': 0, 'id': 1748725, 'job': 'Assistant Sound Editor', 'name': 'Susan Popovic', 'profile_path': None}, {'credit_id': '589216d79251412dc8009aa0', 'department': 'Sound', 'gender': 0, 'id': 8067, 'job': 'Assistant Sound Editor', 'name': 'Dan Engstrom', 'profile_path': None}, {'credit_id': '589216e49251412dcd009a4f', 'department': 'Production', 'gender': 1, 'id': 7902, 'job': 'Casting Consultant', 'name': 'Ruth Lambert', 'profile_path': None}, {'credit_id': '589216f39251412dc2009cf3', 'department': 'Production', 'gender': 0, 'id': 84493, 'job': 'ADR Voice Casting', 'name': 'Mickie McGowan', 'profile_path': '/k7TjJBfINsg8vLQxJwos6XObAD6.jpg'}]",[1995]


creating a function "convert(text)" to extract "name value" from the list of dictionaries both from genres and keywords

In [32]:
import ast

In [33]:
def convert(text):
    L = []
    for i in ast.literal_eval(text):
        L.append(i['name']) 
    return L 

now applying the function over both the column genres and keywords with apply function

In [34]:
movies_df_join['genres'] = movies_df_join['genres'].apply(convert)


In [35]:
movies_df_join['keywords'] = movies_df_join['keywords'].apply(convert)


creating a function "top_3_cast(text)" to extract "top 3 name values" or top 3 casts of the movie from the list of dictionaries of "cast"

In [36]:
#top 3 starring cast
def top_3_cast(text):
    L = []
    counter = 0
    for i in ast.literal_eval(text):
        if counter < 3:
            L.append(i['name'])
        counter+=1
    return L

In [37]:
movies_df_join['cast'] = movies_df_join['cast'].apply(top_3_cast)
movies_df_join['cast'] 

0        [Tom Hanks, Tim Allen, Don Rickles]                
1        [Robin Williams, Jonathan Hyde, Kirsten Dunst]     
2        [Walter Matthau, Jack Lemmon, Ann-Margret]         
3        [Whitney Houston, Angela Bassett, Loretta Devine]  
4        [Steve Martin, Diane Keaton, Martin Short]         
                            ...                             
45315    [Leila Hatami, Kourosh Tahami, Elham Korda]        
45316    [Angel Aquino, Perry Dizon, Hazel Orencio]         
45317    [Erika Eleniak, Adam Baldwin, Julie du Page]       
45318    [Iwan Mosschuchin, Nathalie Lissenko, Pavel Pavlov]
45319    []                                                 
Name: cast, Length: 45320, dtype: object

creating a function "fetch_director(text)" to extract "director name" from the list of dictionaries of crew

In [38]:
def fetch_director(text):
    L = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            L.append(i['name'])
    return L 

In [39]:
movies_df_join['crew'] = movies_df_join['crew'].apply(fetch_director)

In [40]:
#checking our function how it looks now after applying the fucntions
movies_df_join.head()

Unnamed: 0,id,title,overview,genres,keywords,cast,crew,year
0,862,Toy Story,"Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.","[Animation, Comedy, Family]","[jealousy, toy, boy, friendship, friends, rivalry, boy next door, new toy, toy comes to life]","[Tom Hanks, Tim Allen, Don Rickles]",[John Lasseter],[1995]
1,8844,Jumanji,"When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.","[Adventure, Fantasy, Family]","[board game, disappearance, based on children's book, new home, recluse, giant insect]","[Robin Williams, Jonathan Hyde, Kirsten Dunst]",[Joe Johnston],[1995]
2,15602,Grumpier Old Men,"A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.","[Romance, Comedy]","[fishing, best friend, duringcreditsstinger, old men]","[Walter Matthau, Jack Lemmon, Ann-Margret]",[Howard Deutch],[1995]
3,31357,Waiting to Exhale,"Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive ""good man"" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.","[Comedy, Drama, Romance]","[based on novel, interracial relationship, single mother, divorce, chick flick]","[Whitney Houston, Angela Bassett, Loretta Devine]",[Forest Whitaker],[1995]
4,11862,Father of the Bride Part II,"Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own.",[Comedy],"[baby, midlife crisis, confidence, aging, daughter, mother daughter relationship, pregnancy, contraception, gynecologist]","[Steve Martin, Diane Keaton, Martin Short]",[Charles Shyer],[1995]


removing whitespace between name to treat it as a single entity as only first or last name can lead to confusion for the model

In [41]:
def collapse(L):
    L1 = []
    for i in L:
        L1.append(i.replace(" ",""))
    return L1

In [42]:
movies_df_join['genres'] = movies_df_join['genres'].apply(collapse)
movies_df_join['keywords'] = movies_df_join['keywords'].apply(collapse)
movies_df_join['cast'] = movies_df_join['cast'].apply(collapse)
movies_df_join['crew'] = movies_df_join['crew'].apply(collapse)


splitting overview in a list

In [43]:
movies_df_join['overview'] = movies_df_join['overview'].apply(lambda x:x.split())


joining all the columns in one column called tags to train the model

In [44]:
movies_df_join['tags'] = movies_df_join['overview'] + movies_df_join['genres'] + movies_df_join['keywords'] + movies_df_join['cast'] + movies_df_join['crew']  + movies_df_join['year']
movies_df_join.head()

Unnamed: 0,id,title,overview,genres,keywords,cast,crew,year,tags
0,862,Toy Story,"[Led, by, Woody,, Andy's, toys, live, happily, in, his, room, until, Andy's, birthday, brings, Buzz, Lightyear, onto, the, scene., Afraid, of, losing, his, place, in, Andy's, heart,, Woody, plots, against, Buzz., But, when, circumstances, separate, Buzz, and, Woody, from, their, owner,, the, duo, eventually, learns, to, put, aside, their, differences.]","[Animation, Comedy, Family]","[jealousy, toy, boy, friendship, friends, rivalry, boynextdoor, newtoy, toycomestolife]","[TomHanks, TimAllen, DonRickles]",[JohnLasseter],[1995],"[Led, by, Woody,, Andy's, toys, live, happily, in, his, room, until, Andy's, birthday, brings, Buzz, Lightyear, onto, the, scene., Afraid, of, losing, his, place, in, Andy's, heart,, Woody, plots, against, Buzz., But, when, circumstances, separate, Buzz, and, Woody, from, their, owner,, the, duo, eventually, learns, to, put, aside, their, differences., Animation, Comedy, Family, jealousy, toy, boy, friendship, friends, rivalry, boynextdoor, newtoy, toycomestolife, TomHanks, TimAllen, DonRickles, JohnLasseter, 1995]"
1,8844,Jumanji,"[When, siblings, Judy, and, Peter, discover, an, enchanted, board, game, that, opens, the, door, to, a, magical, world,, they, unwittingly, invite, Alan, --, an, adult, who's, been, trapped, inside, the, game, for, 26, years, --, into, their, living, room., Alan's, only, hope, for, freedom, is, to, finish, the, game,, which, proves, risky, as, all, three, find, themselves, running, from, giant, rhinoceroses,, evil, monkeys, and, other, terrifying, creatures.]","[Adventure, Fantasy, Family]","[boardgame, disappearance, basedonchildren'sbook, newhome, recluse, giantinsect]","[RobinWilliams, JonathanHyde, KirstenDunst]",[JoeJohnston],[1995],"[When, siblings, Judy, and, Peter, discover, an, enchanted, board, game, that, opens, the, door, to, a, magical, world,, they, unwittingly, invite, Alan, --, an, adult, who's, been, trapped, inside, the, game, for, 26, years, --, into, their, living, room., Alan's, only, hope, for, freedom, is, to, finish, the, game,, which, proves, risky, as, all, three, find, themselves, running, from, giant, rhinoceroses,, evil, monkeys, and, other, terrifying, creatures., Adventure, Fantasy, Family, boardgame, disappearance, basedonchildren'sbook, newhome, recluse, giantinsect, RobinWilliams, JonathanHyde, KirstenDunst, JoeJohnston, 1995]"
2,15602,Grumpier Old Men,"[A, family, wedding, reignites, the, ancient, feud, between, next-door, neighbors, and, fishing, buddies, John, and, Max., Meanwhile,, a, sultry, Italian, divorcée, opens, a, restaurant, at, the, local, bait, shop,, alarming, the, locals, who, worry, she'll, scare, the, fish, away., But, she's, less, interested, in, seafood, than, she, is, in, cooking, up, a, hot, time, with, Max.]","[Romance, Comedy]","[fishing, bestfriend, duringcreditsstinger, oldmen]","[WalterMatthau, JackLemmon, Ann-Margret]",[HowardDeutch],[1995],"[A, family, wedding, reignites, the, ancient, feud, between, next-door, neighbors, and, fishing, buddies, John, and, Max., Meanwhile,, a, sultry, Italian, divorcée, opens, a, restaurant, at, the, local, bait, shop,, alarming, the, locals, who, worry, she'll, scare, the, fish, away., But, she's, less, interested, in, seafood, than, she, is, in, cooking, up, a, hot, time, with, Max., Romance, Comedy, fishing, bestfriend, duringcreditsstinger, oldmen, WalterMatthau, JackLemmon, Ann-Margret, HowardDeutch, 1995]"
3,31357,Waiting to Exhale,"[Cheated, on,, mistreated, and, stepped, on,, the, women, are, holding, their, breath,, waiting, for, the, elusive, ""good, man"", to, break, a, string, of, less-than-stellar, lovers., Friends, and, confidants, Vannah,, Bernie,, Glo, and, Robin, talk, it, all, out,, determined, to, find, a, better, way, to, breathe.]","[Comedy, Drama, Romance]","[basedonnovel, interracialrelationship, singlemother, divorce, chickflick]","[WhitneyHouston, AngelaBassett, LorettaDevine]",[ForestWhitaker],[1995],"[Cheated, on,, mistreated, and, stepped, on,, the, women, are, holding, their, breath,, waiting, for, the, elusive, ""good, man"", to, break, a, string, of, less-than-stellar, lovers., Friends, and, confidants, Vannah,, Bernie,, Glo, and, Robin, talk, it, all, out,, determined, to, find, a, better, way, to, breathe., Comedy, Drama, Romance, basedonnovel, interracialrelationship, singlemother, divorce, chickflick, WhitneyHouston, AngelaBassett, LorettaDevine, ForestWhitaker, 1995]"
4,11862,Father of the Bride Part II,"[Just, when, George, Banks, has, recovered, from, his, daughter's, wedding,, he, receives, the, news, that, she's, pregnant, ..., and, that, George's, wife,, Nina,, is, expecting, too., He, was, planning, on, selling, their, home,, but, that's, a, plan, that, --, like, George, --, will, have, to, change, with, the, arrival, of, both, a, grandchild, and, a, kid, of, his, own.]",[Comedy],"[baby, midlifecrisis, confidence, aging, daughter, motherdaughterrelationship, pregnancy, contraception, gynecologist]","[SteveMartin, DianeKeaton, MartinShort]",[CharlesShyer],[1995],"[Just, when, George, Banks, has, recovered, from, his, daughter's, wedding,, he, receives, the, news, that, she's, pregnant, ..., and, that, George's, wife,, Nina,, is, expecting, too., He, was, planning, on, selling, their, home,, but, that's, a, plan, that, --, like, George, --, will, have, to, change, with, the, arrival, of, both, a, grandchild, and, a, kid, of, his, own., Comedy, baby, midlifecrisis, confidence, aging, daughter, motherdaughterrelationship, pregnancy, contraception, gynecologist, SteveMartin, DianeKeaton, MartinShort, CharlesShyer, 1995]"


In [45]:
final_df = movies_df_join.drop(columns=['overview','keywords','cast','crew','year'])
final_df.head()

Unnamed: 0,id,title,genres,tags
0,862,Toy Story,"[Animation, Comedy, Family]","[Led, by, Woody,, Andy's, toys, live, happily, in, his, room, until, Andy's, birthday, brings, Buzz, Lightyear, onto, the, scene., Afraid, of, losing, his, place, in, Andy's, heart,, Woody, plots, against, Buzz., But, when, circumstances, separate, Buzz, and, Woody, from, their, owner,, the, duo, eventually, learns, to, put, aside, their, differences., Animation, Comedy, Family, jealousy, toy, boy, friendship, friends, rivalry, boynextdoor, newtoy, toycomestolife, TomHanks, TimAllen, DonRickles, JohnLasseter, 1995]"
1,8844,Jumanji,"[Adventure, Fantasy, Family]","[When, siblings, Judy, and, Peter, discover, an, enchanted, board, game, that, opens, the, door, to, a, magical, world,, they, unwittingly, invite, Alan, --, an, adult, who's, been, trapped, inside, the, game, for, 26, years, --, into, their, living, room., Alan's, only, hope, for, freedom, is, to, finish, the, game,, which, proves, risky, as, all, three, find, themselves, running, from, giant, rhinoceroses,, evil, monkeys, and, other, terrifying, creatures., Adventure, Fantasy, Family, boardgame, disappearance, basedonchildren'sbook, newhome, recluse, giantinsect, RobinWilliams, JonathanHyde, KirstenDunst, JoeJohnston, 1995]"
2,15602,Grumpier Old Men,"[Romance, Comedy]","[A, family, wedding, reignites, the, ancient, feud, between, next-door, neighbors, and, fishing, buddies, John, and, Max., Meanwhile,, a, sultry, Italian, divorcée, opens, a, restaurant, at, the, local, bait, shop,, alarming, the, locals, who, worry, she'll, scare, the, fish, away., But, she's, less, interested, in, seafood, than, she, is, in, cooking, up, a, hot, time, with, Max., Romance, Comedy, fishing, bestfriend, duringcreditsstinger, oldmen, WalterMatthau, JackLemmon, Ann-Margret, HowardDeutch, 1995]"
3,31357,Waiting to Exhale,"[Comedy, Drama, Romance]","[Cheated, on,, mistreated, and, stepped, on,, the, women, are, holding, their, breath,, waiting, for, the, elusive, ""good, man"", to, break, a, string, of, less-than-stellar, lovers., Friends, and, confidants, Vannah,, Bernie,, Glo, and, Robin, talk, it, all, out,, determined, to, find, a, better, way, to, breathe., Comedy, Drama, Romance, basedonnovel, interracialrelationship, singlemother, divorce, chickflick, WhitneyHouston, AngelaBassett, LorettaDevine, ForestWhitaker, 1995]"
4,11862,Father of the Bride Part II,[Comedy],"[Just, when, George, Banks, has, recovered, from, his, daughter's, wedding,, he, receives, the, news, that, she's, pregnant, ..., and, that, George's, wife,, Nina,, is, expecting, too., He, was, planning, on, selling, their, home,, but, that's, a, plan, that, --, like, George, --, will, have, to, change, with, the, arrival, of, both, a, grandchild, and, a, kid, of, his, own., Comedy, baby, midlifecrisis, confidence, aging, daughter, motherdaughterrelationship, pregnancy, contraception, gynecologist, SteveMartin, DianeKeaton, MartinShort, CharlesShyer, 1995]"


## joining splitted list in a sentence like

In [46]:
final_df['tags'] = final_df['tags'].apply(lambda x: " ".join(x))
final_df['genres'] = final_df['genres'].apply(lambda x: " ".join(x))



In [47]:
final_df['tags'] = final_df['tags'].apply(lambda x: x.lower())
final_df.head()


Unnamed: 0,id,title,genres,tags
0,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995"
1,8844,Jumanji,Adventure Fantasy Family,"when siblings judy and peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite alan -- an adult who's been trapped inside the game for 26 years -- into their living room. alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures. adventure fantasy family boardgame disappearance basedonchildren'sbook newhome recluse giantinsect robinwilliams jonathanhyde kirstendunst joejohnston 1995"
2,15602,Grumpier Old Men,Romance Comedy,"a family wedding reignites the ancient feud between next-door neighbors and fishing buddies john and max. meanwhile, a sultry italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. but she's less interested in seafood than she is in cooking up a hot time with max. romance comedy fishing bestfriend duringcreditsstinger oldmen waltermatthau jacklemmon ann-margret howarddeutch 1995"
3,31357,Waiting to Exhale,Comedy Drama Romance,"cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive ""good man"" to break a string of less-than-stellar lovers. friends and confidants vannah, bernie, glo and robin talk it all out, determined to find a better way to breathe. comedy drama romance basedonnovel interracialrelationship singlemother divorce chickflick whitneyhouston angelabassett lorettadevine forestwhitaker 1995"
4,11862,Father of the Bride Part II,Comedy,"just when george banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that george's wife, nina, is expecting too. he was planning on selling their home, but that's a plan that -- like george -- will have to change with the arrival of both a grandchild and a kid of his own. comedy baby midlifecrisis confidence aging daughter motherdaughterrelationship pregnancy contraception gynecologist stevemartin dianekeaton martinshort charlesshyer 1995"


## stemming


In [48]:
from nltk.stem.porter import PorterStemmer
ps=PorterStemmer()

In [49]:
def stem(text):
    y=[]
    for i in text.split():
       y.append(ps.stem(i))
       
    return " ".join(y)
        

In [50]:
final_df['tags_post_stem']=final_df['tags'].apply(stem)

In [51]:
final_df.head()

Unnamed: 0,id,title,genres,tags,tags_post_stem
0,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995"
1,8844,Jumanji,Adventure Fantasy Family,"when siblings judy and peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite alan -- an adult who's been trapped inside the game for 26 years -- into their living room. alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures. adventure fantasy family boardgame disappearance basedonchildren'sbook newhome recluse giantinsect robinwilliams jonathanhyde kirstendunst joejohnston 1995","when sibl judi and peter discov an enchant board game that open the door to a magic world, they unwittingli invit alan -- an adult who' been trap insid the game for 26 year -- into their live room. alan' onli hope for freedom is to finish the game, which prove riski as all three find themselv run from giant rhinoceroses, evil monkey and other terrifi creatures. adventur fantasi famili boardgam disappear basedonchildren'sbook newhom reclus giantinsect robinwilliam jonathanhyd kirstendunst joejohnston 1995"
2,15602,Grumpier Old Men,Romance Comedy,"a family wedding reignites the ancient feud between next-door neighbors and fishing buddies john and max. meanwhile, a sultry italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. but she's less interested in seafood than she is in cooking up a hot time with max. romance comedy fishing bestfriend duringcreditsstinger oldmen waltermatthau jacklemmon ann-margret howarddeutch 1995","a famili wed reignit the ancient feud between next-door neighbor and fish buddi john and max. meanwhile, a sultri italian divorcé open a restaur at the local bait shop, alarm the local who worri she'll scare the fish away. but she' less interest in seafood than she is in cook up a hot time with max. romanc comedi fish bestfriend duringcreditssting oldmen waltermatthau jacklemmon ann-margret howarddeutch 1995"
3,31357,Waiting to Exhale,Comedy Drama Romance,"cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive ""good man"" to break a string of less-than-stellar lovers. friends and confidants vannah, bernie, glo and robin talk it all out, determined to find a better way to breathe. comedy drama romance basedonnovel interracialrelationship singlemother divorce chickflick whitneyhouston angelabassett lorettadevine forestwhitaker 1995","cheat on, mistreat and step on, the women are hold their breath, wait for the elus ""good man"" to break a string of less-than-stellar lovers. friend and confid vannah, bernie, glo and robin talk it all out, determin to find a better way to breathe. comedi drama romanc basedonnovel interracialrelationship singlemoth divorc chickflick whitneyhouston angelabassett lorettadevin forestwhitak 1995"
4,11862,Father of the Bride Part II,Comedy,"just when george banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that george's wife, nina, is expecting too. he was planning on selling their home, but that's a plan that -- like george -- will have to change with the arrival of both a grandchild and a kid of his own. comedy baby midlifecrisis confidence aging daughter motherdaughterrelationship pregnancy contraception gynecologist stevemartin dianekeaton martinshort charlesshyer 1995","just when georg bank ha recov from hi daughter' wedding, he receiv the news that she' pregnant ... and that george' wife, nina, is expect too. he wa plan on sell their home, but that' a plan that -- like georg -- will have to chang with the arriv of both a grandchild and a kid of hi own. comedi babi midlifecrisi confid age daughter motherdaughterrelationship pregnanc contracept gynecologist stevemartin dianekeaton martinshort charlesshy 1995"


In [52]:
final_df.shape

(45320, 5)

In [53]:
final_df.title.value_counts()

Blackout                       13
Cinderella                     13
Hamlet                         9 
Alice in Wonderland            8 
Beauty and the Beast           8 
                              .. 
Whoopee!                       1 
Dr. Ehrlich's Magic Bullet     1 
The House on Telegraph Hill    1 
How About You...               1 
Queerama                       1 
Name: title, Length: 41217, dtype: int64

ngram range bigram, 2 words together

In [54]:
final_df.genres.value_counts()

Drama                                     5016
Comedy                                    3390
Documentary                               2761
                                          2231
Drama Romance                             1319
                                          ... 
Crime Adventure Action Comedy Thriller    1   
Animation Horror Thriller                 1   
Drama Action Comedy Thriller Foreign      1   
ScienceFiction Action Adventure Comedy    1   
Family Animation Romance Comedy           1   
Name: genres, Length: 4039, dtype: int64

In [55]:
final_df.tags.value_counts()

recovering from a nail gun shot to the head and 13 months of coma, doctor pekka valinta starts to unravel the mystery of his past, still suffering from total amnesia. thriller mystery petterisummanen ismokallio eppusalminen jpsiili 2008                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            

In [56]:
final_df.to_csv('final_df_analysis.csv')


PREPROCESSING DONE


## text to vectors

SEARCH ALGO tfidf_tags

In [57]:
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
stop = stopwords.words('english')

# instantiate the vectorizer object
vectorizer_title = TfidfVectorizer(ngram_range=(1,2),lowercase=True,stop_words=stop,max_features=41217)
vectorizer_genres = TfidfVectorizer(ngram_range=(1,2),lowercase=True,stop_words=stop,max_features=4039)
vectorizer_tags = TfidfVectorizer(ngram_range=(1,2),lowercase=True,stop_words=stop,max_features=44303)

# convert the documents into a matrix
tfidf_title=vectorizer_title.fit_transform(final_df["title"])
tfidf_genres=vectorizer_genres.fit_transform(final_df["genres"])
tfidf_tags=vectorizer_tags.fit_transform(final_df["tags_post_stem"])


In [58]:
vectorizer_title.get_feature_names_out()


array(['00', '000', '000 black', ..., 'юленька', 'іван', 'іван сила'],
      dtype=object)

In [59]:
len(vectorizer_title.get_feature_names_out())

41217

In [60]:
vectorizer_genres.get_feature_names_out()

array(['action', 'action adventure', 'action animation', 'action comedy',
       'action crime', 'action documentary', 'action drama',
       'action family', 'action fantasy', 'action foreign',
       'action history', 'action horror', 'action music',
       'action mystery', 'action romance', 'action sciencefiction',
       'action thriller', 'action tvmovie', 'action war',
       'action western', 'adventure', 'adventure action',
       'adventure animation', 'adventure comedy', 'adventure crime',
       'adventure documentary', 'adventure drama', 'adventure family',
       'adventure fantasy', 'adventure foreign', 'adventure history',
       'adventure horror', 'adventure music', 'adventure mystery',
       'adventure romance', 'adventure sciencefiction',
       'adventure thriller', 'adventure tvmovie', 'adventure war',
       'adventure western', 'animation', 'animation action',
       'animation adventure', 'animation comedy', 'animation crime',
       'animation documentary', '

In [61]:
len(vectorizer_genres.get_feature_names_out())

397

In [62]:
vectorizer_tags.get_feature_names_out()

array(['00', '000', '000 000', ..., 'он', 'эрикаэлениак', '松田龙平'],
      dtype=object)

In [63]:
len(vectorizer_tags.get_feature_names_out())

44303

In [64]:
tfidf_title.toarray()


In [65]:
tfidf_title.shape

(45320, 41217)

In [66]:
tfidf_genres.toarray()

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.36617605, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [67]:
tfidf_genres.shape

(45320, 397)

In [68]:
tfidf_tags.toarray()

In [69]:
tfidf_tags.shape

(45320, 44303)

## cosine similarity

In [70]:
from sklearn.metrics.pairwise import cosine_similarity


In [177]:
#input tags matching with tags
def search_tags(tags):
    tags=tags.lower()
    
    l=[]
    query_vector=vectorizer_tags.transform([tags])
    similarity_algo=cosine_similarity(query_vector,tfidf_tags).flatten()

    distances = sorted(list(enumerate(similarity_algo)),reverse=True,key = lambda x: x[1])
    # distances = sorted(list(enumerate(similarity_algo)),reverse=True)
    for i in distances[:5]:
        l.append(i[0])

    # return final_df['title'].iloc[l].tolist()
    # return movies_ratings_df_join.iloc[l]   
    return final_df.iloc[l][["id","title","genres","tags"]]

   
        

In [178]:
#input genres matching with genres
def search_genres(genres):
    genres=genres.lower()
    
    l=[]
    query_vector=vectorizer_genres.transform([genres])
    similarity_algo=cosine_similarity(query_vector,tfidf_genres).flatten()

    distances = sorted(list(enumerate(similarity_algo)),reverse=True,key = lambda x: x[1])
    
    for i in distances[:5]:
        l.append(i[0])

    return final_df.iloc[l][["id","title","genres","tags"]]

   
        

In [179]:
#input title matching with title
def search_title(title):
    title=title.lower()
    l=[]
    query_vector=vectorizer_title.transform([title])
    similarity_algo=cosine_similarity(query_vector,tfidf_title).flatten()

    distances = sorted(list(enumerate(similarity_algo)),reverse=True,key = lambda x: x[1])
    
    for i in distances[:5]:
        l.append(i[0])

    return final_df.iloc[l][["id","title","genres","tags"]]

   
        

In [203]:
search_title('Kabhi Khushi Kabhie Gham')

Unnamed: 0,id,title,genres,tags
9252,10757,Kabhi Khushi Kabhie Gham,Comedy Drama,"orthodox indian, raichand, would like his two sons to live together with him and his wife, and get married to girls' of his choice. one of his sons, rahul, is adopted, while rohan is his real son. rahul falls in love with a poor indian girl named anjali, and incurs the displeasure of raichand, they argue and fight, as a result rahul leaves the house, moves to britain, and settles down. raichand now focuses his attention on his real son, rohan, who has no plans to get married, but is determined to bring rahul and anjali back home so that they can be together again. will raichand permit rohan to have his way, or will he also ask him to leave the house? comedy drama fathersonrelationship loveofone'slife forbiddenlove reunion familyconflict familyplanning shahrukhkhan kajol amitabhbachchan karanjohar 2001"
36813,345426,Khushi,Romance Drama Action,"karan, born in calcutta, and khushi, born in the small village of chamoli, are destined to meet despite the distance between the two. karan wants to go canada to further her studies--but due to an accident that takes place, he is forced to continue his studies in mumbai university. khushi's father believes that for a girl the education that she has receives is enough and now she should stay with him and marry a boy who is ready to be his son-in-law. but khushi insists on pursuing her education in mumbai university. at the university karan's friend vicky and khushi's friend priya are in love, but are having relationship problems. karan and khushi decide to help vicky and priya come together, but end up making a romantic connection with each other. romance drama action kareenakapoor fardeenkhan amrishpuri s.j.surya 2003"
23490,15419,Kabhi Haan Kabhi Naa,Comedy Drama Music,"sunil belongs to a middle-class family, and is intent in pursuing his career with a music group, despite of his dad disapproval. sunil is also in love with anna, but anna does not really love him, but likes him as a friend. sunil is persistent, but instead anna openly declares her love for chris. in order to impress his dad, sunil forges his examination results, but then later confesses to his family, who receive this news in utter dismay. will this change sunil's ways? will anna change her mind about sunil or will she get married to chris? comedy drama music shahrukhkhan suchitrakrishnamoorthi deepaktijori kundanshah 1993"
43581,14395,Kabhi Alvida Naa Kehna,Drama Romance,"dev and maya are both married to different people. settled into a life of domestic ritual, and convinced that they are happy in their respective relationships, they still yearn for something deeper and more meaningful, which is precisely what they find in each other. drama romance adultery extramaritalaffair shahrukhkhan ranimukerji preityzinta karanjohar 2006"
0,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995"


In [75]:
similarity_algo=cosine_similarity(tfidf_tags)

In [83]:
similarity_algo[0]

array([1.        , 0.02321995, 0.01423045, ..., 0.        , 0.0119559 ,
       0.        ])

In [181]:
title='Kabhi Khushi Kabhie Gham'

In [183]:
movie_index= final_df[final_df['title']==title].index[0]
similarity=similarity_algo[movie_index]
similarity

array([0.0180215 , 0.00220158, 0.00119112, ..., 0.00375383, 0.03979121,
       0.        ])

In [184]:
distances = sorted(list(enumerate(similarity*100)),reverse=True,key = lambda x: x[1])[1:6]

distances

[(33796, 32.20625984816069),
 (9169, 29.463376977690643),
 (33895, 28.75888476886173),
 (33821, 27.321860755288117),
 (36912, 26.627149097765983)]

In [185]:
l=[]
for i in distances:
        l.append(i[0])
l

[33796, 9169, 33895, 33821, 36912]

In [186]:
final_df.iloc[l][["id","title","genres","tags"]]

Unnamed: 0,id,title,genres,tags
33796,21462,Masoom,Drama Family,"d.k. malhotra lives a comfortable lifestyle with his wife, indu, and two school-going daughters, pinky and minni. he works in the office of an architect. one day while the family is relaxing, d.k. gets a phone call that results in him bringing home a young school-going boy by the name of rahul. indu is shocked to learn that rahul is d.k.'s son from another woman, bhavana, who is no more. d.k. does his best to make rahul comfortable, but fails. rahul also feels that indu does not really like him, though pinky and minni have taken to him in a big way. finally, d.k. decides to admit rahul in a boarding school in far off nainital. rahul reluctantly goes along with this new-found uncle/friend. it is when rahul is asked to put his papers together for school that rahul finds out that d.k. is his biological father. watch what happens when rahul disappears from d.k.'s house, and the impact this has on d.k., indu, and her two daughters. drama family bollywood naseeruddinshah shabanaazmi urmilamatondkar shekharkapur 1983"
9169,11854,Kuch Kuch Hota Hai,Drama Romance,"anjali is left heartbroken when her best friend and secret crush, rahul, falls in love with tina. years later, tina's young daughter tries to fulfil her mother's last wish of uniting rahul and anjali. drama romance soulmates platoniclove dyinganddeath approach terminalillness friendshipbracelet burschikos love bollywood fallinginlove unhappiness childhoodfriends shahrukhkhan kajol ranimukerji karanjohar 1998"
33895,161227,Jaanam Samjha Karo,,"talented singer and dancer, chandni's life is dominated by three aunts, and a soft-spoken maternal grandmother. she meets with a womanizer named rahul, and falls in love with him. rahul treats her just like any of his other girlfriends, and decides to have an affair with her. rahul unexpectedly meets with his dadaji, who wants him to get married to chandni. in order to fool him, rahul asks chandni to pretend to be his wife, which she does. after dadaji leaves, will rahul and chandni go their separate ways, and will rahul return to the waiting arms of his many girlfriends? salmankhan urmilamatondkar jaspalbhatti andaleebsultanpuri 1999"
33821,54814,Hum Hain Rahi Pyar Ke,,"rahul malhotra (aamir khan) is the manager of the heavily in debt family business. he is also the guardian of his dead sister's mischievous kids. rahul hires vaijayanti (juhi chawla) as governess. vaijayanti is a runaway from home as she does not want to marry the man her orthodox family has chosen for her. predictably, rahul and vaijayanti fall in love. maya (navneet nishan), a rich girl in love with rahul tries to ruin his family and his business, but all ends happily. aamirkhan juhichawla mastersharokh maheshbhatt 1993"
36912,362812,Chaahat Ek Nasha...,Romance Drama,"rashmi lives a wealthy lifestyle in india along with her widower dad, dr. sanjeev jaitly. she aspires to be a singer and dancer, and is encouraged by her friends and dad to take this up as a profession. she prepares her portfolio and approaches r.k. music co, which is run by rahul kapoor. rashmi is also an ardent fan of established singer/dancer, mallika arora, a client of rahul, and always sends her gifts and flowers on every possible occasion. when mallika fails to impress the public, rahul is advised to look for fresh talent, and he approaches rashmi, and together they release their first album which turns to be a major success. together they deliver many other hits and also fall in love with each other. sanjeev initially opposes their romance and eventual marriage as he feels that rahul and his celebrity lifestyle is not right for rashmi. he eventually gives in when rashmi insists on marrying rahul... romance drama aryanvaid manishakoirala preetijhangiani jayaprakashreddy suryaprakash 2005"


In [137]:
#input title matching with title
def search_title_tags(title):
    l=[]
    movie_index= final_df[final_df['title']==title].index[0]
    
    similarity=similarity_algo[movie_index]
    
    distances = sorted(list(enumerate(similarity)),reverse=True,key = lambda x: x[1])[1:6]
    
    for i in distances:
        l.append(i[0])

    return final_df.iloc[l][["id","title","genres","tags"]]

   
        

In [224]:
title='Malamaal Weekly'
search_title_tags(title)

Unnamed: 0,id,title,genres,tags
7505,5521,Finder's Fee,Drama Thriller,after finding a wallet in the street tepper calls the owner in order to return it. after making the call he discovers that the lottery ticket inside is a $6 million winner. to add to things his friends are on their way over for their weekly poker night & the groups tradition is to bet their lottery ticket. drama thriller poker man lottery gain suspense independentfilm erikpalladino dashmihok carlypope jeffprobst 2001
44033,18457,Dead Weight,Comedy,"prison guard reggio lets moltes have a weekly lottery ticket in exchange for his suave relationship advice. when moltes learns that he has a winning lottery ticket, he breaks out of jail to claim his money. however, reggio's girlfriend, pauline , has the ticket with her in africa. the action involves the odd couple teaming up to find the ticket while being chased by det. youssouf , along with the vengeful turk and the giant. comedy benoîtpoelvoorde gérardlanvin joségarcia alainberbérian frédéricforestier 2002"
40993,403429,Two Lottery Tickets,Comedy,"3 men from a provincial town who are in an urgent need for money so they decide to buy a lottery ticket. they win the lottery, but very soon, the ticket gets stolen. comedy lottery roadmovie mirceabanu dorianboguţă dragoșbucur paulnegoescu 2016"
32320,71051,Sportloto-82,Comedy,adventurous comedy about a bunch of people hunting the winning lottery ticket. comedy algisarlauskas svetlanaamanova mikhailpugovkin leonidgayday 1982
2232,10162,Waking Ned,Comedy Romance,"when a lottery winner dies of shock, his fellow townsfolk attempt to claim the money. comedy romance falseidentity beguilement lottery village jackpot independentfilm ireland ianbannen davidkelly fionnulaflanagan kirkjones 1998"


In [220]:
search_tags("Bajirao Mastani")

Unnamed: 0,id,title,genres,tags
0,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995"
1,8844,Jumanji,Adventure Fantasy Family,"when siblings judy and peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite alan -- an adult who's been trapped inside the game for 26 years -- into their living room. alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures. adventure fantasy family boardgame disappearance basedonchildren'sbook newhome recluse giantinsect robinwilliams jonathanhyde kirstendunst joejohnston 1995"
2,15602,Grumpier Old Men,Romance Comedy,"a family wedding reignites the ancient feud between next-door neighbors and fishing buddies john and max. meanwhile, a sultry italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. but she's less interested in seafood than she is in cooking up a hot time with max. romance comedy fishing bestfriend duringcreditsstinger oldmen waltermatthau jacklemmon ann-margret howarddeutch 1995"
3,31357,Waiting to Exhale,Comedy Drama Romance,"cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive ""good man"" to break a string of less-than-stellar lovers. friends and confidants vannah, bernie, glo and robin talk it all out, determined to find a better way to breathe. comedy drama romance basedonnovel interracialrelationship singlemother divorce chickflick whitneyhouston angelabassett lorettadevine forestwhitaker 1995"
4,11862,Father of the Bride Part II,Comedy,"just when george banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that george's wife, nina, is expecting too. he was planning on selling their home, but that's a plan that -- like george -- will have to change with the arrival of both a grandchild and a kid of his own. comedy baby midlifecrisis confidence aging daughter motherdaughterrelationship pregnancy contraception gynecologist stevemartin dianekeaton martinshort charlesshyer 1995"


In [None]:
search_tags("romance")

Unnamed: 0,id,title,genres,tags
0,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995"
1,8844,Jumanji,Adventure Fantasy Family,"when siblings judy and peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite alan -- an adult who's been trapped inside the game for 26 years -- into their living room. alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures. adventure fantasy family boardgame disappearance basedonchildren'sbook newhome recluse giantinsect robinwilliams jonathanhyde kirstendunst joejohnston 1995"
2,15602,Grumpier Old Men,Romance Comedy,"a family wedding reignites the ancient feud between next-door neighbors and fishing buddies john and max. meanwhile, a sultry italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. but she's less interested in seafood than she is in cooking up a hot time with max. romance comedy fishing bestfriend duringcreditsstinger oldmen waltermatthau jacklemmon ann-margret howarddeutch 1995"
3,31357,Waiting to Exhale,Comedy Drama Romance,"cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive ""good man"" to break a string of less-than-stellar lovers. friends and confidants vannah, bernie, glo and robin talk it all out, determined to find a better way to breathe. comedy drama romance basedonnovel interracialrelationship singlemother divorce chickflick whitneyhouston angelabassett lorettadevine forestwhitaker 1995"
4,11862,Father of the Bride Part II,Comedy,"just when george banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that george's wife, nina, is expecting too. he was planning on selling their home, but that's a plan that -- like george -- will have to change with the arrival of both a grandchild and a kid of his own. comedy baby midlifecrisis confidence aging daughter motherdaughterrelationship pregnancy contraception gynecologist stevemartin dianekeaton martinshort charlesshyer 1995"
5,949,Heat,Action Crime Drama Thriller,"obsessive master thief, neil mccauley leads a top-notch crew on various insane heists throughout los angeles while a mentally unstable detective, vincent hanna pursues him without rest. each man recognizes and respects the ability and the dedication of the other even though they are aware their cat-and-mouse game may end in violence. action crime drama thriller robbery detective bank obsession chase shooting thief honor murder suspense heist betrayal money gang catandmouse criminalmastermind cultfilm ex-con heistmovie onelastjob loner bankjob neo-noir gunfight crimeepic alpacino robertdeniro valkilmer michaelmann 1995"
6,11860,Sabrina,Comedy Romance,"an ugly duckling having undergone a remarkable change, still harbors feelings for her crush: a carefree playboy, but not before his business-focused brother has something to say about it. comedy romance paris brotherbrotherrelationship chauffeur longisland fusion millionaire harrisonford juliaormond gregkinnear sydneypollack 1995"
7,45325,Tom and Huck,Action Adventure Drama Family,"a mischievous young boy, tom sawyer, witnesses a murder by the deadly injun joe. tom becomes friends with huckleberry finn, a boy with no future and no family. tom has to choose between honoring a friendship or honoring an oath because the town alcoholic is accused of the murder. tom and huck go through several adventures trying to retrieve evidence. action adventure drama family jonathantaylorthomas bradrenfro rachaelleighcook peterhewitt 1995"
8,9091,Sudden Death,Action Adventure Thriller,"international action superstar jean claude van damme teams with powers boothe in a tension-packed, suspense thriller, set against the back-drop of a stanley cup game.van damme portrays a father whose daughter is suddenly taken during a championship hockey game. with the captors demanding a billion dollars by game's end, van damme frantically sets a plan in motion to rescue his daughter and abort an impending explosion before the final buzzer... action adventure thriller terrorist hostage explosive vicepresident jean-claudevandamme powersboothe dorianharewood peterhyams 1995"
9,710,GoldenEye,Adventure Action Thriller,james bond must unmask the mysterious head of the janus syndicate and prevent the leader from utilizing the goldeneye weapons system to inflict devastating revenge on britain. adventure action thriller cuba falselyaccused secretidentity computervirus secretbase secretintelligenceservice kgb satellite specialcar cossack electromagneticpulse timebomb st.petersburgrussia ejectionseat redarmy piercebrosnan seanbean izabellascorupco martincampbell 1995


In [None]:
search_genres("romance")


Unnamed: 0,id,title,genres,tags
1324,621,Grease,Romance,"australian good girl sandy and greaser danny fell in love over the summer. but when they unexpectedly discover they're now in the same high school, will they be able to rekindle their romance despite their eccentric friends? romance flyingcar streetgang running graduation musical rivalry basedonplayormusical gossip makeover automobileracing nostalgic greaser wolfwhistle schooldance animatedcredits peprally wrongsideofthetracks mooning schoolprincipal johntravolta olivianewton-john stockardchanning randalkleiser 1978"
2998,78256,The End of the Affair,Romance,a civil servant's wife (deborah kerr) in wartime london vows to leave her injured lover (van johnson) if he recovers. romance speech party starcrossedlovers deborahkerr vanjohnson johnmills edwarddmytryk 1955
3321,13539,Here On Earth,Romance,a rich college kid is taught a lesson after a joy ride ends up destroying a country restaurant. romance carrace privateschool diner leeleesobieski chrisklein joshhartnett markpiznarski 2000
5132,116904,Bar Girls,Romance,the life and loves of gay women hanging out at a local tavern are examined in this slice of life film. romance nancyallisonwolfe lizad'agostino camilagriggs maritagiovanni 1994
6614,10934,Under the Tuscan Sun,Romance,"after a rough divoce, frances, a 35 year old book editor from san francisco takes a tour of tuscany at the urgings of her friends. on a whim she buys bramasole, a run down villa in the tuscan countryside and begins to piece her life together starting with the villa and finds that life sometimes has unexpected ways of giving her everything she wanted. romance depression toscana recreation author divorce womandirector dianelane sandraoh lindsayduncan audreywells 2003"
7306,17993,Henry & June,Romance,"while traveling in paris, author henry miller and his wife, june, meet anais nin, and sexual sparks fly as nin starts an affair with the openly bisexual june. when june is forced to return to the u.s., she gives nin her blessing to sleep with her husband. then, when june returns to france, an unexpected, and sometimes contentious, threesome forms. romance gayman lesbianinterest fredward umathurman mariademedeiros philipkaufman 1990"
8152,84655,The Black Orchid,Romance,an aging widower fights family disapproval when he falls in love with a gangster's widow. romance sophialoren anthonyquinn petermarkrichman martinritt 1958
9223,20160,Restless,Romance,"an old dog has a hard time learning new tricks in this drama set in turku. ari, a paramedic, is a chronic womanizer; he makes it a point of pride to never sleep with the same woman twice, and his nights are a long series of brazen one-night stands. but when ari meets tiina (laura malmivaara), something unexpected happens - he falls in love. for the first time, ari finds himself pursuing a long-term relationship, and he makes a genuine effort to remain faithful to her. but old habits die hard; tiina introduces ari to her circle of friends and temptation arises as he encounters hanna-riikka, a theology student, and ilona, who is soon to be married. despite ari's feelings for tiina, he begins having affairs with both hanna-riikka and ilona, leading to an unpleasant revelation on the day of ilona's nuptials. ~ mark deming, allrovi romance mikkonousiainen lauramalmivaara petterisummanen akulouhimies 2000"
9795,85926,Strangers When We Meet,Romance,"a suburban architect loves his wife but is bored with his marriage and with his work, so he takes up with the neglected, married beauty who lives down the street. romance infidelity marriage kirkdouglas kimnovak erniekovacs richardquine 1960"
11106,76344,Two for the Seesaw,Romance,"jerry ryan wanders aimlessly around new york after giving up his nebrascan law practice. his wife asks for a divorce. he meets gittel, a struggling dancer from greenwich village and they try to sort out their lives. an extended conversation piece with a static camera, but reflects the moral climate of the time. romance greenwichvillage robertmitchum shirleymaclaine edmonryan robertwise 1962"


In [None]:
search_title("Harry Potter")

Unnamed: 0,id,title,genres,tags
10550,674,Harry Potter and the Goblet of Fire,Adventure Fantasy Family,"harry starts his fourth year at hogwarts, competes in the treacherous triwizard tournament and faces the evil lord voldemort. ron and hermione help harry manage the pressure – but voldemort lurks, awaiting his chance to destroy harry and all that he stands for. adventure fantasy family magic dyinganddeath broom sorcerer'sapprentice schoolofwitchcraft chosenone blackmagic boardingschool vision tournament teenager wizard teenagehero basedonyoungadultnovel danielradcliffe rupertgrint emmawatson mikenewell 2005"
13887,767,Harry Potter and the Half-Blood Prince,Adventure Fantasy Family,"as harry begins his sixth year at hogwarts, he discovers an old book marked as 'property of the half-blood prince', and begins to learn more about lord voldemort's dark past. adventure fantasy family witch magic broom schoolofwitchcraft wizardry apparition teenagecrush werewolf danielradcliffe rupertgrint emmawatson davidyates 2009"
7725,673,Harry Potter and the Prisoner of Azkaban,Adventure Fantasy Family,"harry, ron and hermione return to hogwarts for another magic-filled year. harry comes face to face with danger yet again, this time in the form of escaped convict, sirius black – and turns to sympathetic professor lupin for help. adventure fantasy family flying traitor magic cuttingthecord childhero broom sorcerer'sapprentice schoolofwitchcraft griffon blackmagic timetravel bestfriend werewolf dark muggle aftercreditsstinger danielradcliffe rupertgrint emmawatson alfonsocuarón 2004"
5673,672,Harry Potter and the Chamber of Secrets,Adventure Fantasy Family,"ignoring threats to his life, harry returns to hogwarts to investigate – aided by ron and hermione – a mysterious series of attacks. adventure fantasy family flyingcar witch magic cuttingthecord childhero broom sorcerer'sapprentice schoolofwitchcraft giantsnake blackmagic aftercreditsstinger danielradcliffe rupertgrint emmawatson chriscolumbus 2002"
11923,675,Harry Potter and the Order of the Phoenix,Adventure Fantasy Family Mystery,"returning for his fifth year of study at hogwarts, harry is stunned to find that his warnings about the return of lord voldemort have been ignored. left with no choice, harry takes matters into his own hands, training a small group of students – dubbed 'dumbledore's army' – to defend themselves against the dark arts. adventure fantasy family mystery prophecy witch lossoflover magic cuttingthecord childhero dyinganddeath broom sorcerer'sapprentice schoolofwitchcraft blackmagic deathofafriend sorcery occultism danielradcliffe rupertgrint emmawatson davidyates 2007"
4756,671,Harry Potter and the Philosopher's Stone,Adventure Fantasy Family,"harry potter has lived under the stairs at his aunt and uncle's house his whole life. but on his 11th birthday, he learns he's a powerful wizard -- with a place waiting for him at the hogwarts school of witchcraft and wizardry. as he learns to harness his newfound powers with the help of the school's kindly headmaster, harry uncovers the truth about his parents' deaths -- and about the villain who's to blame. adventure fantasy family witch christmasparty magic cuttingthecord halloween childhero broom chosenone frog fantasyworld basedonyoungadultnovel danielradcliffe rupertgrint emmawatson chriscolumbus 2001"
16097,12444,Harry Potter and the Deathly Hallows: Part 1,Adventure Fantasy Family,"harry, ron and hermione walk away from their last year at hogwarts to find and destroy the remaining horcruxes, putting an end to voldemort's bid for immortality. but with harry's beloved dumbledore dead and voldemort's unscrupulous death eaters on the loose, the world is more dangerous than ever. adventure fantasy family corruption isolation radio magic teleportation bravery tension attack werewolf danielradcliffe emmawatson rupertgrint davidyates 2010"
17382,12445,Harry Potter and the Deathly Hallows: Part 2,Family Fantasy Adventure,"harry, ron and hermione continue their quest to vanquish the evil voldemort once and for all. just as things begin to look hopeless for the young wizards, harry discovers a trio of magical objects that endow him with powers to rival voldemort's formidable skills. family fantasy adventure selfsacrifice magic frog sorcerer school wizard 3d danielradcliffe rupertgrint emmawatson davidyates 2011"
3258,8989,Harry and the Hendersons,Comedy Family Fantasy,"returning from a hunting trip in the forest, the henderson family's car hits an animal in the road. at first they fear it was a man, but when they examine the ""body"" they find it's a ""bigfoot"". they think it's dead so they decide to take it home (there could be some money in this). as you guessed, it isn't dead. far from being the ferocious monster they fear ""harry"" to be, he's a friendly giant. comedy family fantasy badsmell bigfoot vegetarian hamburger huntingtrip wildlife cryptozoology family familyholiday footprint frenchcanadian sasquatch johnlithgow melindadillon margaretlangrick williamdear 1987"
27829,91767,Harry & Son,Drama,"widower harry keach is a construction worker who was raised to appreciate the importance of working for a living. he takes a dim view of his sensitive son howard's lackadaisical lifestyle and has a strained relationship with his daughter nina as he does not approve of her husband. when harry is fired from his job, his life changes drastically as he is made to focus on the relationships around him. drama petshop beer independentfilm writer widower childbirth paulnewman robbybenson ellenbarkin paulnewman 1984"


In [None]:

# ratings_df_raw=pd.read_csv('MovRec_Dataset/ratings.csv', low_memory=False)

ratings_df_raw=pd.read_csv('https://movrec.s3.amazonaws.com/MovRec_Dataset/ratings.csv', low_memory=False)



MemoryError: 

## Finding Users Liked Same Movie


In [None]:
ratings_df_raw.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [None]:
ratings_df_raw.shape

(26024289, 4)

In [None]:
#as "id" was not the common column name in both the dataframes although the data was same, we need to mention the both the name as per the dataframe in order to join
movies_ratings_df_join=final_df.merge(ratings_df_raw, left_on='id',right_on='movieId')
movies_ratings_df_join.shape

(11369230, 9)

we can see the last 5 rows with same movie titles but the userId is different as different user have rated the same movie

In [None]:
movies_ratings_df_join.tail()

Unnamed: 0,id,title,genres,tags,tags_post_stem,userId,movieId,rating,timestamp
11369225,111109,Century of Birthing,Drama,an artist struggles to finish his work while a storyline about a cult plays in his head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,an artist struggl to finish hi work while a storylin about a cult play in hi head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,33940,111109,2.5,1405878785
11369226,111109,Century of Birthing,Drama,an artist struggles to finish his work while a storyline about a cult plays in his head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,an artist struggl to finish hi work while a storylin about a cult play in hi head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,172224,111109,3.0,1399502972
11369227,111109,Century of Birthing,Drama,an artist struggles to finish his work while a storyline about a cult plays in his head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,an artist struggl to finish hi work while a storylin about a cult play in hi head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,210792,111109,3.0,1467090449
11369228,111109,Century of Birthing,Drama,an artist struggles to finish his work while a storyline about a cult plays in his head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,an artist struggl to finish hi work while a storylin about a cult play in hi head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,225396,111109,3.5,1399302912
11369229,111109,Century of Birthing,Drama,an artist struggles to finish his work while a storyline about a cult plays in his head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,an artist struggl to finish hi work while a storylin about a cult play in hi head. drama artist play pinoy angelaquino perrydizon hazelorencio lavdiaz 2011,269593,111109,0.5,1471835519


In [None]:
movies_ratings_df_join.title.value_counts()

The Million Dollar Hotel              91082
Terminator 3: Rise of the Machines    87901
Solaris                               84318
The 39 Steps                          77045
Monsoon Wedding                       74355
                                      ...  
Phil Spector                          1    
The Call                              1    
A Thousand Cuts                       1    
I Will Buy You                        1    
Shanghai Calling                      1    
Name: title, Length: 7238, dtype: int64

In [None]:
len(movies_ratings_df_join.title.unique())

7238

### Let us assume a user, X liked a movie M, let us find out who all are the users (UX) who liked the same movie M to find our niche (UX).
### now let us check what are the other movies M2,M3 which the same users (UX) like.
### now let us check who are the other users UY who liked those movies M2, M3.
### if those users YU, also like the same movie M2, as of our niche UX, may be those movies M2 are liked by everyone in general, so to avoid bias
### we will consider the least like movies M3, by other niche UY so that we can be very specific in our niche that those movies M3 would be the best to recommendation to the user X


Let us assume user X, likes Toy Story, which we have found from the viewing history/rating/watch time/no. of times watched etc.

In [None]:
title='Toy Story'

now finding the similar users UX like X, who also liked Toy Story and gave 4+ rating

In [None]:
similar_users=movies_ratings_df_join[(movies_ratings_df_join["title"] == title ) & (movies_ratings_df_join['rating']>=4)]['userId'].unique()
print(len(similar_users))
similar_users    
    

199


array([  2103,   6177,   6525,   8659,   9328,   9682,  13839,  15235,
        16287,  17210,  17211,  17325,  17489,  17545,  17765,  18274,
        18595,  19404,  21664,  23635,  27685,  28528,  28866,  29515,
        29611,  33067,  33296,  34876,  35190,  35808,  36342,  39216,
        39409,  39760,  40363,  41468,  42550,  42746,  43681,  44470,
        45264,  46200,  47253,  50057,  51615,  51943,  52672,  52794,
        54069,  56306,  60313,  66311,  66423,  66622,  67102,  68112,
        69979,  70766,  71266,  71832,  72206,  73948,  75133,  75357,
        75580,  76690,  77635,  81267,  81491,  82385,  83777,  84981,
        88615,  90789,  91516,  95090,  95885,  98243,  99478, 100309,
       100542, 101330, 101914, 108287, 108416, 111040, 112373, 112843,
       114034, 117252, 118990, 123841, 127879, 128297, 128620, 128979,
       129725, 130237, 131439, 132221, 134237, 134366, 135979, 137233,
       138710, 139886, 141711, 141725, 142142, 142905, 143161, 143998,
      

now checking what are the other movies which the above similar users also liked and gave 4+ rating

In [None]:

similar_user_recs=movies_ratings_df_join[(movies_ratings_df_join['userId'].isin(similar_users)) & (movies_ratings_df_join['rating']>=4)]['title']
similar_user_recs.unique().tolist()


    

['Toy Story',
 'Jumanji',
 'Heat',
 'Cutthroat Island',
 'Casino',
 'Sense and Sensibility',
 'Four Rooms',
 'Get Shorty',
 'Leaving Las Vegas',
 'The City of Lost Children',
 'Twelve Monkeys',
 'Dead Man Walking',
 'To Die For',
 'Se7en',
 "Mr. Holland's Opus",
 'From Dusk Till Dawn',
 "Things to Do in Denver When You're Dead",
 "Antonia's Line",
 'Angels and Insects',
 'Unforgettable',
 'The Bridges of Madison County',
 'Braveheart',
 'Taxi Driver',
 'Flirting with Disaster',
 'Apollo 13',
 'Batman Forever',
 'Belle de Jour',
 'Beyond Rangoon',
 'Crumb',
 'Desperado',
 'Devil in a Blue Dress',
 'Die Hard: With a Vengeance',
 'Lord of Illusions',
 'Mallrats',
 'The Net',
 'Safe',
 'Strange Days',
 'Under Siege 2: Dark Territory',
 'Bushwhacked',
 'Before Sunrise',
 'Clerks',
 'Don Juan DeMarco',
 'Disclosure',
 'Drop Zone',
 'Ed Wood',
 'Heavenly Creatures',
 'Interview with the Vampire',
 'Star Wars',
 'Legends of the Fall',
 "Mary Shelley's Frankenstein",
 'Milk Money',
 'Nell',
 'N

checking how many such movies are there

In [None]:
len(similar_user_recs.unique().tolist())

2626

checking the popular movies M1,M2,M3.. from the above list, which are popular in our similar users niche audience 

In [None]:
similar_user_recs.value_counts()

Toy Story                  199
Men in Black II            142
The Thomas Crown Affair    136
Dawn of the Dead           131
Once Were Warriors         116
                          ... 
Shooter                    1  
Dreamland                  1  
Philanthropy               1  
Perfect Stranger           1  
The One-Man Band           1  
Name: title, Length: 2626, dtype: int64

converting the count in % for an ease of understanding

In [None]:
similar_user_recs_per=similar_user_recs.value_counts()*100 / len(similar_users)
similar_user_recs_per    

Toy Story                  100.000000
Men in Black II            71.356784 
The Thomas Crown Affair    68.341709 
Dawn of the Dead           65.829146 
Once Were Warriors         58.291457 
                             ...     
Shooter                    0.502513  
Dreamland                  0.502513  
Philanthropy               0.502513  
Perfect Stranger           0.502513  
The One-Man Band           0.502513  
Name: title, Length: 2626, dtype: float64

narrowing down 2626 movies by filtering those movies which are at least like by more than 10% of our similar users niche audience, which came to 435

In [None]:
# movies those were liked more than 10% audience
similar_user_recs_per_filter =similar_user_recs_per[similar_user_recs_per>10]
similar_user_recs_per_filter    
    

Toy Story                  100.000000
Men in Black II            71.356784 
The Thomas Crown Affair    68.341709 
Dawn of the Dead           65.829146 
Once Were Warriors         58.291457 
                             ...     
Syriana                    10.050251 
The Sicilian Clan          10.050251 
Closer                     10.050251 
Ask the Dust               10.050251 
Rush Hour                  10.050251 
Name: title, Length: 435, dtype: float64

now finding those users, UY who have liked the above 435 movies and gave 4+ ratings

In [None]:
all_users= movies_ratings_df_join[(movies_ratings_df_join['title'].isin(similar_user_recs_per_filter.index)) & (movies_ratings_df_join['rating']>=4)]
all_users   

Unnamed: 0,id,title,genres,tags,tags_post_stem,userId,movieId,rating,timestamp
1,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995",2103,862,5.0,946044912
3,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995",6177,862,4.0,859415226
4,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995",6525,862,4.0,857388995
7,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995",8659,862,4.0,997143296
8,862,Toy Story,Animation Comedy Family,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995","led by woody, andy' toy live happili in hi room until andy' birthday bring buzz lightyear onto the scene. afraid of lose hi place in andy' heart, woodi plot against buzz. but when circumst separ buzz and woodi from their owner, the duo eventu learn to put asid their differences. anim comedi famili jealousi toy boy friendship friend rivalri boynextdoor newtoy toycomestolif tomhank timallen donrickl johnlasset 1995",9328,862,4.0,1037486302
...,...,...,...,...,...,...,...,...,...
11367614,3104,Frankenstein Created Woman,Horror ScienceFiction,"a deformed tormented girl drowns herself after her lover is framed for murder and guillotined. baron frankenstein, experimenting with the transfer of souls, places the boy's soul into her body, bringing christina back to life. driven by revenge, she carries out a violent retribution on those responsible for both deaths. horror sciencefiction frankenstein hammerhorror petercushing susandenberg thorleywalters terencefisher 1967","a deform torment girl drown herself after her lover is frame for murder and guillotined. baron frankenstein, experi with the transfer of souls, place the boy' soul into her body, bring christina back to life. driven by revenge, she carri out a violent retribut on those respons for both deaths. horror sciencefict frankenstein hammerhorror petercush susandenberg thorleywalt terencefish 1967",270548,3104,4.0,1112153954
11367616,3104,Frankenstein Created Woman,Horror ScienceFiction,"a deformed tormented girl drowns herself after her lover is framed for murder and guillotined. baron frankenstein, experimenting with the transfer of souls, places the boy's soul into her body, bringing christina back to life. driven by revenge, she carries out a violent retribution on those responsible for both deaths. horror sciencefiction frankenstein hammerhorror petercushing susandenberg thorleywalters terencefisher 1967","a deform torment girl drown herself after her lover is frame for murder and guillotined. baron frankenstein, experi with the transfer of souls, place the boy' soul into her body, bring christina back to life. driven by revenge, she carri out a violent retribut on those respons for both deaths. horror sciencefict frankenstein hammerhorror petercush susandenberg thorleywalt terencefish 1967",270654,3104,4.0,977262838
11367619,3104,Frankenstein Created Woman,Horror ScienceFiction,"a deformed tormented girl drowns herself after her lover is framed for murder and guillotined. baron frankenstein, experimenting with the transfer of souls, places the boy's soul into her body, bringing christina back to life. driven by revenge, she carries out a violent retribution on those responsible for both deaths. horror sciencefiction frankenstein hammerhorror petercushing susandenberg thorleywalters terencefisher 1967","a deform torment girl drown herself after her lover is frame for murder and guillotined. baron frankenstein, experi with the transfer of souls, place the boy' soul into her body, bring christina back to life. driven by revenge, she carri out a violent retribut on those respons for both deaths. horror sciencefict frankenstein hammerhorror petercush susandenberg thorleywalt terencefish 1967",270731,3104,4.0,959088560
11367621,3104,Frankenstein Created Woman,Horror ScienceFiction,"a deformed tormented girl drowns herself after her lover is framed for murder and guillotined. baron frankenstein, experimenting with the transfer of souls, places the boy's soul into her body, bringing christina back to life. driven by revenge, she carries out a violent retribution on those responsible for both deaths. horror sciencefiction frankenstein hammerhorror petercushing susandenberg thorleywalters terencefisher 1967","a deform torment girl drown herself after her lover is frame for murder and guillotined. baron frankenstein, experi with the transfer of souls, place the boy' soul into her body, bring christina back to life. driven by revenge, she carri out a violent retribut on those respons for both deaths. horror sciencefict frankenstein hammerhorror petercush susandenberg thorleywalt terencefish 1967",270776,3104,5.0,974739774


in the above output, movie repeats because different users can give ratings to the same movie

#counting unique users
len(all_users['userId'].unique())

extracting popular movies from this audience, UY

In [None]:
all_users_rec=all_users['title'].value_counts()*100/len(all_users['userId'].unique())
all_users_rec    
    

The Million Dollar Hotel              33.347067
Terminator 3: Rise of the Machines    28.646324
Solaris                               27.699532
The 39 Steps                          24.712851
Once Were Warriors                    23.122393
                                        ...    
Breathless                            0.151352 
Night and Fog                         0.132853 
Pan's Labyrinth                       0.111832 
WarGames                              0.083664 
Toy Story                             0.083664 
Name: title, Length: 435, dtype: float64

comparing the similar niche audience (UA) popularity score vs overall audience (UMA)

In [None]:
rec_per=pd.concat([similar_user_recs_per_filter, all_users_rec],axis=1)
rec_per    

Unnamed: 0,title,title.1
Toy Story,100.000000,0.083664
Men in Black II,71.356784,16.724825
The Thomas Crown Affair,68.341709,14.541992
Dawn of the Dead,65.829146,15.110403
Once Were Warriors,58.291457,23.122393
...,...,...
Syriana,10.050251,4.594797
The Sicilian Clan,10.050251,2.025175
Closer,10.050251,3.002657
Ask the Dust,10.050251,2.224455


In [None]:
rec_per.columns=['similar%','all_avg%']
    
rec_per.columns    

Index(['similar%', 'all_avg%'], dtype='object')

calculating similarity score by taking ratio of similar user to all user and then sorting in descending order in the next step

In [None]:
rec_per['score']=rec_per['similar%']/rec_per['all_avg%']
rec_per['score']   

Toy Story                  1195.256281
Men in Black II            4.266519   
The Thomas Crown Affair    4.699611   
Dawn of the Dead           4.356545   
Once Were Warriors         2.520996   
                             ...      
Syriana                    2.187311   
The Sicilian Clan          4.962658   
Closer                     3.347119   
Ask the Dust               4.518073   
Rush Hour                  5.319343   
Name: score, Length: 435, dtype: float64

which gives us different result which are not biased 

In [None]:
 rec_per=rec_per.sort_values("score" , ascending=False)
 rec_per

Unnamed: 0,similar%,all_avg%,score
Toy Story,100.000000,0.083664,1195.256281
WarGames,11.055276,0.083664,132.138885
Before Sunset,15.075377,0.155977,96.651451
Pan's Labyrinth,10.050251,0.111832,89.868893
Night and Fog,10.552764,0.132853,79.431588
...,...,...,...
Three Colors: Red,26.130653,19.685020,1.327439
And Then There Were None,11.055276,9.064308,1.219649
5 Card Stud,17.587940,17.916302,0.981672
License to Wed,18.592965,20.178595,0.921420


all the code now in a single function now

In [None]:
def find_similar_movies_by_tags(title):
    similar_users=movies_ratings_df_join[(movies_ratings_df_join["title"] == title ) & (movies_ratings_df_join['rating']>=4)]['userId'].unique()
    similar_user_recs=movies_ratings_df_join[(movies_ratings_df_join['userId'].isin(similar_users)) & (movies_ratings_df_join['rating']>=4)]['title']
    
    similar_user_recs_per=similar_user_recs.value_counts()*100/ len(similar_users)
    similar_user_recs_per_filter =similar_user_recs_per[similar_user_recs_per>10]
    
    all_users= movies_ratings_df_join[(movies_ratings_df_join['title'].isin(similar_user_recs_per_filter.index)) & (movies_ratings_df_join['rating']>=4)]
    all_users_rec=all_users['title'].value_counts()*100/len(all_users['userId'].unique())
    
    rec_per=pd.concat([similar_user_recs_per_filter, all_users_rec],axis=1)
    rec_per.columns=['similar%','all_avg%']
    
    rec_per['score']=rec_per['similar%']/rec_per['all_avg%']
    rec_per=rec_per.sort_values("score" , ascending=False)
    
    return rec_per.merge(final_df, left_index=True,right_on="title",how='left')[["id","title","genres","score","tags"]][:5]
    
    

In [None]:
tags='Sci-fi'

In [None]:
def rs_tags(tags):
    tags_=search_tags(tags).title[search_tags(tags).title.index[0]]
    
    return tags_

In [None]:

rs_tags(tags)


'Third Guest'

In [None]:
find_similar_movies_by_tags('Toy Story')

Unnamed: 0,id,title,genres,score,tags
0,862,Toy Story,Animation Comedy Family,1195.256281,"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. animation comedy family jealousy toy boy friendship friends rivalry boynextdoor newtoy toycomestolife tomhanks timallen donrickles johnlasseter 1995"
6825,860,WarGames,Thriller ScienceFiction,132.138885,"high school student david lightman (matthew broderick) has a talent for hacking. but while trying to hack into a computer system to play unreleased video games, he unwittingly taps into the defense department's war computer and initiates a confrontation of global proportions! together with his girlfriend (ally sheedy) and a wizardly computer genius (john wood), david must race against time to outwit his opponent...and prevent a nuclear armageddon. thriller sciencefiction videogame artificialintelligence fbi coldwar hacker prosecution norad government computer nuclearthreat matthewbroderick dabneycoleman allysheedy johnbadham 1983"
7911,80,Before Sunset,Drama Romance,96.651451,nine years ago two strangers met by chance and spent a night in vienna that ended before sunrise. they are about to meet for the first time since. now they have one afternoon to find out if they belong together. drama romance paris journalist dialogue talking soulmates walking bookshop loveofone'slife author ethanhawke juliedelpy vernondobtcheff richardlinklater 2004
11304,1417,Pan's Labyrinth,Fantasy Drama War,89.868893,"living with her tyrannical stepfather in a new home with her pregnant mother, 10-year-old ofelia feels alone until she explores a decaying labyrinth guarded by a mysterious faun who claims to know her destiny. if she wishes to return to her real father, ofelia must complete three terrifying tasks. fantasy drama war spain resistance servant antihero fairy fairytale francoregime army princess love woods king hiding labyrinth mythological ivanabaquero maribelverdú sergilópez guillermodeltoro 2006"
8504,803,Night and Fog,History Documentary War,79.431588,the documentary film from french director resnais set ten years after the second world war depicts the problems of auschwitz with shocking images from the concentration camps. history documentary war germany destructionofacivilization polen worldwarii prisonersofwar auschwitz nazis independentfilm nazigermany torture crime michelbouquet reinhardheydrich heinrichhimmler alainresnais 1955


In [None]:
find_similar_movies_by_tags(rs_tags(tags))

Unnamed: 0,id,title,genres,score,tags


In [None]:
search_tags(tags)

Unnamed: 0,id,title,genres,tags
42431,437122,Third Guest,Drama ScienceFiction,sci-fi end of the world drama drama sciencefiction elenaharding garylind fraserwatson 2016
5248,27834,CQ,Comedy Drama ScienceFiction,"a young filmmaker in 1960s paris juggles directing a cheesy sci-fi debacle, directing his own personal art film, coping with his crumbling relationship with his girlfriend, and a new-found infatuation with the sci-fi film's starlet. comedy drama sciencefiction jeremydavies angelalindvall élodiebouchez romancoppola 2001"
3047,926,Galaxy Quest,Comedy Family ScienceFiction,"the stars of a 1970s sci-fi show - now scraping a living through re-runs and sci-fi conventions - are beamed aboard an alien spacecraft. believing the cast's heroic on-screen dramas are historical documents of real-life adventures, the band of aliens turn to the ailing celebrities for help in their quest to overcome the oppressive regime in their solar system. comedy family sciencefiction spacebattle spaceship spoof fictionaltvshow timallen sigourneyweaver alanrickman deanparisot 1999"
45285,222872,Starquest II,Thriller ScienceFiction,sci-fi thriller directed by fred gallo. thriller sciencefiction spacecraft alien adambaldwin robertenglund duanedavis fredgallo 1996
9441,11049,Interstella 5555: The 5tory of the 5ecret 5tar 5ystem,ScienceFiction Music Animation,"a sci-fi anime house-musical movie collaboration between daft punk--and their music--and designer leiji matsumoto. during the recording of their discovery album and using the themes of sci-fi celebrity, decadence and space travel, daft punk--with help from cedric hervet--wrote the story and inspired seasoned japanese animators to symbiotically create this stunning space musical. sciencefiction music animation spaceman musical recordproducer space rescue musicband romanthony kazuhisatakenôchi 2003"
28069,326591,Hollywood between Paranoia and Sci-Fi. The Power of Myth,Documentary,gives insight into the creators mindset and how they culled from real life events to create some of the biggest sci fi films of all time. documentary sciencefiction womandirector sci-fifan jamescameron rolandemmerich georgelucas juliakuperberg clarakuperberg 2011
40306,316784,The Black Hole,Thriller ScienceFiction,a sci-fi/thriller centered on a high school violinist who witnesses the collapse of space and time. thriller sciencefiction blackhole malcolmmcdowell deancain aaronperilo markstevengrove 2015
34181,238307,The Isle of Lost Ships,Drama ScienceFiction Music Adventure Romance,"a musical partially based on a sci-fi novel ""the isle of lost ships"" by aleksandr belyaev. drama sciencefiction music adventure romance larisabelogurova gediminasstorpirštis konstantinraykin yevgeniginzburg raufmamedov 1987"
24911,45167,Pumzi,ScienceFiction Drama,"a sci-fi film about futuristic africa, 35 years after world war iii, 'the water war'. sciencefiction drama womandirector chantelleburger kudzanimoswela wanurikahiu 2009"
43812,14543,The Matrix Revisited,Documentary,the film goes behind the scenes of the 1999 sci-fi movie the matrix. documentary behindthescenes fanculture visualeffect keanureeves hugoweaving laurencefishburne joshoreck 2001


In [None]:
search_title(rs_tags(tags))

Unnamed: 0,id,title,genres,tags
42431,437122,Third Guest,Drama ScienceFiction,sci-fi end of the world drama drama sciencefiction elenaharding garylind fraserwatson 2016
23860,241848,The Guest,Mystery Thriller Action,"a soldier introduces himself to the peterson family, claiming to be a friend of their son who died in action. after the young man is welcomed into their home, a series of accidental deaths seem to be connected to his presence. mystery thriller action bar halloween highschool iraqwarveteran murder diner mumblegore danstevens maikamonroe sheilakelley adamwingard 2014"
31709,130374,Guest Wife,Comedy Romance,"christopher price, a small-town bank executive, continues to be loyal to and idolize his boyhood friend, joseph jefferson parker, a famous war correspondent. but chris's wife, mary, is none to fond of joe and tired of her husband's idolizing. on the eve of the price's second-honeymoon trip to new york city, joe arrives and tells chris that he needs someone to pose as his wife in order to fool his boss in nyc, who thinks joe got married to an overseas woman while on an assignment. chris pushes mary into posing as joe's wife. in new york, this leads to many complications and misunderstandings, with mary finally deciding to teach chris and joe a lesson by making them believe she is in love with joe. comedy romance smalltown trip warcorrespondent boyhoodfriend claudettecolbert donameche dickforan samwood 1945"
42041,411088,The Invisible Guest,Mystery Crime Thriller,"""the invisible guest” turns on a young businessman who wakes up in a hotel room locked from the inside with the dead body of his lover next to him. he hires a prestigious lawyer, and over one night they work together to clarify what happened in a frenetic race against time. mystery crime thriller judge suspect interrogation falseaccusationofmurder unreliablenarrator confessionofcrime uncovertruth accusation mariocasas josécoronado bárbaralennie oriolpaulo 2016"
22164,20894,Guest from the Future,Family ScienceFiction Adventure,"6-grader kolya gerasimov discovers a time machine in a basement of an old house in moscow and gets transferred into the 21st century. there he is allowed to look around. accidentally, kolya witnesses two space pirates who arrive from saturn and later try to steal a device called a ""mielophone"" (which can read thoughts) from alisa seleznyova - a girl that performs experiments with this device and animals. kolya manages to save the device from the pirates and brings it back to the 20th century. but both pirates and alisa get there too. alisa knows where kolya studies but doesn't know what he looks like. pirates saw kolya, but don't know anything about him. written by boris shafir family sciencefiction adventure natalyaguseva alekseifomkin vyacheslavnevinnyy pavelarsyonov 1984"
22949,42802,The Dinner Guest,Comedy,"fifty years old, three years past of unemployment, benefits decreasing very quickly... gerard is at the end of the rope when a position is offered him in indonesia. to win the favor of his new employer, gerard invites him to dinner at home. fatal error ! terrified at the idea of not being up to par, his wife colette begs alexandre, their neighbor to help. communication guru, alexander accepts the challenge and gives a makeover to the couple in twenty-four hours. apartment, decoration, lifestyle, dinner menu, uniforms, culture... everything ! comedy danielauteuil valérielemercier thierrylhermitte laurentbouhnik 2007"
28364,42288,Guest in the House,Drama Thriller,"evelyn (anne baxter), an emotionally vulnerable and unstable woman, stays at the home of her doctor dan proctor (scott mckay). there she meets and falls in love with his brother, douglas (ralph bellamy), who is happily married to ann (ruth warrick). evelyn then sets forth to break up the happy marriage and win the love of douglas -- with tragic results. drama thriller suspense annebaxter ralphbellamy alinemacmahon johnbrahm 1944"
31386,117618,The Guest House,Romance,"before leaving for college, a recently dumped goth girl's life changes forever when she falls in love with a smart and professional college grad who is staying in the family's guest house. romance love lesbian losangeles girl gothic ruthreynolds madelinemerrit tommccafferty michaelbaumgarten 2012"
37341,34144,"Dear Guest, When Will You Leave?",Comedy Foreign,"atithi tum kab jaoge? is the story of puneet and munmun, a happily married couple living in mumbai whose lives take an interesting turn when a distant relative, chachaji turns up unannounced at their doorstep from a far off village. the guest overstays his welcome so much so that the exasperated couple come up with various ploys to hasten his departure to hilarious results. comedy foreign ajaydevgn sanjaymishra pareshrawal ashwanidhir 2010"
18047,121462,My Mother and Her Guest,Drama,"a single mother takes on a boarder, with intriguing results. drama singlemother dokum-bong kimjin-kyu haneun-jin shinsang-ok 1961"
