In [None]:
# Import the necessary libraries

import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the HollywoodMovies dataset from a CSV file given by a relative path
movie_data = pd.read_csv('HollywoodMovies.csv')
# Preview the first rows of the loaded frame
movie_data.head()

Unnamed: 0,Movie,LeadStudio,RottenTomatoes,AudienceScore,Story,Genre,TheatersOpenWeek,OpeningWeekend,BOAvgOpenWeekend,DomesticGross,ForeignGross,WorldGross,Budget,Profitability,OpenProfit,Year
0,Spider-Man 3,Sony,61.0,54.0,Metamorphosis,Action,4252.0,151.1,35540.0,336.53,554.34,890.87,258.0,345.3,58.57,2007
1,Shrek the Third,Paramount,42.0,57.0,Quest,Animation,4122.0,121.6,29507.0,322.72,476.24,798.96,160.0,499.35,76.0,2007
2,Transformers,Paramount,57.0,89.0,Monster Force,Action,4011.0,70.5,17577.0,319.25,390.46,709.71,150.0,473.14,47.0,2007
3,Pirates of the Caribbean: At World's End,Disney,45.0,74.0,Rescue,Action,4362.0,114.7,26302.0,309.42,654.0,963.42,300.0,321.14,38.23,2007
4,Harry Potter and the Order of the Phoenix,Warner Bros,78.0,82.0,Quest,Adventure,4285.0,77.1,17998.0,292.0,647.88,939.89,150.0,626.59,51.4,2007


In [None]:
# Add an index column holding each row's position.
# The guard makes this cell safe to re-run: without it, a second execution
# would insert another 'index' column and rename it to a duplicate
# 'movie_index', breaking later lookups that expect a single column.
if 'movie_index' not in movie_data.columns:
  movie_data.reset_index(inplace=True)
  movie_data.rename(columns={'index': 'movie_index'}, inplace=True)

# Inspect the dataset to verify the new index column
print(movie_data.head())

   movie_index                                      Movie   LeadStudio  \
0            0                               Spider-Man 3         Sony   
1            1                            Shrek the Third    Paramount   
2            2                               Transformers    Paramount   
3            3   Pirates of the Caribbean: At World's End       Disney   
4            4  Harry Potter and the Order of the Phoenix  Warner Bros   

   RottenTomatoes  AudienceScore          Story      Genre  TheatersOpenWeek  \
0            61.0           54.0  Metamorphosis     Action            4252.0   
1            42.0           57.0          Quest  Animation            4122.0   
2            57.0           89.0  Monster Force     Action            4011.0   
3            45.0           74.0         Rescue     Action            4362.0   
4            78.0           82.0          Quest  Adventure            4285.0   

   OpeningWeekend  BOAvgOpenWeekend  DomesticGross  ForeignGross  WorldGro

In [None]:
# Columns whose text is combined to describe each movie
selected_features = ['Movie', 'LeadStudio', 'Story', 'Genre', 'Year']

# Replace missing values with empty strings before concatenating
for feature in selected_features:
  movie_data[feature] = movie_data[feature].fillna('')

# 'Year' has to be a string so it can be concatenated with the text columns
movie_data['Year'] = movie_data['Year'].astype(str)

# Join the selected columns left to right, separated by single spaces
combined_features = movie_data[selected_features[0]]
for feature in selected_features[1:]:
  combined_features = combined_features + ' ' + movie_data[feature]

In [None]:
# Show the combined text built for each movie
print(combined_features)

0            Spider-Man 3 Sony Metamorphosis Action 2007
1         Shrek the Third Paramount Quest Animation 2007
2       Transformers Paramount Monster Force Action 2007
3      Pirates of the Caribbean: At World's End Disne...
4      Harry Potter and the Order of the Phoenix Warn...
                             ...                        
965                               The Canyons IFC   2013
966                              The Call TriStar   2013
967    The English Teacher Cinedigm Entertainment   2013
968                 John Dies at the End Magnolia   2013
969                  Lovelace Radius-TWC  Biography 2013
Length: 970, dtype: object


In [None]:
# Turn each movie's combined text into a TF-IDF feature vector
vectorizer = TfidfVectorizer()
# Fit the vectorizer on the combined text and transform that same text
feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Compute the pairwise cosine similarity between all movie feature vectors

# The printed matrix is symmetric with 1.0 on the diagonal (each movie vs. itself)
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.07870827 0.16383454 ... 0.         0.         0.        ]
 [0.07870827 1.         0.18616693 ... 0.0282509  0.02648703 0.        ]
 [0.16383454 0.18616693 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.0282509  0.         ... 1.         0.06468246 0.04208949]
 [0.         0.02648703 0.         ... 0.06468246 1.         0.03946159]
 [0.         0.         0.         ... 0.04208949 0.03946159 1.        ]]


In [None]:
# Collect every movie title in the dataset into a plain Python list

list_of_all_titles = movie_data.Movie.to_list()
print(list_of_all_titles)

['Spider-Man 3', 'Shrek the Third', 'Transformers', "Pirates of the Caribbean: At World's End", 'Harry Potter and the Order of the Phoenix', 'I Am Legend', 'The Bourne Ultimatum', 'National Treasure: Book of Secrets', 'Alvin and the Chipmunks', '300', 'Ratatouille', 'The Simpsons Movie', 'Wild Hogs', 'Knocked Up', 'Juno', 'Rush Hour 3', 'Live Free or Die Hard', 'Fantastic Four: Rise of the Silver Surfer', 'American Gangster', 'Enchanted', 'Bee Movie', 'Superbad', 'I Now Pronounce You Chuck and Larry', 'Hairspray', 'Blades of Glory', "Ocean's 13", 'Ghost Rider', 'Evan Almighty', 'Meet the Robinsons', 'Norbit', 'The Bucket List', 'The Game Plan', 'Beowulf', 'Disturbia', 'No Country for Old Men', 'The Golden Compass', "Charlie Wilson's War", 'Saw IV', 'Stomp the Yard', "Surf's Up", 'Halloween', "Tyler Perry's Why Did I get Married", 'TMNT', 'P.S. I Love You', '3:10 to Yuma', 'Sweeney Todd: The Demon Barber of Fleet Street', 'Music and Lyrics', 'Michael Clayton', 'Premonition', 'The Kingdo

In [None]:
# Ask the user for a movie title (it does not have to be spelled exactly)
movie_name = input("Enter the movie name: ")

# Fuzzy-match the input against the known titles.
# n=3 and cutoff=0.6 are difflib's defaults, spelled out here for clarity.
find_close_match = difflib.get_close_matches(
    movie_name,
    list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_match)

Enter the movie name: iron man
['Iron Man', 'Iron Man 3', 'Iron Man 2']


In [None]:
# Take the best fuzzy match; fail with a clear message when there is none
if not find_close_match:
  raise ValueError('No movie title close to ' + repr(movie_name) + ' was found')
close_match = find_close_match[0]
print(close_match)

# Select the matched row's index label (not its 'Movie' value) so the result
# can be used to index into the similarity matrix
index_of_the_movie = movie_data[movie_data.Movie == close_match].index[0]
print(index_of_the_movie)

Iron Man
Iron Man


In [None]:
# getting a list of similar movies

# Row label of the matched movie in the DataFrame index
index_of_the_movie = movie_data.index[movie_data.Movie == close_match][0]

# Pair every movie position with its similarity to the matched movie
similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_the_movie])
]
print(similarity_score)

[(0, np.float64(0.49992371273960196)), (1, np.float64(0.08701640564449797)), (2, np.float64(0.17636423461512551)), (3, np.float64(0.04280544768823441)), (4, np.float64(0.0)), (5, np.float64(0.0)), (6, np.float64(0.0)), (7, np.float64(0.0)), (8, np.float64(0.0)), (9, np.float64(0.06381088292960435)), (10, np.float64(0.0)), (11, np.float64(0.0)), (12, np.float64(0.0)), (13, np.float64(0.0)), (14, np.float64(0.0)), (15, np.float64(0.055350648342246074)), (16, np.float64(0.04083432958676101)), (17, np.float64(0.04030033991658495)), (18, np.float64(0.0)), (19, np.float64(0.0)), (20, np.float64(0.08048215258427856)), (21, np.float64(0.0)), (22, np.float64(0.0)), (23, np.float64(0.0)), (24, np.float64(0.08862823821594809)), (25, np.float64(0.0)), (26, np.float64(0.05788928528004015)), (27, np.float64(0.0)), (28, np.float64(0.0)), (29, np.float64(0.10740172730540423)), (30, np.float64(0.0)), (31, np.float64(0.0)), (32, np.float64(0.14816249520902247)), (33, np.float64(0.10250718368700863)), (3

In [None]:
# sorting the movies based on their similarity score

# Copy the pairs, then order them in place from most to least similar
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
print(sorted_similar_movies)

[(92, np.float64(1.0000000000000002)), (422, np.float64(0.7242711311422549)), (823, np.float64(0.5819356680265827)), (0, np.float64(0.49992371273960196)), (265, np.float64(0.32797241181394404)), (239, np.float64(0.3256748789559931)), (284, np.float64(0.32312068276335304)), (106, np.float64(0.3212379126092695)), (626, np.float64(0.3081449410198024)), (831, np.float64(0.2792170717054824)), (551, np.float64(0.2683791947585087)), (119, np.float64(0.267663953210493)), (724, np.float64(0.24649076329006464)), (128, np.float64(0.24276575897122588)), (525, np.float64(0.22711467060524768)), (366, np.float64(0.21395554265314626)), (116, np.float64(0.21368620284805953)), (528, np.float64(0.21032685793932482)), (614, np.float64(0.20791196265156794)), (914, np.float64(0.20411493155585927)), (894, np.float64(0.19551063566911236)), (931, np.float64(0.1941879832876271)), (770, np.float64(0.19354422024371223)), (270, np.float64(0.1867454754576883)), (212, np.float64(0.18389847903039136)), (464, np.float

In [None]:
# Get recommendations based on the user's preference
def get_recommendation(movie_name, top_n=10):
  """Print the titles most similar to the closest match for ``movie_name``.

  The name is fuzzy-matched against the 'Movie' column of the global
  ``movie_data`` frame; the best match's 'movie_index' value selects a row
  of the global ``similarity`` matrix, and the ``top_n`` highest-scoring
  titles are printed in descending order of similarity.

  Parameters:
    movie_name: title typed by the user (need not be spelled exactly).
    top_n: maximum number of titles to print (default 10).

  Returns:
    None. Results are printed. If no title is close enough to
    ``movie_name``, a message is printed and nothing else happens.
  """
  list_of_all_titles = movie_data['Movie'].tolist()
  find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

  # get_close_matches returns an empty list when nothing is similar enough;
  # indexing [0] would raise IndexError and abort the interactive loop.
  if not find_close_match:
    print(f'No close match found for "{movie_name}". Please try another title.')
    return

  close_match = find_close_match[0]
  index_of_the_movie = movie_data[movie_data.Movie == close_match]['movie_index'].values[0]
  similarity_score = list(enumerate(similarity[index_of_the_movie]))
  sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
  print('Movies suggested for you : \n')

  # Only the first top_n entries are ever printed, so stop there instead of
  # running a DataFrame lookup for every row in the dataset.
  for rank, (index, _score) in enumerate(sorted_similar_movies[:top_n], start=1):
    title_from_index = movie_data[movie_data.movie_index==index]['Movie'].values[0]
    print(rank, '.', title_from_index)


# Keep recommending until the user types 'stop' or 'exit'
movie_name = input(' Enter your favourite movie name : ')
while movie_name not in ('stop', 'exit'):
  get_recommendation(movie_name)
  movie_name = input(' Enter your favourite movie name : ')
print('Thanks for Ask Movie Recommendation ')

 Enter your favourite movie name : iron man
Movies suggested for you : 

1 . Iron Man
2 . Iron Man 2
3 . Iron Man 3
4 . Spider-Man 3
5 . District 9
6 . Avatar
7 . I Love You, Man
8 . The Incredible Hulk
9 . The Iron Lady
10 . Man of Steel
 Enter your favourite movie name : spider man
Movies suggested for you : 

1 . Spider-Man 3
2 . The Amazing Spider-Man
3 . Iron Man
4 . District 9
5 . Avatar
6 . Iron Man 2
7 . Man of Steel
8 . Iron Man 3
9 . Green Lantern
10 . The Incredible Hulk
 Enter your favourite movie name : avatar
Movies suggested for you : 

1 . Avatar
2 . District 9
3 . Iron Man
4 . Spider-Man 3
5 . Green Lantern
6 . The Incredible Hulk
7 . Taken
8 . The Princess and the Frog
9 . Beastly
10 . Captain America: The First Avenger
 Enter your favourite movie name : exit
Thanks for Ask Movie Recommendation 
