In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load data
# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this exact file exists. Consider a configurable, relative data directory.
movies_data = pd.read_csv(r'E:\movies.csv')

# Display the shape and info of the dataset
print(movies_data.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
movies_data.info()



        

(4803, 24)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_lan

In [3]:
# Text columns that feed the recommender
selected_features = 'genres keywords tagline cast director'.split()
print(selected_features)

# Report how many entries are missing in each column of the dataset
print(movies_data.isnull().sum())

['genres', 'keywords', 'tagline', 'cast', 'director']
index                      0
budget                     0
genres                    28
homepage                3091
id                         0
keywords                 412
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
cast                      43
crew                       0
director                  30
dtype: int64


In [4]:
# Missing text in the selected columns becomes an empty string, so that the
# string concatenation below never produces NaN
movies_data[selected_features] = movies_data[selected_features].fillna('')

# Build one space-separated string per movie: genres first, then the
# remaining text columns appended in a fixed order
combined_features = movies_data['genres']
for column_name in ('keywords', 'tagline', 'cast', 'director'):
    combined_features = combined_features + ' ' + movies_data[column_name]


In [11]:
# Convert the text data to feature vectors.
# TfidfVectorizer is used with its default settings; fit_transform learns the
# vocabulary from combined_features and returns one TF-IDF row per entry.
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

# Calculate the cosine similarity.
# Called with a single argument, cosine_similarity compares the rows of
# feature_vectors against each other, so similarity[i][j] is the score
# between row i and row j. get_movie_recommendations reads this matrix.
similarity = cosine_similarity(feature_vectors)

# Function to get movie recommendations
def get_movie_recommendations(movie_name, top_n=30):
    """Print the titles most similar to the closest match for ``movie_name``.

    The name is fuzzy-matched against the ``title`` column of the
    module-level ``movies_data`` frame with ``difflib.get_close_matches``.
    The best match's row of the module-level ``similarity`` matrix is then
    ranked from the highest score to the lowest and the leading titles are
    printed as a numbered list. The matched movie itself is not filtered out
    of the ranking.

    Parameters
    ----------
    movie_name : str
        Title typed by the user; it does not have to be exact.
    top_n : int, optional
        Maximum number of titles to print. Defaults to 30.

    Returns
    -------
    None
        Results are printed. A fixed message is printed instead when no
        title is close enough to ``movie_name``.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Create a list with all the movie names given in the dataset
    list_of_all_titles = movies_data['title'].tolist()

    # Find the closest match for the movie name given by the user
    find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

    if not find_close_match:
        print("No match found for the movie name provided.")
        return

    close_match = find_close_match[0]

    # Row *position* of the first movie with that title. The similarity matrix
    # is addressed by position, so an index label must not be used here: the
    # two only coincide while movies_data keeps a default RangeIndex.
    index_of_the_movie = list_of_all_titles.index(close_match)

    # Pair every row position with its score against the matched movie
    similarity_score = enumerate(similarity[index_of_the_movie])

    # Highest score first; sorted() is stable, so ties keep dataset order
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    # Print only the leading top_n titles instead of walking the whole ranking
    print('Movies suggested for you:\n')
    for rank, (index, _score) in enumerate(sorted_similar_movies[:top_n], start=1):
        print(f"{rank}. {list_of_all_titles[index]}")

# Get recommendations for a specific movie
# Surrounding whitespace is stripped so that a stray leading or trailing
# space does not lower the fuzzy-match score of an otherwise correct title.
movie_name = input('Enter your favourite movie name: ').strip()
get_movie_recommendations(movie_name)

        

Enter your favourite movie name: avatar
Movies suggested for you:

1. Avatar
2. Alien
3. Aliens
4. Guardians of the Galaxy
5. Star Trek Beyond
6. Star Trek Into Darkness
7. Galaxy Quest
8. Alien³
9. Cargo
10. Trekkies
11. Gravity
12. Moonraker
13. Jason X
14. Pocahontas
15. Space Cowboys
16. The Helix... Loaded
17. Lockout
18. Event Horizon
19. Space Dogs
20. Machete Kills
21. Gettysburg
22. Clash of the Titans
23. Star Wars: Clone Wars: Volume 1
24. The Right Stuff
25. Terminator Salvation
26. The Astronaut's Wife
27. Planet of the Apes
28. Star Trek
29. Wing Commander
30. Sunshine
