# **Introduction**
This model is built on a dataset provided by IMDb, featuring the top 250 highest-rated movies of all time. By utilizing this dataset, the model can offer tailored movie recommendations based on both genre and plot, allowing users to discover films that best align with their preferences. Through analyzing patterns within this curated collection, the model aims to enhance the movie selection experience, making it intuitive and genre-specific for movie enthusiasts.

In [1]:
# Load necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the dataset
from google.colab import drive
drive.mount('/content/drive')
movies_df = pd.read_csv('/content/IMDB_Top250Engmovies2_OMDB_Detailed.csv')

Mounted at /content/drive


In [3]:
# Fill any missing values in 'Plot' and 'Genre' with empty strings
movies_df['Plot'] = movies_df['Plot'].fillna('')
movies_df['Genre'] = movies_df['Director'].fillna('')

In [4]:
# Combine 'Plot' and 'Genre' into a single "features" column
movies_df['features'] = movies_df['Plot'] + " " + movies_df['Director']

In [5]:
# Initialize the TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words='english')

In [6]:
# Fit and transform the features column
tfidf_matrix = tfidf.fit_transform(movies_df['features'])

In [7]:
# Compute cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [8]:
# Define the recommendation function
def recommend_movies(title, cosine_sim=cosine_sim, df=movies_df, num_recommendations=6):

    # Get the index of the movie that matches the title
    idx = df[df['Title'] == title].index[0]

    # Get the similarity scores for this movie with all other movies
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort movies based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get indices of the most similar movies (excluding the first one as it’s the input movie itself)
    sim_scores = sim_scores[1:num_recommendations+1]
    movie_indices = [i[0] for i in sim_scores]

    # Return the top similar movies
    return df['Title'].iloc[movie_indices].tolist()

In [9]:
# get recommendations for a specific movie
recommended_movies = recommend_movies("Hacksaw Ridge")
print("Recommended movies:", recommended_movies)
recommended_movies = recommend_movies("The Dark Knight")
print("Recommended movies:", recommended_movies)
recommended_movies = recommend_movies("Pulp Fiction")
print("Recommended movies:", recommended_movies)

Recommended movies: ['Young Frankenstein', 'Cool Hand Luke', 'Braveheart', 'Gone with the Wind', 'Patton', 'Judgment at Nuremberg']
Recommended movies: ['The Dark Knight Rises', 'Batman Begins', 'Interstellar', 'Jaws', 'Memento', 'The Prestige']
Recommended movies: ['Django Unchained', 'The Shawshank Redemption', 'Some Like It Hot', 'Raging Bull', 'Reservoir Dogs', 'Sin City']
