<a href="https://colab.research.google.com/github/harshi600211-collab/movie-recommendation-system/blob/main/movie_recommendation_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------------------
# Step 1: Load Dataset
# -------------------------------------
# Dataset must contain columns: 'title', 'genres', 'overview'
movies = pd.read_csv("/content/HollywoodMovies.csv")   # Example dataset (TMDB movies dataset)

# Rename columns to match expected names in the script
movies = movies.rename(columns={'Movie': 'title', 'Genre': 'genres', 'Story': 'overview'})

# Fill missing values
movies['overview'] = movies['overview'].fillna('')
movies['genres'] = movies['genres'].fillna('')

# -------------------------------------n# Step 2: Combine Text Features
# -------------------------------------
movies["combined_features"] = (
    movies["overview"] + " " + movies["genres"]
)

# -------------------------------------
# Step 3: Convert Text → TF-IDF Feature Vectors
# -------------------------------------
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(movies["combined_features"])

# -------------------------------------
# Step 4: Compute Cosine Similarity between Movies
# -------------------------------------
similarity_matrix = cosine_similarity(tfidf_matrix)

# -------------------------------------
# Step 5: Recommendation Function
# -------------------------------------
def recommend(movie_name):
    # Find movie index
    if movie_name not in movies["title"].values:
        return ["Movie not found in database"]

    movie_index = movies[movies["title"] == movie_name].index[0]

    # Find similarity scores
    similarity_scores = list(enumerate(similarity_matrix[movie_index]))

    # Sort movies based on similarity
    sorted_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # Top 10 recommendations (skip first → same movie)
    top_10 = sorted_scores[1:11]

    recommended_movies = []
    for index, score in top_10:
        recommended_movies.append(movies.iloc[index]["title"])

    return recommended_movies


# -------------------------------------
# Step 6: Test the System
# -------------------------------------
movie_input = input("Enter a Movie Name: ")
results = recommend(movie_input)

print("\nRecommended Movies:")
for m in results:
    print("\u27a4", m)

Enter a Movie Name: Robin Hood

Recommended Movies:
➤ Never Back Down
➤ The Spirit
➤ Underworld: Rise of the Lycans
➤ Iron Man 2
➤ Kick-Ass
➤ Percy Jackson & the Olympians: The Lightning Thief
➤ Prince of Persia: The Sands of Time
➤ Red
➤ Robin Hood
➤ Takers


In [6]:
display(movies.columns)

Index(['Movie', 'LeadStudio', 'RottenTomatoes', 'AudienceScore', 'Story',
       'Genre', 'TheatersOpenWeek', 'OpeningWeekend', 'BOAvgOpenWeekend',
       'DomesticGross', 'ForeignGross', 'WorldGross', 'Budget',
       'Profitability', 'OpenProfit', 'Year'],
      dtype='object')