In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import process

# Read the movie table from 'imdb_top_1000.csv'
df = pd.read_csv('imdb_top_1000.csv')

# Build one free-text 'combined_features' string per row by joining the
# descriptive columns with single spaces. Missing values are replaced by ''
# first so that a single NaN cannot turn the whole concatenation into NaN.
text_columns = ['Overview', 'Genre', 'Director', 'Star1', 'Star2', 'Star3', 'Star4']
combined = df[text_columns[0]].fillna('')
for column_name in text_columns[1:]:
    combined = combined + ' ' + df[column_name].fillna('')
df['combined_features'] = combined

# TF-IDF representation of the combined text (English stop words removed)
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['combined_features'])

# Pairwise cosine similarity between every pair of rows
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Lookup table: stripped, lower-cased title -> row label in df
movie_titles = df['Series_Title'].str.strip()
lowercase_titles = movie_titles.str.lower()
movie_indices = pd.Series(df.index, index=lowercase_titles)

def recommend_movies(movie_title, df, cosine_sim, movie_indices, top_n=5):
    """Print the movies most similar to ``movie_title``.

    The title is stripped and lower-cased, then looked up in
    ``movie_indices``. If it is not present, the closest title is chosen
    with fuzzy matching; a match is accepted only when its score is above 60.

    Args:
        movie_title: Title typed by the user.
        df: DataFrame holding the columns printed below (``Series_Title``,
            ``Poster_Link``, ``Released_Year``, ``Certificate``, ``Runtime``,
            ``Genre``, ``IMDB_Rating``, ``Overview``).
        cosine_sim: Square similarity matrix; row ``i`` holds the similarity
            of movie ``i`` to every movie.
        movie_indices: Series mapping lower-cased titles to row labels of
            ``df``.
        top_n: Maximum number of recommendations to print (default 5).

    Returns:
        None. Results (or a "not found" message) are written to stdout.

    Note:
        The same integer is used as a label for ``df.loc`` and as a position
        in ``cosine_sim``, so this assumes ``df`` has the default 0..n-1
        index -- confirm if ``df`` is ever filtered or re-indexed upstream.
    """
    movie_title_lower = movie_title.strip().lower()

    # Use fuzzy matching for close titles
    if movie_title_lower not in movie_indices:
        print("Movie not found. Trying to find close matches...")
        close_match = process.extractOne(movie_title_lower, movie_indices.index)
        if close_match and close_match[1] > 60:  # Confidence threshold
            print(f"Did you mean '{close_match[0]}'? Using closest match for recommendations.")
            movie_title_lower = close_match[0]
        else:
            print("No close matches found. Please check the title and try again.")
            return

    idx = movie_indices[movie_title_lower]
    # Several movies can share one title; the lookup then yields a Series of
    # row labels instead of a scalar. Use the first matching entry so that
    # cosine_sim[idx] stays a single 1-D row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the queried movie by index rather than assuming it sorts first:
    # a different movie with an equal score could otherwise take its place
    # and the queried movie would be recommended to itself.
    candidates = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    sim_scores = candidates[:max(top_n, 0)]

    print(f"\nBecause you liked '{df.loc[idx, 'Series_Title']}', you might like:")
    for i, _score in sim_scores:
        print(f"- Title       : {df.loc[i, 'Series_Title']}")
        print(f"  Poster      : {df.loc[i, 'Poster_Link']}")
        print(f"  Released    : {df.loc[i, 'Released_Year']}")
        print(f"  Certificate : {df.loc[i, 'Certificate']}")
        print(f"  Runtime     : {df.loc[i, 'Runtime']}")
        print(f"  Genre       : {df.loc[i, 'Genre']}")
        print(f"  IMDB Rating : {df.loc[i, 'IMDB_Rating']}")
        print(f"  Overview    : {df.loc[i, 'Overview']}\n")

# Ask for a title on stdin and print recommendations for it
user_input = input("Enter the movie you liked: ")
recommend_movies(
    movie_title=user_input,
    df=df,
    cosine_sim=cosine_sim,
    movie_indices=movie_indices,
)


Enter the movie you liked:  Hera Pheri



Because you liked 'Hera Pheri', you might like:
- Title       : OMG: Oh My God!
  Poster      : https://m.media-amazon.com/images/M/MV5BMGNhYjUwNmYtNDQxNi00NDdmLTljMDAtZWM1NDQyZTk3ZDYwXkEyXkFqcGdeQXVyODE5NzE3OTE@._V1_UY98_CR0,0,67,98_AL_.jpg
  Released    : 2012
  Certificate : U
  Runtime     : 125.0 min
  Genre       : Comedy, Drama, Fantasy
  IMDB Rating : 8.1
  Overview    : A shopkeeper takes God to court when his shop is destroyed by an earthquake.

- Title       : Baahubali 2: The Conclusion
  Poster      : https://m.media-amazon.com/images/M/MV5BOGNlNmRkMjctNDgxMC00NzFhLWIzY2YtZDk3ZDE0NWZhZDBlXkEyXkFqcGdeQXVyODIwMDI1NjM@._V1_UX67_CR0,0,67,98_AL_.jpg
  Released    : 2017
  Certificate : UA
  Runtime     : 167.0 min
  Genre       : Action, Drama
  IMDB Rating : 8.2
  Overview    : When Shiva, the son of Bahubali, learns about his heritage, he begins to look for answers. His story is juxtaposed with past events that unfolded in the Mahishmati Kingdom.

- Title       : Special Cha