In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display

In [None]:
# Load the movie catalogue (`df`) and the tag lookup table (`df_tags`) in one pass.
df, df_tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/updated_movies (5).csv",
        "/content/movies_tags.csv",
    )
)

  df_tags = pd.read_csv("/content/movies_tags.csv")


In [None]:
# Preview the first five rows of the movie catalogue.
df.head(n=5)

Unnamed: 0,director_name,num_critic_for_reviews,duration,genres,actor_1_name,movie_title,num_voted_users,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,title_year,imdb_score
0,James Cameron,723.0,178.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,886204,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,3054.0,English,USA,2009.0,7.9
1,Gore Verbinski,302.0,169.0,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,471220,goddess|marriage ceremony|marriage proposal|pi...,http://www.imdb.com/title/tt0449088/?ref_=fn_t...,1238.0,English,USA,2007.0,7.1
2,Sam Mendes,602.0,148.0,Action|Adventure|Thriller,Christoph Waltz,Spectre,275868,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,English,UK,2015.0,6.8
3,Christopher Nolan,813.0,164.0,Action|Thriller,Tom Hardy,The Dark Knight Rises,1144337,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,2012.0,8.5
4,Doug Walker,140.194272,107.201074,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens ...,8,,http://www.imdb.com/title/tt5289954/?ref_=fn_t...,272.770808,English,USA,2002.454856,7.1


# Content-Based Filtering (TF-IDF)

# Content-Based Filtering (TF-IDF)

In [None]:
def get_user_choice():
    """Interactively ask which movie to start from and how to recommend.

    The user first identifies a movie, either by typing its title or by typing
    a tag that is resolved to a title through ``find_movie_from_tag``. The user
    then picks one of five recommendation methods.

    Both prompts repeat until a usable answer is given, so the returned tuple
    always holds a non-empty movie title and a method number from "1" to "5".

    Returns:
        tuple: ``(choice, feature, movie_title)`` where ``choice`` is the
        method number as a string, ``feature`` is the DataFrame column used by
        that method (``None`` for method "5", which needs no column), and
        ``movie_title`` is the selected title (lower-cased when typed
        directly, as returned by ``find_movie_from_tag`` otherwise).
    """
    # Step 1: identify the movie. A loop replaces the original recursion so
    # repeated invalid answers cannot grow the call stack.
    while True:
        print("\nDo you want to enter a movie title or a tag?")
        print("1. Movie Title")
        print("2. Movie Tag")
        choice_type = input("Enter 1 for Movie Title or 2 for Movie Tag: ").strip()

        if choice_type == "1":
            movie_title = input("\nEnter the movie title: ").strip().lower()
        elif choice_type == "2":
            movie_tag = input("\nEnter the movie tag: ").strip().lower()
            # Yields None (after printing its own message) when nothing matches.
            movie_title = find_movie_from_tag(movie_tag)
        else:
            print("\nInvalid choice. Please enter 1 or 2.")
            continue

        # A failed tag lookup or a blank title must not be handed downstream:
        # the title is later normalised with str methods, which fail on None.
        if isinstance(movie_title, str) and movie_title.strip():
            break
        if choice_type == "1":
            print("\nThe movie title cannot be empty.")

    # Map each method number to the column it compares on.
    feature_dict = {
        "1": "genres",
        "2": "actor_1_name",
        "3": "director_name",
        "4": "plot_keywords",  # Using plot keywords for content-based filtering
        "5": None,  # Top-rated list needs no feature column
    }

    # Step 2: pick the recommendation method, re-asking on anything else.
    while True:
        print("\nChoose a recommendation method:")
        print("1. Genre")
        print("2. Actor Name")
        print("3. Director Name")
        print("4. Description (Content-Based)")
        print("5. Top 5 Highest Rated Movies")

        choice = input("Enter a number (1-5): ").strip()
        if choice in feature_dict:
            return choice, feature_dict[choice], movie_title

        print("\nInvalid choice. Please enter a number between 1 and 5.")


In [None]:
def find_movie_from_tag(tag):
    """Return the title of the first movie whose tagline contains ``tag``.

    The lookup runs against the ``tagline`` column of the module-level
    ``df_tags`` frame and is a case-insensitive, literal substring match.

    Args:
        tag: Text to look for. Surrounding whitespace and case are ignored.

    Returns:
        The ``title`` value of the first matching row, or ``None`` when the
        tag is blank, is not a string, or matches no tagline. A message is
        printed in each ``None`` case.
    """
    if not isinstance(tag, str) or not tag.strip():
        # An empty pattern is contained in every string, which would silently
        # return whichever movie happens to be first in the table.
        print("\nNo tag provided.")
        return None

    # Normalize the input tag (convert to lowercase and strip extra spaces)
    tag = tag.strip().lower()

    # regex=False: the tag is user text, not a pattern. Characters such as
    # "(", "+" or "|" must match literally rather than raise re.error or act
    # as regex operators.
    is_match = df_tags['tagline'].str.contains(tag, case=False, na=False, regex=False)
    matched_movies = df_tags[is_match]

    if matched_movies.empty:
        print(f"\nNo movie found with the tag: {tag}")
        return None

    # Return the first matching movie title
    return matched_movies.iloc[0]['title']


In [None]:
def find_movie_feature(movie_title, feature):
    """Look up one column value for the movie with the given title.

    The title is compared case-insensitively, ignoring surrounding whitespace
    on both the query and the stored titles, against the ``movie_title``
    column of the module-level ``df`` frame. When several rows share the
    title, the first one is used.

    Args:
        movie_title: Title to search for.
        feature: Name of the ``df`` column whose value should be returned.

    Returns:
        The value stored in ``feature`` for the first matching row, or
        ``None`` (after printing a message) when the title is blank or not a
        string, when no row matches, or when the stored value is missing.

    Raises:
        KeyError: If ``feature`` is not a column of ``df``.
    """
    # A failed tag lookup upstream hands over None; str methods would crash on it.
    if not isinstance(movie_title, str) or not movie_title.strip():
        print("\nNo movie title provided.")
        return None

    # Ensure the movie title is properly sanitized (lowercased, stripped)
    movie_title = movie_title.strip().lower()

    # Normalise the stored titles the same way so stray surrounding
    # whitespace in the data cannot hide an otherwise exact match.
    stored_titles = df["movie_title"].str.strip().str.lower()
    movie_row = df[stored_titles == movie_title]

    if movie_row.empty:
        print(f"\nMovie '{movie_title}' not found in the dataset.")
        return None

    # Extract the chosen feature value from the 'df' DataFrame
    feature_value = movie_row.iloc[0][feature]

    # NaN is truthy, so callers that test `if not feature_value` would let a
    # missing value through; report it here and return None instead.
    if pd.isna(feature_value):
        print(f"\nMovie '{movie_title}' has no '{feature}' value in the dataset.")
        return None

    return feature_value


In [None]:
def fetch_recommendations(feature_data, query_value):
    """Rank movies by TF-IDF cosine similarity to the movie matching a query.

    The anchor movie is the first row of ``feature_data`` whose text equals
    ``query_value`` (case-insensitive, surrounding whitespace ignored). If no
    row is equal, the first row that contains ``query_value`` as a literal
    substring is used. Every row of ``feature_data`` is then vectorised with
    TF-IDF and compared against the anchor row.

    Args:
        feature_data: pandas Series holding one text value per movie, for
            example ``df["genres"]``. Missing values are treated as empty text.
        query_value: Feature text identifying the anchor movie.

    Returns:
        list[int]: Row positions (suitable for ``.iloc``) of up to 15 movies,
        most similar first, never including the anchor row itself. An empty
        list is returned when nothing matches or the query is blank or not a
        string.

    Side effects:
        Sets the module-level flag ``Not_found`` to ``True`` when no anchor
        movie is found and to ``False`` otherwise.
    """
    global Not_found

    # A missing feature value arrives as NaN (a float); there is nothing to match on.
    if not isinstance(query_value, str) or not query_value.strip():
        print("\nNo matching movies found.")
        Not_found = True
        return []

    documents = feature_data.fillna("").astype(str)
    needle = query_value.strip().lower()
    haystack = documents.str.strip().str.lower()

    # Work with row POSITIONS throughout: the similarity scores and the
    # caller's `.iloc` lookup are positional, whereas index labels need not be.
    positions = np.flatnonzero((haystack == needle).to_numpy())
    if len(positions) == 0:
        # regex=False matters: values such as "Action|Adventure" would
        # otherwise be read as a regex alternation and match unrelated rows.
        positions = np.flatnonzero(
            haystack.str.contains(needle, regex=False).to_numpy()
        )

    if len(positions) == 0:
        print("\nNo matching movies found.")
        Not_found = True
        return []

    Not_found = False
    anchor = int(positions[0])  # Take the first match

    # Compute TF-IDF
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(documents)

    # Only the anchor's row of the similarity matrix is needed, so compare
    # that single row against all rows instead of building the n x n matrix.
    scores = cosine_similarity(tfidf_matrix[anchor], tfidf_matrix).ravel()

    # Stable descending sort keeps dataset order among equal scores. The
    # anchor is dropped explicitly because, with ties, it is not guaranteed
    # to be the first entry.
    ranked = np.argsort(-scores, kind="stable")
    movie_indices = [int(pos) for pos in ranked if pos != anchor][:15]

    return movie_indices


In [None]:
def apply_filters(movie_indices, choice, rating_threshold=7.0, top_n=10):
    """Select candidate movies, drop low-rated ones and keep the best few.

    Args:
        movie_indices: Row positions into the module-level ``df`` frame
            (used with ``df.iloc``).
        choice: Recommendation method number as a string. The rating filter
            is skipped when it is "5".
        rating_threshold: Minimum ``imdb_score`` a movie needs to be kept.
            Defaults to 7.0, the value that used to be hard-coded here.
        top_n: Maximum number of rows to return. Defaults to 10.

    Returns:
        pandas.DataFrame: The surviving rows of ``df`` sorted by
        ``imdb_score`` from highest to lowest, at most ``top_n`` rows.
    """
    filtered_movies = df.iloc[movie_indices]

    # Apply the first filter: minimum rating (except for choice 5)
    if choice != "5":
        meets_threshold = filtered_movies["imdb_score"] >= rating_threshold
        filtered_movies = filtered_movies[meets_threshold]

    # Apply the second filter: Sorting by highest rating
    return filtered_movies.sort_values(by="imdb_score", ascending=False).head(top_n)

In [None]:
# Columns shown whenever a table of movies is displayed.
displayed_features = ['movie_title', 'genres', 'imdb_score', 'actor_1_name', 'director_name']

def display_results(choice, filtered_movies, rating_threshold=7.0):
    """Show the outcome of a recommendation request.

    Args:
        choice: Recommendation method number as a string. For "5" the five
            highest-rated rows of the module-level ``df`` are shown and
            ``filtered_movies`` is ignored.
        filtered_movies: DataFrame of recommended movies, or ``None``.
        rating_threshold: Rating quoted in the "no recommendations" message.
            It previously came from a ``RATING_THRESHOLD`` name that is not
            defined in this scope, so that branch raised ``NameError``. The
            default matches the 7.0 cut-off used when filtering.

    Returns:
        None. Output is printed and rendered with ``display``.
    """
    if choice == "5":
        print("\nTop 5 Most Popular Movies Based on Ratings:")
        display(df.sort_values(by="imdb_score", ascending=False)[displayed_features].head(5))
    elif filtered_movies is None or filtered_movies.empty:
        print("\nNo recommendations found with a rating above {:.1f}".format(rating_threshold))
    else:
        print("\nRecommended Movies (Filtered & Sorted by IMDb Rating):")
        display(filtered_movies[displayed_features])


In [None]:
def get_recommendations(choice, feature, movie_title):
    """Run the full recommendation flow for one user request and show it.

    Args:
        choice: Recommendation method number as a string ("1" to "5").
        feature: Name of the ``df`` column to compare movies on, or ``None``
            when ``choice`` has no associated column.
        movie_title: Title of the movie the recommendations start from.

    Returns:
        pandas.DataFrame: The recommended movies that were displayed. The
        frame is empty when nothing was recommended (method "5", invalid
        method, unknown movie, missing feature value, or no similar movies).

    Side effects:
        Stores the same frame in the module-level name ``recommended_movies``
        so that later notebook cells can read it.
    """
    # Later cells read `recommended_movies`; a plain local assignment never
    # leaves this function. Start from an empty frame so a previous run's
    # results are not silently reused when this run recommends nothing.
    global recommended_movies
    recommended_movies = df.iloc[0:0]

    if choice == "5":
        display_results(choice, None)
        return recommended_movies

    if feature is None:
        print("\nInvalid choice. Please enter a number between 1 and 5.")
        return recommended_movies

    # A failed tag lookup yields None instead of a title.
    if not isinstance(movie_title, str) or not movie_title.strip():
        print("\nNo movie selected; cannot build recommendations.")
        return recommended_movies

    feature_value = find_movie_feature(movie_title, feature)

    if feature_value is None:
        # The lookup has already explained why nothing was found.
        return recommended_movies

    # NaN is truthy, so a plain `if not feature_value` lets missing values
    # through to the text matching step, which cannot handle them.
    if pd.isna(feature_value) or not str(feature_value).strip():
        print(f"\nMovie '{movie_title}' has no '{feature}' value to compare on.")
        return recommended_movies

    movie_indices = fetch_recommendations(df[feature], feature_value)

    # An empty list covers both "no anchor movie" and "no similar movies".
    if not movie_indices:
        return recommended_movies

    recommended_movies = apply_filters(movie_indices, choice)

    display_results(choice, recommended_movies)

    return recommended_movies

In [None]:
# Prompt the user interactively; the answers are kept in module-level names
# (`choice`, `feature`, `movie_title`) so they stay inspectable after the cell runs.
choice, feature, movie_title = get_user_choice()

# Proceed with the recommendation process
get_recommendations(choice, feature, movie_title)


Do you want to enter a movie title or a tag?
1. Movie Title
2. Movie Tag
Enter 1 for Movie Title or 2 for Movie Tag: 1

Enter the movie title: avatar

Choose a recommendation method:
1. Genre
2. Actor Name
3. Director Name
4. Description (Content-Based)
5. Top 5 Highest Rated Movies
Enter a number (1-5): 1

Recommended Movies (Filtered & Sorted by IMDb Rating):


Unnamed: 0,movie_title,genres,imdb_score,actor_1_name,director_name
2051,Star Wars: Episode V - The Empire Strikes Back,Action|Adventure|Fantasy|Sci-Fi,8.8,Harrison Ford,Irvin Kershner
3024,Star Wars: Episode IV - A New Hope,Action|Adventure|Fantasy|Sci-Fi,8.7,Harrison Ford,George Lucas
1536,Star Wars: Episode VI - Return of the Jedi,Action|Adventure|Fantasy|Sci-Fi,8.4,Harrison Ford,Richard Marquand
4690,Destiny,Action|Adventure|Fantasy|Sci-Fi,8.1,Peter Dinklage,Joseph Kosinski
236,Star Wars: Episode III - Revenge of the Sith,Action|Adventure|Fantasy|Sci-Fi,7.6,Natalie Portman,George Lucas
3541,Stargate: The Ark of Truth,Action|Adventure|Drama|Fantasy|Sci-Fi,7.4,Ben Browder,Robert C. Cooper
15,Man of Steel,Action|Adventure|Fantasy|Sci-Fi,7.2,Henry Cavill,Zack Snyder


In [None]:
# Show every recommended row, restricted to the columns listed in `displayed_features`.
recommended_movies.loc[:, displayed_features]

Unnamed: 0,movie_title,genres,imdb_score,actor_1_name,director_name
2051,Star Wars: Episode V - The Empire Strikes Back,Action|Adventure|Fantasy|Sci-Fi,8.8,Harrison Ford,Irvin Kershner
3024,Star Wars: Episode IV - A New Hope,Action|Adventure|Fantasy|Sci-Fi,8.7,Harrison Ford,George Lucas
1536,Star Wars: Episode VI - Return of the Jedi,Action|Adventure|Fantasy|Sci-Fi,8.4,Harrison Ford,Richard Marquand
4690,Destiny,Action|Adventure|Fantasy|Sci-Fi,8.1,Peter Dinklage,Joseph Kosinski
236,Star Wars: Episode III - Revenge of the Sith,Action|Adventure|Fantasy|Sci-Fi,7.6,Natalie Portman,George Lucas
3541,Stargate: The Ark of Truth,Action|Adventure|Drama|Fantasy|Sci-Fi,7.4,Ben Browder,Robert C. Cooper
15,Man of Steel,Action|Adventure|Fantasy|Sci-Fi,7.2,Henry Cavill,Zack Snyder


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# test sample
# Entries are title fragments, not necessarily full titles: "Star Wars" stands
# for every film whose title contains it. Comparing them to full titles with
# `in` on a list can never match such an entry, which counts every Star Wars
# episode as a false positive and the fragment itself as a miss.
expected_relevant_movies = ["Man of Steel", "Star Wars", "Stargate: The Ark of Truth" ,"Destiny"]

# recommended movie titles
recommended_movie_titles = recommended_movies['movie_title'].tolist()


def _is_match(expected, title):
    """Return True when `expected` occurs in `title`, ignoring case and outer whitespace."""
    return expected.strip().lower() in str(title).strip().lower()


# Expected entries that no recommendation covers are the false negatives.
missed_expected = [
    expected
    for expected in expected_relevant_movies
    if not any(_is_match(expected, title) for title in recommended_movie_titles)
]

# One label per recommended title followed by one per missed expectation.
# A plain list (rather than a set) keeps the order deterministic between runs.
all_movies = recommended_movie_titles + missed_expected
y_true = [
    1 if any(_is_match(expected, title) for expected in expected_relevant_movies) else 0
    for title in recommended_movie_titles
] + [1] * len(missed_expected)
y_pred = [1] * len(recommended_movie_titles) + [0] * len(missed_expected)

# metrics
# zero_division=0 reports 0.0 (without a warning) when there is nothing to
# divide by, e.g. when no movie was recommended.
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)

# results
print("Evaluation Results:")
print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1 Score:  {f1:.2f}")

Evaluation Results:
Precision: 0.43
Recall:    0.75
F1 Score:  0.55
