#1. Movie Dataset

In [43]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the movie catalogue from 'movies.csv' in the working directory.
# A missing file is only reported, not re-raised, so `movies_df` stays
# undefined in that case.
try:
    movies_df = pd.read_csv('movies.csv')
except FileNotFoundError:
    print("Error: 'movies.csv' file not found.")
else:
    print("Dataset loaded successfully!")

def explore_dataset(movies_df):
    """
    Print a preview of the dataset.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data.

    The first five rows are rendered as a text table without the index
    column and written to standard output. Nothing is returned.
    """
    preview = movies_df.head()
    rendered = preview.to_string(index=False)
    print(rendered)

def filter_movies(movies_df, genre=None, release_year=None, director=None):
    """
    Narrow the dataset by genre, release year and/or director.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; the
      'Genre', 'Release Year' and 'Director' columns are read as needed.
    - genre (str, optional): Text that must appear in 'Genre'
      (case-insensitive, literal match).
    - release_year (optional): Value that 'Release Year' must equal.
    - director (str, optional): Text that must appear in 'Director'
      (case-insensitive, literal match).

    Returns:
    - DataFrame: Rows satisfying every filter that was supplied. Filters
      left as None (or empty) are skipped.

    The number of matching rows is printed before returning.
    """
    filtered_df = movies_df

    if genre:
        # regex=False: the text comes straight from the user, so characters
        # such as '(', '+' or '.' must be matched literally instead of being
        # compiled as a regular expression (which can raise or over-match).
        genre_mask = filtered_df['Genre'].str.contains(
            genre, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[genre_mask]
    if release_year:
        filtered_df = filtered_df[filtered_df['Release Year'] == release_year]
    if director:
        director_mask = filtered_df['Director'].str.contains(
            director, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[director_mask]

    print(f"Filtered Movies: {len(filtered_df)} found.\n")
    return filtered_df



Dataset loaded successfully!


#2. Movie Search


In [72]:
def search_movies_by_title(movies_df, title_keyword):
    """
    Find movies whose title contains the given text.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; the
      'Title' column is searched.
    - title_keyword (str): Text to look for (case-insensitive, literal).

    Returns:
    - DataFrame: Rows whose 'Title' contains ``title_keyword``. Rows with a
      missing title never match.

    The number of matches is printed before returning.
    """
    # regex=False keeps titles such as "(500) Days of Summer" or "M*A*S*H"
    # searchable: the keyword is matched as plain text rather than compiled
    # as a regular expression.
    title_mask = movies_df['Title'].str.contains(
        title_keyword, case=False, na=False, regex=False
    )
    search_result_df = movies_df[title_mask]
    print(f"Movies found with title containing '{title_keyword}': {len(search_result_df)} found.\n")
    return search_result_df

def search_movies_by_genre(movies_df, keyword):
    """
    Find movies whose genre contains the given keyword.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; only the
      'Genre' column is searched.
    - keyword (str): Text to look for (case-insensitive, literal).

    Returns:
    - DataFrame: Rows whose 'Genre' contains ``keyword``. Rows with a
      missing genre never match.

    The number of matches is printed before returning.
    """
    # regex=False: treat the user's keyword as plain text so that input like
    # "c++" or "sci-fi (" neither raises nor over-matches.
    genre_mask = movies_df['Genre'].str.contains(
        keyword, case=False, na=False, regex=False
    )
    search_result_df = movies_df[genre_mask]
    # The message names only the genre because that is the only column searched.
    print(f"Movies found with keyword '{keyword}' in genre: {len(search_result_df)} found.\n")
    return search_result_df

def suggest_movies_by_genre_or_director(movies_df, genre=None, director=None):
    """
    Suggest movies that share a genre or a director.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data.
    - genre (str, optional): Text that must appear in 'Genre'.
    - director (str, optional): Text that must appear in 'Director'.

    Returns:
    - DataFrame: Matching rows. When both ``genre`` and ``director`` are
      given, only ``genre`` is used. When neither is given, a message is
      printed and an empty DataFrame is returned.

    Matching is case-insensitive and literal (no regular expressions).
    """
    if genre:
        # regex=False: user-typed text is matched literally.
        genre_mask = movies_df['Genre'].str.contains(
            genre, case=False, na=False, regex=False
        )
        suggestions_df = movies_df[genre_mask]
        print(f"Movies suggested based on genre '{genre}': {len(suggestions_df)} found.\n")
    elif director:
        # Names like "J.J." must not let '.' act as a wildcard.
        director_mask = movies_df['Director'].str.contains(
            director, case=False, na=False, regex=False
        )
        suggestions_df = movies_df[director_mask]
        print(f"Movies suggested based on director '{director}': {len(suggestions_df)} found.\n")
    else:
        print("No genre or director provided for suggestions.")
        return pd.DataFrame()

    return suggestions_df


#3. User Reviews and Ratings


In [58]:
def rate_movie(movies_df, movie_id_value, user_rating, csv_path='movies.csv'):
    """
    Record a user rating for a movie and refresh its average rating.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; it is
      modified in place. 'User Ratings' and 'Average Rating' columns are
      created when missing.
    - movie_id_value: Value looked up in the 'Movie ID' column. The first
      matching row is updated.
    - user_rating (number): The rating to append.
    - csv_path (str or None): File the updated DataFrame is written to.
      Defaults to 'movies.csv'; pass None to skip saving.

    Returns:
    - None. A message is printed on success or when the movie ID is unknown.
    """
    import ast  # local import: only needed to decode lists reloaded from CSV

    if 'User Ratings' not in movies_df.columns:
        movies_df['User Ratings'] = [[] for _ in range(len(movies_df))]
    elif movies_df['User Ratings'].dtype != object:
        # A list can only be stored in a cell of an object-dtype column.
        movies_df['User Ratings'] = movies_df['User Ratings'].astype(object)
    if 'Average Rating' not in movies_df.columns:
        movies_df['Average Rating'] = [0.0] * len(movies_df)

    matching_labels = movies_df.index[movies_df['Movie ID'] == movie_id_value]
    if matching_labels.empty:
        print(f"Movie ID {movie_id_value} not found!")
        return
    row_label = matching_labels[0]

    stored = movies_df.at[row_label, 'User Ratings']
    if isinstance(stored, str):
        # After a save/reload cycle the list comes back as its text form
        # (e.g. "[4.0, 5.0]"). Decode it so earlier ratings are kept instead
        # of being silently discarded.
        try:
            stored = ast.literal_eval(stored)
        except (ValueError, SyntaxError):
            stored = []
    current_ratings = list(stored) if isinstance(stored, (list, tuple)) else []

    current_ratings.append(user_rating)
    movies_df.at[row_label, 'User Ratings'] = current_ratings

    average_rating = sum(current_ratings) / len(current_ratings)
    movies_df.at[row_label, 'Average Rating'] = average_rating

    print(f"Added rating of {user_rating} to movie ID {movie_id_value}. New average rating: {average_rating:.2f}")

    if csv_path is not None:
        movies_df.to_csv(csv_path, index=False)

def add_movie_review(movies_df, movie_id_value, user_review, csv_path='movies.csv'):
    """
    Attach a text review to a movie.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; it is
      modified in place. A 'Reviews' column is created when missing.
    - movie_id_value: Value looked up in the 'Movie ID' column. The first
      matching row is updated.
    - user_review (str): The review text to append.
    - csv_path (str or None): File the updated DataFrame is written to.
      Defaults to 'movies.csv'; pass None to skip saving.

    Returns:
    - None. A message is printed on success or when the movie ID is unknown.
    """
    import ast  # local import: only needed to decode lists reloaded from CSV

    if 'Reviews' not in movies_df.columns:
        movies_df['Reviews'] = [[] for _ in range(len(movies_df))]
    elif movies_df['Reviews'].dtype != object:
        # A list can only be stored in a cell of an object-dtype column.
        movies_df['Reviews'] = movies_df['Reviews'].astype(object)

    matching_labels = movies_df.index[movies_df['Movie ID'] == movie_id_value]
    if matching_labels.empty:
        print(f"Movie ID {movie_id_value} not found!")
        return
    row_label = matching_labels[0]

    stored = movies_df.at[row_label, 'Reviews']
    if isinstance(stored, str):
        # Reviews reloaded from CSV arrive as the text form of a list
        # (e.g. "['Loved it']"). Decode it so existing reviews survive.
        try:
            stored = ast.literal_eval(stored)
        except (ValueError, SyntaxError):
            stored = []
    current_reviews = list(stored) if isinstance(stored, (list, tuple)) else []

    current_reviews.append(user_review)
    movies_df.at[row_label, 'Reviews'] = current_reviews

    print(f"Added review for movie ID {movie_id_value}.")

    if csv_path is not None:
        movies_df.to_csv(csv_path, index=False)







#4.1 Personalized Recommendations

In [69]:
import pandas as pd

def recommend_movies(movies_df, user_ratings, top_n=5):
    """
    Recommend unrated movies from the genres the user has rated most often.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; the
      'Genre' and 'Rating' columns are read. Genres within a cell are
      expected to be separated by ', '.
    - user_ratings (dict): Maps index labels of ``movies_df`` to the user's
      rating. Only the keys are used; labels that are not present in the
      index are ignored.
    - top_n (int): Maximum number of recommendations to return.

    Returns:
    - DataFrame: Up to ``top_n`` rows, ordered by 'Rating' (highest first),
      taken from movies that mention one of the user's three most frequent
      genres and that the user has not rated. Empty when ``user_ratings``
      is not a dict or no genre can be derived from it.
    """
    if not isinstance(user_ratings, dict):
        print("user_ratings must be a dictionary with movie indices as keys.")
        return pd.DataFrame()

    # Keep only labels that exist; .loc raises KeyError for unknown labels.
    rated_labels = [label for label in user_ratings if label in movies_df.index]
    rated_genres = movies_df.loc[rated_labels, 'Genre'].dropna()
    if rated_genres.empty:
        # Nothing to derive preferences from: empty result, same columns.
        return movies_df.iloc[0:0]

    genre_counts = rated_genres.str.get_dummies(sep=', ').sum().sort_values(ascending=False)
    top_genres = list(genre_counts.index[:3])

    # Missing genres are NaN (a float); the isinstance guard keeps the
    # substring test from raising on them.
    mask = movies_df['Genre'].apply(
        lambda cell: isinstance(cell, str) and any(genre in cell for genre in top_genres)
    )
    recommendations = movies_df[mask]
    recommendations = recommendations[~recommendations.index.isin(list(user_ratings))]
    recommendations = recommendations.sort_values(by='Rating', ascending=False).head(top_n)

    return recommendations


#4.2 Top 10 Lists

In [65]:
def top_10_movies(movies_df, by='Rating', specific_value=None):
    """
    Return the ten highest-rated movies, optionally within a genre or year.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; 'Rating'
      is used for ordering, 'Genre' / 'Release Year' for filtering.
    - by (str): 'Rating' (whole dataset), 'Genre' or 'Release Year'.
    - specific_value: The genre text (case-insensitive, literal match) or
      the release year to filter on. Required unless ``by`` is 'Rating'.

    Returns:
    - DataFrame: At most ten rows sorted by 'Rating', highest first. Empty
      when ``by`` is not recognised or a required ``specific_value`` is
      missing.
    """
    if by not in ('Genre', 'Release Year', 'Rating'):
        print("Invalid 'by' parameter specified.")
        return pd.DataFrame()

    if by == 'Rating':
        print("Top 10 movies based on highest ratings across all entries:")
        candidates = movies_df
    elif specific_value is None:
        # Filtering on None would raise for genres and match nothing for
        # years; report it and return an empty result instead.
        print(f"A specific value is required to rank by '{by}'.")
        return movies_df.iloc[0:0]
    elif by == 'Genre':
        print(f"Top 10 movies for genre: {specific_value}")
        # regex=False: the genre text is matched literally.
        genre_mask = movies_df['Genre'].str.contains(
            str(specific_value), case=False, na=False, regex=False
        )
        candidates = movies_df[genre_mask]
    else:  # by == 'Release Year'
        print(f"Top 10 movies for release year: {specific_value}")
        candidates = movies_df[movies_df['Release Year'] == specific_value]

    return candidates.sort_values(by='Rating', ascending=False).head(10)





#4.3 Visualizations

In [76]:
import matplotlib.pyplot as plt

def plot_rating_distribution(movies_df):
    """
    Show a histogram of the values in the 'Rating' column.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data.

    Missing ratings are left out. The chart is displayed with
    ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(figsize=(10, 6))
    known_ratings = movies_df['Rating'].dropna()

    axes.hist(known_ratings, bins=20, color='skyblue', edgecolor='black')
    axes.set_title('Distribution of Movie Ratings')
    axes.set_xlabel('Rating')
    axes.set_ylabel('Number of Movies')
    # Horizontal guide lines only, slightly transparent.
    axes.grid(axis='y', alpha=0.75)

    plt.show()


def plot_movie_counts_per_genre(movies_df):
    """
    Show a horizontal bar chart of how many movies carry each genre.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data.

    Each 'Genre' cell is split on ', ' so a movie counts once for every
    genre it lists; rows with a missing genre are ignored. The chart is
    displayed with ``plt.show()``; nothing is returned.
    """
    known_genres = movies_df['Genre'].dropna()
    one_genre_per_row = known_genres.str.split(', ').explode()
    # Ascending order of counts, as produced by sort_values().
    genre_counts = one_genre_per_row.value_counts().sort_values()

    _, axes = plt.subplots(figsize=(12, 8))
    genre_counts.plot(kind='barh', color='seagreen', ax=axes)
    axes.set_title('Number of Movies per Genre')
    axes.set_xlabel('Number of Movies')
    axes.set_ylabel('Genre')
    axes.grid(axis='x', alpha=0.75)

    plt.show()





#4.4 Sentiment Analysis

In [86]:
def analyze_sentiment(reviews, positive_keywords=None, negative_keywords=None):
    """
    Label each review as Positive, Negative, Mixed or Neutral by keyword.

    Parameters:
    - reviews (iterable): The review texts. Non-string items are converted
      with ``str()`` before matching.
    - positive_keywords (list of str, optional): Words or phrases that mark
      a review as positive. A built-in list is used when None.
    - negative_keywords (list of str, optional): Words or phrases that mark
      a review as negative. A built-in list is used when None.

    Returns:
    - list of str: One label per review, in input order. 'Mixed' means both
      keyword sets matched; 'Neutral' means neither did.

    Matching is case-insensitive and on whole words, so "badge" does not
    count as "bad" and "goodbye" does not count as "good".
    """
    import re  # local import: only this function needs regular expressions

    if positive_keywords is None:
        positive_keywords = ['good', 'great', 'fantastic', 'excellent', 'amazing', 'wonderful']
    if negative_keywords is None:
        negative_keywords = ['bad', 'poor', 'terrible', 'awful', 'disappointing', 'horrible']

    def _compile_keywords(keywords):
        # Build one alternation that matches any keyword when it is not
        # glued to other word characters. Returns None for an empty set.
        escaped = [re.escape(str(word).lower()) for word in keywords if str(word)]
        if not escaped:
            return None
        return re.compile(r'(?<!\w)(?:' + '|'.join(escaped) + r')(?!\w)')

    positive_pattern = _compile_keywords(positive_keywords)
    negative_pattern = _compile_keywords(negative_keywords)

    sentiments = []
    for review in reviews:
        review_lower = str(review).lower()
        pos_match = positive_pattern is not None and bool(positive_pattern.search(review_lower))
        neg_match = negative_pattern is not None and bool(negative_pattern.search(review_lower))

        if pos_match and neg_match:
            sentiments.append('Mixed')
        elif pos_match:
            sentiments.append('Positive')
        elif neg_match:
            sentiments.append('Negative')
        else:
            sentiments.append('Neutral')

    return sentiments

#5. User Interface

In [None]:
def _prompt_number(prompt, cast=int):
    """Ask for a number; return it converted with ``cast``, or None (after a message) if invalid."""
    raw_value = input(prompt).strip()
    try:
        return cast(raw_value)
    except ValueError:
        print("Invalid input. Please enter a valid number.")
        return None


def _collect_reviews(movies_df):
    """Return every review string stored in the 'Reviews' column, or [] when the column is absent."""
    import ast  # local import: only needed to decode lists reloaded from CSV

    if 'Reviews' not in movies_df.columns:
        return []

    collected = []
    for cell in movies_df['Reviews']:
        if isinstance(cell, str):
            # After a CSV round trip a list is stored as its text form.
            try:
                cell = ast.literal_eval(cell)
            except (ValueError, SyntaxError):
                cell = [cell]  # plain text: treat it as a single review
        if isinstance(cell, (list, tuple)):
            collected.extend(item for item in cell if isinstance(item, str))
    return collected


def main_menu(movies_df):
    """
    Run the interactive text menu until the user chooses to exit.

    Parameters:
    - movies_df (DataFrame): The DataFrame containing movie data; it is
      passed to every feature function and may be modified by the rating
      and review options.

    Each loop iteration prints the menu, reads a choice with ``input()``
    and dispatches to the matching function. Invalid numeric input or an
    invalid sub-choice prints a message and returns to the menu instead of
    raising.
    """
    while True:
        print("\n--- Movie Recommendation System ---")
        print("1. Explore the Dataset")
        print("2. Filter Movies")
        print("3. Search Movies")
        print("4. Rate a Movie")
        print("5. Add a Review")
        print("6. Get Recommendations")
        print("7. Top 10 Movies")
        print("8. Visualize Rating Distribution")
        print("9. Visualize Movie Counts per Genre")
        print("10. Analyze Sentiment of Reviews")
        print("11. Suggest Movies by Genre or Director")
        print("12. Exit")

        choice = input("Enter your choice: ")

        if choice == '1':
            explore_dataset(movies_df)
        elif choice == '2':
            genre = input("Enter genre to filter(leave blank if not needed): ")
            release_year = input("Enter release year to filter (leave blank if not needed): ").strip()
            director = input("Enter director to filter (leave blank if not needed): ")
            if release_year:
                try:
                    release_year = int(release_year)
                except ValueError:
                    print("Invalid year input. Please enter a valid integer for the year.")
                    continue
            else:
                release_year = None
            filtered = filter_movies(movies_df,
                                     genre=genre or None,
                                     release_year=release_year,
                                     director=director or None)
            print(filtered.to_string(index=False))
        elif choice == '3':
            search_type = input("Search by Title (1) or Keyword in Genre (2): ")
            if search_type == '1':
                title_keyword = input("Enter the title keyword: ")
                found_movies = search_movies_by_title(movies_df, title_keyword)
                print(found_movies.to_string(index=False))
            elif search_type == '2':
                keyword = input("Enter the keyword: ")
                found_movies = search_movies_by_genre(movies_df, keyword)
                print(found_movies.to_string(index=False))
            else:
                print("Invalid choice.")
        elif choice == '4':
            movie_id = _prompt_number("Enter Movie ID to rate: ")
            if movie_id is None:
                continue
            rating = _prompt_number("Enter your rating: ", float)
            if rating is None:
                continue
            rate_movie(movies_df, movie_id, rating)
        elif choice == '5':
            movie_id = _prompt_number("Enter Movie ID to review: ")
            if movie_id is None:
                continue
            review = input("Enter your review: ")
            add_movie_review(movies_df, movie_id, review)
        elif choice == '6':
            # Fixed sample profile keyed by index label. Labels missing from
            # this dataset are dropped so the lookup cannot fail on them.
            sample_ratings = {101: 5.0, 103: 4.5, 150: 3.0}
            user_ratings = {
                label: score
                for label, score in sample_ratings.items()
                if label in movies_df.index
            }
            recommendations = recommend_movies(movies_df, user_ratings)
            print(recommendations.to_string(index=False))
        elif choice == '7':
            criterion = input("Top 10 by Genre (1), Release Year (2), or Rating (3): ")
            if criterion == '1':
                genre = input("Enter the Genre you want to filter by: ")
                top_10 = top_10_movies(movies_df, by='Genre', specific_value=genre)
            elif criterion == '2':
                try:
                    release_year = int(input("Enter the Release Year you want to filter by: "))
                except ValueError:
                    print("Invalid year input. Please enter a valid integer for the year.")
                    continue
                top_10 = top_10_movies(movies_df, by='Release Year', specific_value=release_year)
            elif criterion == '3':
                top_10 = top_10_movies(movies_df, by='Rating')
            else:
                print("Invalid choice.")
                continue

            print(top_10.to_string(index=False))
        elif choice == '8':
            plot_rating_distribution(movies_df)
        elif choice == '9':
            plot_movie_counts_per_genre(movies_df)
        elif choice == '10':
            all_reviews = _collect_reviews(movies_df)

            if not all_reviews:
                print("No reviews available to analyze.")
            else:
                sentiments = analyze_sentiment(all_reviews)
                for review, sentiment in zip(all_reviews, sentiments):
                    print(f"Review: '{review}' -> Sentiment: {sentiment}")
        elif choice == '11':
            criteria = input("Suggest by Genre (1) or Director (2): ")
            if criteria == '1':
                genre = input("Enter the genre: ")
                suggestions = suggest_movies_by_genre_or_director(movies_df, genre=genre)
            elif criteria == '2':
                director = input("Enter the director's name: ")
                suggestions = suggest_movies_by_genre_or_director(movies_df, director=director)
            else:
                # Without this branch `suggestions` would be unbound (or
                # stale from an earlier iteration) when printed below.
                print("Invalid choice.")
                continue
            print(suggestions.to_string(index=False))
        elif choice == '12':
            print("Exiting the system. Goodbye!")
            break
        else:
            print("Invalid choice. Please try again.")

# Start the interactive menu on the DataFrame loaded from 'movies.csv' at the
# top of the file. If that load failed, `movies_df` was never assigned and
# this call raises NameError.
main_menu(movies_df)