In [33]:
import pandas as pd 
import numpy as np 
from ast import literal_eval
from tabulate import tabulate  # Import tabulate for clean table formatting
import itertools


In [79]:
# Load the cleaned movie table that the genre lookup below works on.
# Later code reads these columns: 'genres', 'title', 'score',
# 'vote_average' and 'vote_count'.
# Alternative input kept for reference (not loaded):
#q_movies = pd.read_parquet("../tmbd_exports/quality_movs_weighted_rating.parquet")
q_movies = pd.read_parquet("../tmdb_api/tmdb_api_cleaned/movies_cleaned_hard.parquet")

In [88]:
# Function to get all unique genres in the dataset -> this allows to provide a list of available genres before user input

def get_all_genres(movies_df):
    """Return the sorted list of distinct genre names in ``movies_df``.

    Each non-null value of the ``genres`` column is read as a
    comma-separated string; every piece is stripped of surrounding
    whitespace before being collected.

    Parameters
    ----------
    movies_df : pandas.DataFrame
        Frame with a ``genres`` column of comma-separated genre strings.
        Null entries are ignored.

    Returns
    -------
    list of str
        Unique genre names in ascending (case-sensitive) order. Blank
        names are never included.
    """
    unique_genres = {
        genre.strip()
        for genres in movies_df["genres"].dropna()
        for genre in genres.split(",")
        # An empty cell ("") or a stray/trailing comma yields an empty piece;
        # skip it so the list shown to the user has no blank entry.
        if genre.strip()
    }
    return sorted(unique_genres)

# Function to filter movies by genre (case-insensitive) -> without this if user wrote 'action' instead of 'Action', no movies would come up 
def filter_movies_by_genre(movies_df, selected_genre, top_n=10):
    """Return the first ``top_n`` movies whose genre list contains ``selected_genre``.

    Matching is case-insensitive and ignores surrounding whitespace, both in
    the requested genre and in each comma-separated entry of ``genres``.

    Parameters
    ----------
    movies_df : pandas.DataFrame
        Frame with a ``genres`` column of comma-separated genre strings.
    selected_genre : str
        Genre to look for, e.g. ``"action"`` or ``" Action "``.
    top_n : int or None, default 10
        Maximum number of rows to return; ``None`` returns every match.

    Returns
    -------
    pandas.DataFrame
        Matching rows in the order they appear in ``movies_df``. No sorting
        is done here, so pass an already-ranked frame to get a ranked result.
    """
    cleaned_genre = selected_genre.lower().strip()

    # A blank request must not match blank pieces produced by empty cells
    # or stray commas; treat it as "no genre selected".
    if not cleaned_genre:
        return movies_df.iloc[0:0]

    def _has_genre(raw_genres):
        # Missing values (NaN/None) are not strings and have no .split();
        # a movie without genre information simply does not match.
        if not isinstance(raw_genres, str):
            return False
        return cleaned_genre in (g.lower().strip() for g in raw_genres.split(","))

    # astype(bool) keeps the mask boolean even when the frame is empty.
    genre_filter = movies_df["genres"].apply(_has_genre).astype(bool)
    return movies_df.loc[genre_filter].iloc[:top_n]

# Show the genres present in the dataset so the user knows the valid choices
available_genres = get_all_genres(q_movies)
print("\nAvailable Genres:\n" + ", ".join(available_genres) + "\n")

# Ask the user which genre to look up
user_genre = input("Enter a genre from the list above: ").strip()

# Rank by 'score' BEFORE filtering: filter_movies_by_genre only keeps the first
# 10 matches in row order, so the heading below ("Ranked by Weighted Rating")
# is only true if the rows are already in descending score order. The stable
# sort leaves an already-sorted frame (and any ties) in its existing order.
# NOTE(review): 'score' is presumably the weighted rating the heading refers to - confirm.
filtered_movies = filter_movies_by_genre(
    q_movies.sort_values("score", ascending=False, kind="stable"),
    user_genre,
)

# Print the matches as a table, or tell the user nothing matched
if not filtered_movies.empty:
    print("\nTop 10 Movies in the Selected Genre (Ranked by Weighted Rating):\n")
    print(tabulate(filtered_movies[['title', 'score', 'vote_average', 'vote_count']], 
                   headers="keys", tablefmt="pretty", showindex=False))  # Pretty table format

else:
    print("\nNo movies found in this genre.")



Available Genres:
Action, Adventure, Animation, Comedy, Crime, Documentary, Drama, Family, Fantasy, History, Horror, Music, Mystery, Romance, Science Fiction, TV Movie, Thriller, War, Western



Enter a genre from the list above:  Crime



Top 10 Movies in the Selected Genre (Ranked by Weighted Rating):

+--------------------------+-------------------+--------------+------------+
|          title           |       score       | vote_average | vote_count |
+--------------------------+-------------------+--------------+------------+
| The Shawshank Redemption | 8.672939802561705 |     8.7      |   27833    |
|      The Godfather       | 8.653599980834132 |    8.689     |   21124    |
|  The Godfather Part II   | 8.543745071370289 |     8.6      |   12750    |
|     The Dark Knight      | 8.479128257350427 |     8.5      |   33504    |
|       Pulp Fiction       | 8.475412727614888 |     8.5      |   28399    |
|      The Green Mile      | 8.461152109631522 |     8.5      |   17872    |
|        GoodFellas        | 8.447522386857107 |     8.5      |   13158    |
|          Se7en           | 8.368834474038843 |     8.4      |   21454    |
|       City of God        | 8.339340602894536 |    8.427     |    7534    |
| The Sil