In [1]:
import os
import pandas as pd
# Raw inputs for the recommender, read from relative paths.
# Later cells use the `movieId`/`rating` columns of the ratings frame and the
# `movieId`/`title`/`genres` columns of the movies frame.
df_ratings = pd.read_csv("../src/data/raw/ratings.csv")
df_movies = pd.read_csv("../src/data/raw/movies.csv")

In [5]:
def get_genre_recommendations(genre1, genre2, genre3, num_recommendations=10, excluded_genres=None,
                              movies=None, ratings=None, all_genres_share=0.7):
    """Recommend the best-rated movies for three requested genres.

    Movies are ranked by a Bayesian average rating. The result lists movies
    that carry all three genres first, followed by movies that carry at least
    one (but not all) of them. Each group is sorted by descending score, with
    unrated movies (NaN score) last.

    Args:
        genre1, genre2, genre3: Genre names to search for (case-insensitive).
        num_recommendations: Maximum number of rows to return. Values below
            zero are treated as zero.
        excluded_genres: Optional genre name or iterable of genre names
            (case-insensitive). Movies tagged with any of them are dropped.
        movies: DataFrame with `movieId` and pipe-separated `genres` columns.
            Defaults to the notebook-level `df_movies`.
        ratings: DataFrame with `movieId` and `rating` columns. Defaults to
            the notebook-level `df_ratings`.
        all_genres_share: Fraction of the slots reserved for movies matching
            all three genres (default 0.7).

    Returns:
        DataFrame with the columns of `movies` minus `genres`, plus
        `bayesian_avg_rating` and `genres_list` (lower-cased genre names).
        It can hold fewer than `num_recommendations` rows when there are not
        enough matching movies. The input frames are not modified.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if movies is None:
        movies = df_movies
    if ratings is None:
        ratings = df_ratings

    # head() with a negative count means "all but the last n", so clamp at zero.
    num_recommendations = max(int(num_recommendations), 0)

    # Per-movie mean rating and number of ratings.
    rating_stats = ratings.groupby('movieId')['rating'].agg(['mean', 'count']).reset_index()
    rating_stats.columns = ['movieId', 'avg_rating', 'num_ratings']

    # Bayesian average: shrink each movie's mean towards the mean of all
    # per-movie means. The prior weight (half of that mean) is the heuristic
    # used by the original implementation and is kept unchanged.
    global_mean = rating_stats['avg_rating'].mean()
    prior_weight = global_mean / 2
    rating_stats['bayesian_avg_rating'] = (
        prior_weight * global_mean + rating_stats['avg_rating'] * rating_stats['num_ratings']
    ) / (prior_weight + rating_stats['num_ratings'])

    # Left merge keeps unrated movies (their score is NaN). Duplicated movie
    # rows are removed up front so they cannot consume recommendation slots.
    candidates = (
        movies
        .merge(rating_stats[['movieId', 'bayesian_avg_rating']], on='movieId', how='left')
        .drop_duplicates(subset=['movieId'])
    )
    # Missing genre values become an empty list instead of raising on .split().
    candidates = candidates.assign(
        genres_list=candidates['genres'].apply(
            lambda raw: [part.lower() for part in raw.split('|')] if isinstance(raw, str) else []
        )
    ).drop("genres", axis=1)

    wanted = [genre1.lower(), genre2.lower(), genre3.lower()]

    # A bare string would otherwise be iterated character by character.
    if isinstance(excluded_genres, str):
        excluded_genres = [excluded_genres]
    excluded = {genre.lower() for genre in excluded_genres} if excluded_genres else set()

    genre_lists = candidates['genres_list']
    has_all = genre_lists.apply(lambda tags: all(genre in tags for genre in wanted)).astype(bool)
    has_any = genre_lists.apply(lambda tags: any(genre in tags for genre in wanted)).astype(bool)
    is_allowed = genre_lists.apply(lambda tags: excluded.isdisjoint(tags)).astype(bool)

    # A stable sort keeps tied scores in input order, so results are reproducible.
    all_matches = candidates.loc[has_all & is_allowed].sort_values(
        "bayesian_avg_rating", ascending=False, kind="mergesort")
    any_matches = candidates.loc[has_any & ~has_all & is_allowed].sort_values(
        "bayesian_avg_rating", ascending=False, kind="mergesort")

    # Split the slots between the two groups.
    quota_all = min(max(int(num_recommendations * all_genres_share), 0), num_recommendations)
    take_all = min(quota_all, len(all_matches))
    # Slots the all-genre group cannot fill go to the any-genre group ...
    take_any = min(num_recommendations - take_all, len(any_matches))
    # ... and slots the any-genre group cannot fill go back to the all-genre group.
    take_all = min(num_recommendations - take_any, len(all_matches))

    return pd.concat([all_matches.head(take_all), any_matches.head(take_any)])


In [6]:
# The three genres to search for in the recommendation calls below.
genre1, genre2, genre3 = "Animation", "drama", "adventure"

In [7]:
# Ten recommendations for the selected genres, with no genre excluded.
recommendations = get_genre_recommendations(
    genre1=genre1,
    genre2=genre2,
    genre3=genre3,
    num_recommendations=10,
)
recommendations

Unnamed: 0,movieId,title,bayesian_avg_rating,genres_list
2914,3000,Princess Mononoke (Mononoke-hime) (1997),4.096141,"[action, adventure, animation, drama, fantasy]"
6987,7099,NausicaÃ¤ of the Valley of the Wind (Kaze no t...,4.091631,"[adventure, animation, drama, fantasy, sci-fi]"
13767,68954,Up (2009),4.038113,"[adventure, animation, children, drama]"
8970,26662,Kiki's Delivery Service (Majo no takkyÃ»bin) (...,3.949362,"[adventure, animation, children, drama, fantasy]"
2675,2761,"Iron Giant, The (1999)",3.84791,"[adventure, animation, children, drama, sci-fi]"
7924,8607,Tokyo Godfathers (2003),3.845226,"[adventure, animation, drama]"
18513,92210,"Disappearance of Haruhi Suzumiya, The (Suzumiy...",3.783659,"[adventure, animation, drama, mystery, sci-fi]"
315,318,"Shawshank Redemption, The (1994)",4.446958,"[crime, drama]"
843,858,"Godfather, The (1972)",4.364686,"[crime, drama]"
23860,113315,Zero Motivation (Efes beyahasei enosh) (2014),4.32961,"[comedy, drama]"


In [105]:
def compare_movie_genres(film_title, recommendations):
    """Return the genres of one recommended film that no other listed film has.

    Args:
        film_title: Exact value of the `title` column identifying the film.
        recommendations: DataFrame with a `title` column and a `genres_list`
            column holding an iterable of genre names per row.

    Returns:
        Alphabetically sorted list of genres that appear on the target film
        but on none of the rows with a different title. Sorting keeps the
        result identical from run to run.

    Raises:
        ValueError: If no row has `film_title` as its title.
    """
    is_target = recommendations['title'] == film_title

    if not is_target.any():
        raise ValueError(f"Film '{film_title}' not found in the recommendation list.")

    # Genres of the target film (first matching row if the title is repeated).
    target_genres = set(recommendations.loc[is_target, 'genres_list'].iloc[0])

    # Union of the genres of every row with a different title.
    other_genres = set().union(*recommendations.loc[~is_target, 'genres_list'])

    # Genres found on the target film only.
    return sorted(target_genres - other_genres)


In [88]:
# Title of the film whose genres are compared against the other recommendations.
film_title = "Shawshank Redemption, The (1994)"

In [91]:
# Genres carried by the selected film but by no other film in `recommendations`.
different_genres = compare_movie_genres(
    film_title=film_title,
    recommendations=recommendations,
)

In [10]:
# Same query as before, this time dropping every movie tagged with an unwanted genre.
get_genre_recommendations(
    genre1,
    genre2,
    genre3,
    num_recommendations=10,
    excluded_genres=["crime", "war", "sci-fi"],
)

Unnamed: 0,movieId,title,bayesian_avg_rating,genres_list
2914,3000,Princess Mononoke (Mononoke-hime) (1997),4.096141,"[action, adventure, animation, drama, fantasy]"
13767,68954,Up (2009),4.038113,"[adventure, animation, children, drama]"
8970,26662,Kiki's Delivery Service (Majo no takkyÃ»bin) (...,3.949362,"[adventure, animation, children, drama, fantasy]"
7924,8607,Tokyo Godfathers (2003),3.845226,"[adventure, animation, drama]"
360,364,"Lion King, The (1994)",3.779064,"[adventure, animation, children, drama, musica..."
12393,57502,Cat Soup (Nekojiru-so) (2001),3.765002,"[adventure, animation, drama, horror]"
11512,49817,"Plague Dogs, The (1982)",3.761221,"[adventure, animation, drama]"
23860,113315,Zero Motivation (Efes beyahasei enosh) (2014),4.32961,"[comedy, drama]"
1935,2019,Seven Samurai (Shichinin no samurai) (1954),4.274026,"[action, adventure, drama]"
895,912,Casablanca (1942),4.258254,"[drama, romance]"
