<h3>Import Libraries</h3>

In [3]:
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

<h3>Read data</h3>

In [74]:
# Load the ratings CSV of each of the 4 groups (female/male x young/adult),
# in the same order as the target variable names on the left-hand side.
rating_fy, rating_fa, rating_my, rating_ma = map(
    pd.read_csv,
    (
        "processed_data/female_young_ratings.csv",
        "processed_data/female_adult_ratings.csv",
        "processed_data/male_young_ratings.csv",
        "processed_data/male_adult_ratings.csv",
    ),
)

In [75]:
# Load the recommended movie titles of each of the 4 groups as plain Python lists.
# Every file is read with its first column as the index, and only the
# 'Recommendation' column is kept.
recommendation_fy, recommendation_fa, recommendation_my, recommendation_ma = (
    pd.read_csv(csv_path, index_col=0)['Recommendation'].tolist()
    for csv_path in (
        "output/Female_Young_recommendation.csv",
        "output/Female_Adult_recommendation.csv",
        "output/Male_Young_recommendation.csv",
        "output/Male_Adult_recommendation.csv",
    )
)

<h1>Evaluation</h1>

In [76]:
# Define a function to evaluate the recommendations by checking them against the ratings
def evaluate(rating: pd.DataFrame, recommendation: list, top_n: int = 5) -> str:
    """Compare a list of recommended titles with the most-liked movies in ``rating``.

    For each movie title that has at least one liked rating, the percentage of
    its ratings that are liked is computed. The ``top_n`` titles with the
    highest percentage are printed together with ``recommendation``, and the
    share of those titles that also appear in ``recommendation`` is returned.

    Parameters
    ----------
    rating : pd.DataFrame
        Must contain a 'movie title' column and a 'like' column. A row counts
        as liked when ``like == True``.
    recommendation : list
        Recommended movie titles to check.
    top_n : int, default 5
        How many most-liked titles to compare against. It is also the
        denominator of the overlap percentage.

    Returns
    -------
    str
        ``"Percentage Overlap = <value>%"``.

    Raises
    ------
    ValueError
        If ``top_n`` is not positive (the overlap would divide by zero).
    """
    if top_n <= 0:
        raise ValueError("top_n must be a positive integer")

    # Explicit comparison (rather than using the column as a mask) keeps the
    # original semantics for non-boolean 'like' columns.
    is_liked = rating['like'].eq(True)

    # Liked ratings per title; titles without any liked rating are absent here
    # and are therefore never candidates for the "most liked" list.
    like_count = rating[is_liked].groupby('movie title')['like'].count()

    # Total (non-null) ratings per title, restricted to the titles above
    like_total = rating.groupby('movie title')['like'].count().reindex(like_count.index)

    # Percentage of likes per title
    like_percentage = like_count / like_total * 100

    # The top_n titles with the highest percentage of likes. nlargest keeps the
    # first occurrence on ties, so the result is deterministic.
    most_liked = like_percentage.nlargest(top_n).index.tolist()
    print(f'{top_n} Most Liked Movies: {most_liked}')
    print(f'{top_n} Recommendations: {recommendation}')

    # Share of the top_n slots that the recommendations hit, as a percentage
    overlap = len(set(most_liked) & set(recommendation)) / top_n * 100

    return f"Percentage Overlap = {overlap}%"

<h5>Female, Young Group</h5>

In [77]:
# Female, Young group: print the group label, then run the evaluation
# (which prints both title lists) and finally show the returned overlap summary.
print("Female, Young: ")
summary_fy = evaluate(rating_fy, recommendation_fy)
print(summary_fy)

Female, Young: 
5 Most Liked Movies: ['Shawshank Redemption, The', 'Casablanca', 'Usual Suspects, The', "Schindler's List", 'Rear Window']
5 Recommendations: ['Casablanca', 'Shawshank Redemption, The', "Schindler's List", 'Usual Suspects, The', "One Flew Over the Cuckoo's Nest"]
Percentage Overlap = 80.0%


<h5>Female, Adult Group</h5>

In [70]:
# Female, Adult group: print the group label, then run the evaluation
# (which prints both title lists) and finally show the returned overlap summary.
print("Female, Adult: ")
summary_fa = evaluate(rating_fa, recommendation_fa)
print(summary_fa)

Female, Adult: 
5 Most Liked Movies: ["Schindler's List", 'Fugitive, The', 'Boot, Das', 'Sense and Sensibility', 'To Kill a Mockingbird']
5 Recommendations: ["Schindler's List", 'Sense and Sensibility', 'Casablanca', 'Boot, Das', 'To Kill a Mockingbird']
Percentage Overlap = 80.0%


<h5>Male, Young Group</h5>

In [78]:
# Male, Young group: print the group label, then run the evaluation
# (which prints both title lists) and finally show the returned overlap summary.
print("Male, Young: ")
summary_my = evaluate(rating_my, recommendation_my)
print(summary_my)

Male, Young: 
5 Most Liked Movies: ['Silence of the Lambs, The', 'Godfather, The', 'Shawshank Redemption, The', 'Casablanca', 'Empire Strikes Back, The']
5 Recommendations: ['Godfather, The', 'Silence of the Lambs, The', 'Shawshank Redemption, The', 'Casablanca', 'Star Wars']
Percentage Overlap = 80.0%


<h5>Male, Adult Group</h5>

In [79]:
# Male, Adult group: print the group label, then run the evaluation
# (which prints both title lists) and finally show the returned overlap summary.
print("Male, Adult: ")
summary_ma = evaluate(rating_ma, recommendation_ma)
print(summary_ma)

Male, Adult: 
5 Most Liked Movies: ['Rear Window', 'To Kill a Mockingbird', "Schindler's List", "One Flew Over the Cuckoo's Nest", 'Shawshank Redemption, The']
5 Recommendations: ['To Kill a Mockingbird', "One Flew Over the Cuckoo's Nest", "Schindler's List", 'Shawshank Redemption, The', 'Rear Window']
Percentage Overlap = 100.0%
