# Non-Personalized Recommender Systems: Popularity Based

In [1]:
import os

if os.path.exists('movielens_small.zip'):
    !wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip 
    !unzip ml-latest-small.zip
    !rm ml-latest-small.zip
    !mv ml-latest-small movielens_small

## Damped Mean

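$$ s(i) = \frac{\sum_{u \in U_i} r_{ui} + a \times \mu}{|U_i| + a} $$

Where:
- $ s(i) $: The damped mean rating for item $ i $.
- $ U_i $: The set of users who rated item $ i $.
- $ r_{ui} $: The rating user $ u $ gave to item $ i $, so $ \sum_{u \in U_i} r_{ui} $ is the sum of the ratings for item $ i $.
- $ a $: Damping factor; it acts like $ a $ extra ratings at the global mean, so larger values pull items with few ratings more strongly toward $ \mu $.
- $ \mu $: Global mean rating, computed over all ratings in the dataset.
- $ |U_i| $: Total number of ratings for item $ i $.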


In [3]:
import pandas as pd

def load_data(data_dir="movielens_small"):
    """Load the MovieLens movies and ratings tables.

    Parameters
    ----------
    data_dir : str or path-like, default "movielens_small"
        Directory that contains ``movies.csv`` and ``ratings.csv``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(movies_df, ratings_df)`` exactly as parsed by ``pd.read_csv``.

    Raises
    ------
    FileNotFoundError
        If either CSV file is missing from ``data_dir``.
    """
    # `os` is imported in the notebook's first cell.
    movies_df = pd.read_csv(os.path.join(data_dir, "movies.csv"))
    ratings_df = pd.read_csv(os.path.join(data_dir, "ratings.csv"))
    return movies_df, ratings_df

def calculate_popularity(movies_df, ratings_df, damping_factor=5):
    """Attach popularity statistics to each movie.

    The damped mean pulls the ratings of sparsely-rated movies toward the
    global mean::

        s(i) = (sum of ratings for i + a * mu) / (number of ratings for i + a)

    where ``a`` is ``damping_factor`` and ``mu`` is the mean of all ratings.

    Parameters
    ----------
    movies_df : pandas.DataFrame
        Movie table; must contain a ``movieId`` column.
    ratings_df : pandas.DataFrame
        Ratings table; must contain ``movieId`` and ``rating`` columns.
    damping_factor : int or float, default 5
        Weight ``a`` given to the global mean in the formula above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns of ``movies_df`` plus ``num_ratings``,
        ``mean_rating`` and ``damped_mean_rating``. The input ``movies_df`` is
        not modified. Movies that have no ratings get NaN in all three columns.
    """
    # One pass over the ratings yields every per-movie statistic needed.
    per_movie = ratings_df.groupby("movieId")["rating"].agg(
        num_ratings="count", rating_sum="sum", mean_rating="mean"
    )
    global_mean = ratings_df["rating"].mean()

    # Damped mean: behaves as if each movie had `damping_factor` extra ratings
    # equal to the global mean.
    damped_mean_rating = (
        per_movie["rating_sum"] + damping_factor * global_mean
    ) / (per_movie["num_ratings"] + damping_factor)

    # `assign` returns a copy, so the caller's frame is left untouched.
    movie_ids = movies_df["movieId"]
    return movies_df.assign(
        num_ratings=movie_ids.map(per_movie["num_ratings"]),
        mean_rating=movie_ids.map(per_movie["mean_rating"]),
        damped_mean_rating=movie_ids.map(damped_mean_rating),
    )

# Read the raw tables, then enrich the movie table with popularity statistics.
# A damping factor of 10 is used here instead of the function's default of 5.
movies_df, ratings_df = load_data()
movies_df = calculate_popularity(
    movies_df=movies_df,
    ratings_df=ratings_df,
    damping_factor=10,
)

Let's compare the top movies when ranking by `num_ratings`, by `mean_rating`, and by `damped_mean_rating`.

In [198]:
# Top 5 movies by number of ratings.
movies_df.sort_values("num_ratings", ascending=False).head(5)

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,damped_mean_rating
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War,329.0,4.164134,4.144589
277,318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,4.400659
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,307.0,4.197068,4.175128
510,593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,279.0,4.16129,4.138462
1939,2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,278.0,4.192446,4.168457


In [204]:
# Top 5 movies by plain mean rating.
movies_df.sort_values("mean_rating", ascending=False).iloc[:5]

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,damped_mean_rating
7656,88448,Paper Birds (Pájaros de papel) (2010),Comedy|Drama,1.0,5.0,3.637779
8107,100556,"Act of Killing, The (2012)",Documentary,1.0,5.0,3.637779
9083,143031,Jump In! (2007),Comedy|Drama|Romance,1.0,5.0,3.637779
9094,143511,Human (2015),Documentary,1.0,5.0,3.637779
9096,143559,L.A. Slasher (2015),Comedy|Crime|Fantasy,1.0,5.0,3.637779


In [201]:
# Top 10 movies by damped mean rating.
movies_df.sort_values("damped_mean_rating", ascending=False).iloc[:10]

Unnamed: 0,movieId,title,genres,num_ratings,mean_rating,damped_mean_rating
277,318,"Shawshank Redemption, The (1994)",Crime|Drama,317.0,4.429022,4.400659
659,858,"Godfather, The (1972)",Crime|Drama,192.0,4.289062,4.250077
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller,218.0,4.272936,4.239103
922,1221,"Godfather: Part II, The (1974)",Crime|Drama,129.0,4.25969,4.205148
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,204.0,4.237745,4.203344
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,251.0,4.231076,4.203125
602,750,Dr. Strangelove or: How I Learned to Stop Worr...,Comedy|War,97.0,4.268041,4.196407
914,1213,Goodfellas (1990),Crime|Drama,126.0,4.25,4.194967
461,527,Schindler's List (1993),Drama|War,220.0,4.225,4.193546
6710,58559,"Dark Knight, The (2008)",Action|Crime|Drama|IMAX,149.0,4.238255,4.191922
