In [1]:
# Dependencies
import pandas as pd
import numpy as np


In [2]:
# Loading in Movies CSV
movie_list = pd.read_csv("movies.csv")
movies_db = pd.DataFrame(movie_list)
movies_db


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy
10325,146878,Le Grand Restaurant (1966),Comedy
10326,148238,A Very Murray Christmas (2015),Comedy
10327,148626,The Big Short (2015),Drama


In [3]:
# Loading in Ratings CSV
ratings_list = pd.read_csv("ratings.csv")
ratings_db = pd.DataFrame(ratings_list)
ratings_db

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523
...,...,...,...,...
105334,668,142488,4.0,1451535844
105335,668,142507,3.5,1451535889
105336,668,143385,4.0,1446388585
105337,668,144976,2.5,1448656898


In [4]:
# Merging the Data sets
movie_ratings = movies_db.merge(ratings_db, on="movieId")
movie_ratings

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,5.0,859046895
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,1303501039
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,5.0,858610933
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.0,850815810
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.0,851766286
...,...,...,...,...,...,...
105334,148238,A Very Murray Christmas (2015),Comedy,475,3.0,1451213043
105335,148626,The Big Short (2015),Drama,458,4.0,1452014749
105336,148626,The Big Short (2015),Drama,576,4.5,1451687664
105337,148626,The Big Short (2015),Drama,668,4.5,1451148148


In [5]:
avg_rating = movie_ratings['rating'].mean()
rating_count = movie_ratings['rating'].count()

print(f'Average Rating: {avg_rating}')
print(f'Total Ratings: {rating_count}')


Average Rating: 3.5168503593161127
Total Ratings: 105339


In [6]:
# Creating a column for the number of ratings that movie has
movie_ratings['num_ratings'] = movie_ratings.groupby('title')['title'].transform('count')
movie_ratings

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,num_ratings
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,5.0,859046895,232
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,1303501039,232
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,5.0,858610933,232
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.0,850815810,232
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.0,851766286,232
...,...,...,...,...,...,...,...
105334,148238,A Very Murray Christmas (2015),Comedy,475,3.0,1451213043,1
105335,148626,The Big Short (2015),Drama,458,4.0,1452014749,3
105336,148626,The Big Short (2015),Drama,576,4.5,1451687664,3
105337,148626,The Big Short (2015),Drama,668,4.5,1451148148,3


In [7]:
movie_ratings['avg_rating_col'] = movie_ratings.groupby('title')['rating'].transform('mean')
movie_ratings

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,num_ratings,avg_rating_col
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,5.0,859046895,232,3.907328
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,1303501039,232,3.907328
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,5.0,858610933,232,3.907328
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.0,850815810,232,3.907328
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.0,851766286,232,3.907328
...,...,...,...,...,...,...,...,...
105334,148238,A Very Murray Christmas (2015),Comedy,475,3.0,1451213043,1,3.000000
105335,148626,The Big Short (2015),Drama,458,4.0,1452014749,3,4.333333
105336,148626,The Big Short (2015),Drama,576,4.5,1451687664,3,4.333333
105337,148626,The Big Short (2015),Drama,668,4.5,1451148148,3,4.333333


In [8]:
# Created a variable for the minimum amount of ratings needed for it to be included
m= movie_ratings['num_ratings'].quantile(0.9)
m

157.0

In [9]:
filtered_movies = movie_ratings.copy().loc[movie_ratings['num_ratings'] >= m]
filtered_movies.shape

(10571, 8)

In [10]:
def weighted_rating(x, m=m, C=avg_rating):
    v = x['num_ratings']
    R = x['avg_rating_col']
    # Calculation based on the IMDB formula
    return (v/(v+m) * R) + (m/(m+v) * C)

In [11]:
# Define a new feature 'score' and calculate its value with `weighted_rating()`
filtered_movies['score'] = filtered_movies.apply(weighted_rating, axis=1)

In [12]:
filtered_movies = filtered_movies.sort_values('score', ascending=False)

#Print the top 15 movies
filtered_movies[['title','genres', 'num_ratings', 'avg_rating_col', 'score']].drop_duplicates(subset='title', keep='first', inplace=False).head(15)

Unnamed: 0,title,genres,num_ratings,avg_rating_col,score
9797,"Shawshank Redemption, The (1994)",Crime|Drama,308,4.454545,4.137947
22840,"Godfather, The (1972)",Crime|Drama,210,4.392857,4.018108
2731,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,228,4.328947,3.997781
16088,Schindler's List (1993),Drama|War,248,4.296371,3.994186
53473,"Matrix, The (1999)",Action|Sci-Fi|Thriller,261,4.264368,3.983602
18579,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,290,4.194828,3.956701
8858,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,325,4.16,3.950509
7654,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,273,4.188645,3.943362
19195,Fargo (1996),Comedy|Crime|Drama|Thriller,201,4.271144,3.940351
28913,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Sci-Fi,228,4.22807,3.93804


In [16]:
filtered_movies['genres'] = filtered_movies['genres'].str.split('|')


In [17]:
filtered_movies['genres']

9797    NaN
9771    NaN
9764    NaN
9765    NaN
9766    NaN
         ..
10565   NaN
10564   NaN
10563   NaN
10562   NaN
10580   NaN
Name: genres, Length: 10571, dtype: float64

In [15]:
for genre in filtered_movies['genres']:
    print(genre)

['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'Drama']
['Crime', 'D

['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action', 'Adventure|Sci-Fi']
['Action

['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Adventure', 'Fantasy']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adventure|Horror|Sci-Fi']
['Action', 'Adv

['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action', 'Sci-Fi|Thriller']
['Action',

['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thriller']
['Action', 'Adventure|Comedy|Romance|Thr