## Standard Imports

In [1]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas warnings (dtype or chained-assignment
# notices, for example) — confirm that is intended.
warnings.filterwarnings('ignore')

## Data Loading and Cleaning

In [3]:
# Load the raw movie metadata table from a CSV file in the working directory.
movies = pd.read_csv("./movies_metadata.csv")

In [4]:
# Keep only the columns this notebook works with, then preview the result.
movies = movies[['title', 'genres', 'runtime', 'vote_average', 'vote_count', 'overview']]
movies.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview
0,Toy Story,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",81.0,7.7,5415.0,"Led by Woody, Andy's toys live happily in his ..."
1,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",104.0,6.9,2413.0,When siblings Judy and Peter discover an encha...
2,Grumpier Old Men,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",101.0,6.5,92.0,A family wedding reignites the ancient feud be...
3,Waiting to Exhale,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",127.0,6.1,34.0,"Cheated on, mistreated and stepped on, the wom..."
4,Father of the Bride Part II,"[{'id': 35, 'name': 'Comedy'}]",106.0,5.7,173.0,Just when George Banks has recovered from his ...


In [5]:
def clean_genres(x):
    """Turn a raw ``genres`` cell into a plain list of genre names.

    The raw cell is the string form of a list of dicts, e.g.
    ``"[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}]"``.

    Parameters
    ----------
    x : str or list
        The raw cell. A string is parsed as a Python literal; a list (for
        example an already-cleaned cell when this step is re-run) is used
        as-is. Anything else (NaN, numbers, ...) yields an empty list.

    Returns
    -------
    list of str
        Genre names in their original order. Names may contain spaces
        ("Science Fiction", "TV Movie"); entries whose name is missing or
        contains characters other than letters and spaces are skipped.
    """
    if isinstance(x, str):
        try:
            # Parse the literal properly instead of splitting on commas, which
            # depends on key order and breaks on any comma inside a value.
            x = ast.literal_eval(x)
        except (ValueError, SyntaxError):
            return []
    if not isinstance(x, (list, tuple)):
        return []

    names = []
    for entry in x:
        # Dict entries carry the genre under 'name'; plain strings are taken
        # to be genre names already, so re-running the cleaning step is safe.
        name = entry.get('name') if isinstance(entry, dict) else entry
        if not isinstance(name, str):
            continue
        name = name.strip()
        # Allow letters and spaces: a bare isalpha() check would reject
        # multi-word genres such as "Science Fiction".
        if name.replace(' ', '').isalpha():
            names.append(name)
    return names

In [6]:
# Replace each raw genres cell with the list returned by clean_genres.
movies['genres'] = movies['genres'].apply(clean_genres)

In [7]:
# Preview the first rows to check the cleaned genres column.
movies.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview
0,Toy Story,"[Animation, Comedy, Family]",81.0,7.7,5415.0,"Led by Woody, Andy's toys live happily in his ..."
1,Jumanji,"[Adventure, Fantasy, Family]",104.0,6.9,2413.0,When siblings Judy and Peter discover an encha...
2,Grumpier Old Men,"[Romance, Comedy]",101.0,6.5,92.0,A family wedding reignites the ancient feud be...
3,Waiting to Exhale,"[Comedy, Drama, Romance]",127.0,6.1,34.0,"Cheated on, mistreated and stepped on, the wom..."
4,Father of the Bride Part II,[Comedy],106.0,5.7,173.0,Just when George Banks has recovered from his ...


We are not using any user information or evaluation metrics in this notebook; we simply display the movies in the top 20 percent by vote count, ranked by our custom ***Score***.

In [9]:
# Vote-count cutoff: the 80th percentile, computed before the runtime filter below.
m = movies['vote_count'].quantile(0.8)
# Keep movies whose runtime lies in [45, 300]; rows with a missing runtime are dropped.
movies = movies[movies['runtime'].between(45, 300)]

Keep only the movies with a runtime between 45 and 300 minutes and a vote count at or above the 80th percentile.

In [10]:
# Keep only movies whose vote count reaches the cutoff m.
movies = movies.loc[movies['vote_count'].ge(m)]

In addition to the above, we use our own ***Score***, which takes into account both how many people voted and the average rating they gave.

<img src="Score_Equation.png">

In our case x = 80

In [11]:
# Mean rating over the movies that survived the filters above.
C = movies['vote_average'].mean()


def score(x):
    """Weighted rating that blends a movie's own average with the mean C.

    Reads the module-level cutoff ``m`` and mean rating ``C``.

    Parameters
    ----------
    x : row-like
        Anything indexable by ``'vote_count'`` and ``'vote_average'``.

    Returns
    -------
    ``v/(v+m) * R + m/(m+v) * C``, where ``v`` is the vote count and ``R``
    the vote average. The more votes a movie has, the closer the result
    is to its own average.
    """
    votes = x['vote_count']
    rating = x['vote_average']
    own_weight = votes / (votes + m)
    prior_weight = m / (m + votes)
    return own_weight * rating + (prior_weight * C)

In [12]:
# Compute the score for every movie in one vectorised call: `score` only does
# arithmetic on the 'vote_count' and 'vote_average' columns, so it accepts the
# whole frame as well as a single row, and avoids a Python call per row.
# `assign` builds a new frame instead of writing a column into the filtered
# slice, which would otherwise raise pandas' SettingWithCopyWarning.
movies = movies.assign(score=score(movies))

In [13]:
# Preview the first rows, now including the score column.
movies.head()

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview,score
0,Toy Story,"[Animation, Comedy, Family]",81.0,7.7,5415.0,"Led by Woody, Andy's toys live happily in his ...",7.687662
1,Jumanji,"[Adventure, Fantasy, Family]",104.0,6.9,2413.0,When siblings Judy and Peter discover an encha...,6.888865
2,Grumpier Old Men,"[Romance, Comedy]",101.0,6.5,92.0,A family wedding reignites the ancient feud be...,6.447711
4,Father of the Bride Part II,[Comedy],106.0,5.7,173.0,Just when George Banks has recovered from his ...,5.846076
5,Heat,"[Action, Crime, Drama, Thriller]",170.0,7.7,1886.0,"Obsessive master thief, Neil McCauley leads a ...",7.665173


In [15]:
# Rank by score, best first, and show the 50 highest-scoring movies.
movies.sort_values('score', ascending=False).head(50)

Unnamed: 0,title,genres,runtime,vote_average,vote_count,overview,score
10309,Dilwale Dulhania Le Jayenge,"[Comedy, Drama, Romance]",190.0,9.1,661.0,"Raj is a rich, carefree, happy-go-lucky second...",8.906716
314,The Shawshank Redemption,"[Drama, Crime]",142.0,8.5,8358.0,Framed in the 1940s for the double murder of h...,8.487223
834,The Godfather,"[Drama, Crime]",175.0,8.5,6024.0,"Spanning the years 1945 to 1955, a chronicle o...",8.482314
40251,Your Name.,"[Romance, Animation, Drama]",106.0,8.5,1030.0,High schoolers Mitsuha and Taki are complete s...,8.400532
12481,The Dark Knight,"[Drama, Action, Crime, Thriller]",152.0,8.3,12269.0,Batman raises the stakes in his war on crime. ...,8.292091
2843,Fight Club,[Drama],139.0,8.3,9678.0,A ticking-time-bomb insomniac and a slippery s...,8.289985
292,Pulp Fiction,"[Thriller, Crime]",154.0,8.3,8670.0,"A burger-loving hit man, his philosophical par...",8.288827
522,Schindler's List,"[Drama, History, War]",195.0,8.3,4436.0,The true story of how businessman Oskar Schind...,8.278282
23673,Whiplash,[Drama],105.0,8.3,4376.0,"Under the direction of a ruthless instructor, ...",8.277988
5481,Spirited Away,"[Fantasy, Adventure, Animation, Family]",125.0,8.3,3968.0,A ten year old girl who wanders away from her ...,8.275753
