# Sorting IMDB Top 250 Movies

# Business Problem
In this section, we sort the IMDB Top 250 movies using two different
datasets and applying the following steps:
* Checking the datasets and getting general information
* MinMaxScaling the related features
* IMDB's ex-method for average rating
* Bayesian average rating
* Combining the two datasets to create a new weighted average rating

# Required Libraries

In [1]:
import math
import warnings
import numpy as np
import pandas as pd
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler

# Display options: show every column and every row, widen the console output
# to 500 characters, and print floats with two decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.width", 500)
pd.set_option("display.float_format", lambda x: "%.2f" % x)

# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas copy/deprecation warnings that can point at real problems.
warnings.filterwarnings('ignore')

# Importing the First Dataset

In [2]:
# NOTE(review): absolute Colab / Google Drive path; this cell only runs where
# that drive is mounted.
movies_metadata = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Datasets/movies_metadata.csv")
# Work on a copy so the raw frame stays intact; it is read again further down
# to fetch the original_title column.
df = movies_metadata.copy()
df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.95,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.02,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.71,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.86,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.39,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


## Updating the DataFrame

We update our dataframe as we don't need most of the columns. The columns that are necessary:

* title
* vote_average
* vote_count

In [3]:
# Keep only the columns needed for ranking. The explicit .copy() makes `df` an
# independent frame, so the score columns added in later cells are written to
# `df` itself rather than to a slice of the raw data (which pandas reports as
# SettingWithCopyWarning).
df = df[["title", "vote_average", "vote_count"]].copy()
df.head()

Unnamed: 0,title,vote_average,vote_count
0,Toy Story,7.7,5415.0
1,Jumanji,6.9,2413.0
2,Grumpier Old Men,6.5,92.0
3,Waiting to Exhale,6.1,34.0
4,Father of the Bride Part II,5.7,173.0


# General Information About the First Dataset

In [4]:
def check_df(dataframe, head=5, tail=5):
  """Print a quick overview of a DataFrame.

  The sections are printed in this order, each under a starred banner:
  first rows, last rows, shape, size, dtypes, missing values per column,
  and the transposed describe() table with deciles plus the 95th and 99th
  percentiles.

  Args:
    dataframe: the pandas DataFrame to inspect.
    head: number of leading rows to print.
    tail: number of trailing rows to print.

  Returns:
    None; everything is written to standard output.
  """
  stars = 20 * "*"
  quantiles = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95, 0.99]
  # Each entry is (banner label, zero-argument callable producing the section).
  # The callables are evaluated lazily so output appears section by section.
  sections = (
    ("HEAD", lambda: dataframe.head(head)),
    ("TAIL", lambda: dataframe.tail(tail)),
    ("SHAPE", lambda: dataframe.shape),
    ("SIZE", lambda: dataframe.size),
    ("TYPE", lambda: dataframe.dtypes),
    ("NA", lambda: dataframe.isnull().sum()),
    ("DESCRIPTION", lambda: dataframe.describe(percentiles=quantiles).T),
  )
  for label, build_section in sections:
    print(stars, label, stars)
    print(build_section())

In [5]:
check_df(df)

******************** HEAD ********************
                         title  vote_average  vote_count
0                    Toy Story          7.70     5415.00
1                      Jumanji          6.90     2413.00
2             Grumpier Old Men          6.50       92.00
3            Waiting to Exhale          6.10       34.00
4  Father of the Bride Part II          5.70      173.00
******************** TAIL ********************
                     title  vote_average  vote_count
45461               Subdue          4.00        1.00
45462  Century of Birthing          9.00        3.00
45463             Betrayal          3.80        6.00
45464     Satan Triumphant          0.00        0.00
45465             Queerama          0.00        0.00
******************** SHAPE ********************
(45466, 3)
******************** SIZE ********************
136398
******************** TYPE ********************
title            object
vote_average    float64
vote_count      float64
dtype: object


# Sorting the Movies by *vote_average*

The sorting is obviously faulty without taking **vote_count** into consideration.

In [6]:
df.sort_values("vote_average", ascending=False).head()

Unnamed: 0,title,vote_average,vote_count
21642,Ice Age Columbus: Who Were the First Americans?,10.0,1.0
15710,If God Is Willing and da Creek Don't Rise,10.0,1.0
22396,Meat the Truth,10.0,1.0
22395,Marvin Hamlisch: What He Did For Love,10.0,1.0
35343,Elaine Stritch: At Liberty,10.0,1.0


As the summary below shows, vote counts stay very low for most movies and only become substantial around the 95th percentile of the vote_count feature (434 votes):

|              |   count |      mean |       std |   min |   10% |   20% |   30% |   40% |   50% |   60% |   70% |   80% |   90% |   *95%* |     99% |   max |
|:-------------|--------:|----------:|----------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|------:|--------:|------:|
| vote_average |   45460 |   5.61821 |   1.92422 |     0 |   3.5 |   4.8 |   5.3 |   5.7 |     6 |   6.3 |   6.6 |     7 |   7.4 |   7.8 |    8.7  |    10 |
| vote_count   |   45460 | 109.897   | 491.31    |     0 |   1   |   2   |   4   |   6   |    10 |  15   |  25   |    50 | 160   | =>**434**   | 2183.82 | 14075 |

In [7]:
df[df["vote_count"] > 434].sort_values("vote_average", ascending=False).head()

Unnamed: 0,title,vote_average,vote_count
10309,Dilwale Dulhania Le Jayenge,9.1,661.0
40251,Your Name.,8.5,1030.0
314,The Shawshank Redemption,8.5,8358.0
834,The Godfather,8.5,6024.0
1176,Psycho,8.3,2405.0


The sorting starts to make sense when the *vote_count* is considered. Popular and top movies start to be seen in the dataframe:

|       | title                       |   vote_average |   vote_count |
|------:|:----------------------------|---------------:|-------------:|
| 10309 | Dilwale Dulhania Le Jayenge |            9.1 |          661 |
| 40251 | Your Name.                  |            8.5 |         1030 |
|   314 | ***The Shawshank Redemption***    |            8.5 |         8358 |
|   834 | ***The Godfather***               |            8.5 |         6024 |
|  1176 | ***Psycho***                      |            8.3 |         2405 |

# Min-Max Scaling the Related Features

To comprehend and analyze the data better, we are going to apply **MinMaxScaler** on **vote_count** and **vote_average**.

In [8]:
# Rescale vote_count onto 1-10 so it shares the IMDB rating range.
df["vote_count_score"] = MinMaxScaler(feature_range=(1, 10)).fit_transform(df[["vote_count"]])

In [9]:
df.sort_values("vote_count_score", ascending=False).head(10)
# Results are getting better

Unnamed: 0,title,vote_average,vote_count,vote_count_score
15480,Inception,8.1,14075.0,10.0
12481,The Dark Knight,8.3,12269.0,8.85
14551,Avatar,7.2,12114.0,8.75
17818,The Avengers,7.4,12000.0,8.67
26564,Deadpool,7.4,11444.0,8.32
22879,Interstellar,8.1,11187.0,8.15
20051,Django Unchained,7.8,10297.0,7.58
23753,Guardians of the Galaxy,7.9,10014.0,7.4
2843,Fight Club,8.3,9678.0,7.19
18244,The Hunger Games,6.9,9634.0,7.16


Now, we take **vote_average** into account.

In [10]:
# Combine quality and popularity: vote_average multiplied by the 1-10 scaled vote count.
df["average_count_score"] = df["vote_average"] * df["vote_count_score"]

In [11]:
df.sort_values("average_count_score", ascending=False).head(10)

Unnamed: 0,title,vote_average,vote_count,vote_count_score,average_count_score
15480,Inception,8.1,14075.0,10.0,81.0
12481,The Dark Knight,8.3,12269.0,8.85,73.42
22879,Interstellar,8.1,11187.0,8.15,66.04
17818,The Avengers,7.4,12000.0,8.67,64.18
14551,Avatar,7.2,12114.0,8.75,62.97
26564,Deadpool,7.4,11444.0,8.32,61.55
2843,Fight Club,8.3,9678.0,7.19,59.66
20051,Django Unchained,7.8,10297.0,7.58,59.16
23753,Guardians of the Galaxy,7.9,10014.0,7.4,58.49
292,Pulp Fiction,8.3,8670.0,6.54,54.31


# IMDB Weighted Rating

This is the obsolete formula that IMDB used until 2015.

IMDB Ex Formula:

* v => vote_count
* M => required minimum vote_count
* r => vote_average
* C => constant value determined by IMDB

weighted_rating = (v/(v+M)* r) + (M/(v+M)* C)

For the worked examples below, C is taken as **7** (the code cell further down uses the mean of `vote_average` instead):

C = 7.0

example 1:

* r = 8
* M = 500
* v = 1000

weighted_rating = (1000/(1000+500)* 8) + (500/(1000+500)* 7) = 7.67

example 2:

* r = 8
* M = 500
* v = 3000

weighted_rating = (3000/(3000+500)* 8) + (500/(3000+500)* 7) = 7.86

In [12]:
# Parameters of the former IMDB weighted-rating formula.
# M: required minimum vote_count.
M = 2500
# C: the rating every movie is pulled towards; set here to the mean
# vote_average of the first dataset.
C = df["vote_average"].mean()
def imdb_weighted_rating(r, v, M, C):
  """Former IMDB weighted rating: blend a movie's own rating with a prior.

  Args:
    r: the movie's average rating (vote_average).
    v: the number of votes the movie received (vote_count).
    M: the required minimum vote count; the larger it is, the more votes a
      movie needs before its own average outweighs C.
    C: the prior rating the result is pulled towards.

  Returns:
    (v / (v + M)) * r + (M / (v + M)) * C. Works element-wise when r and v
    are pandas Series.
  """
  total_votes = v + M
  own_share = v / total_votes
  prior_share = M / total_votes
  return (own_share * r) + (prior_share * C)

In [13]:
df["imdb_weighted_rating"] = imdb_weighted_rating(r=df["vote_average"], v=df["vote_count"], M=M, C=C)

In [14]:
df.sort_values("imdb_weighted_rating", ascending=False).head(10)

Unnamed: 0,title,vote_average,vote_count,vote_count_score,average_count_score,imdb_weighted_rating
12481,The Dark Knight,8.3,12269.0,8.85,73.42,7.85
314,The Shawshank Redemption,8.5,8358.0,6.34,53.93,7.84
2843,Fight Club,8.3,9678.0,7.19,59.66,7.75
15480,Inception,8.1,14075.0,10.0,81.0,7.73
292,Pulp Fiction,8.3,8670.0,6.54,54.31,7.7
834,The Godfather,8.5,6024.0,4.85,41.24,7.65
22879,Interstellar,8.1,11187.0,8.15,66.04,7.65
351,Forrest Gump,8.2,8147.0,6.21,50.92,7.59
7000,The Lord of the Rings: The Return of the King,8.1,8226.0,6.26,50.71,7.52
4863,The Lord of the Rings: The Fellowship of the Ring,8.0,8892.0,6.69,53.49,7.48


# Bayesian Average Rating (BAR)

This time, we import **another dataset** to benefit from BAR to sort the dataset.

Bayesian average rating formula:

![Alt text](https://wikimedia.org/api/rest_v1/media/math/render/svg/061e8b71312618ff414a60ab575232c9b6a878b4)

In [15]:
# Bayesian average rating as a function
def bayesian_average_rating(n, confidence=0.95):
    """Lower bound of the Bayesian credible interval for an average star rating.

    Args:
        n: iterable of vote counts ordered from the lowest star to the highest,
            i.e. the first item is the number of 1-star votes. Lists, tuples,
            NumPy arrays and pandas Series (with any index labels) are accepted.
        confidence: two-sided confidence level used to pick the normal
            quantile z (0.95 by default).

    Returns:
        0 when there are no votes at all; otherwise the expected rating under a
        uniform prior of one pseudo-vote per star level, minus z standard
        deviations of that estimate.
    """
    # Materialise the counts once and iterate over the values themselves.
    # The previous loop indexed n[k] with an integer, which on a label-indexed
    # Series depends on pandas' deprecated positional fallback.
    counts = list(n)
    N = sum(counts)
    if N == 0:
        return 0
    K = len(counts)
    z = st.norm.ppf(1 - (1 - confidence) / 2)
    first_part = 0.0   # expected rating (first moment)
    second_part = 0.0  # expected squared rating (second moment)
    for star, n_k in enumerate(counts, start=1):
        first_part += star * (n_k + 1) / (N + K)
        second_part += star * star * (n_k + 1) / (N + K)
    score = first_part - z * math.sqrt((second_part - first_part * first_part) / (N + K + 1))
    return score

# Importing the Second Dataset

In [16]:
# NOTE(review): absolute Colab / Google Drive path; this cell only runs where
# that drive is mounted.
imdb_top = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Datasets/imdb_ratings.csv")
# Work on a copy so the frame as loaded stays untouched.
df_2 = imdb_top.copy()
df_2.head()

Unnamed: 0.1,Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one
0,0,111161,1. The Shawshank Redemption (1994),9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733
1,1,68646,2. The Godfather (1972),9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128
2,2,71562,3. The Godfather: Part II (1974),9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469
3,3,468569,4. The Dark Knight (2008),9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345
4,4,50083,5. 12 Angry Men (1957),8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318


In [17]:
# Drop the leftover index column that was written into the CSV ("Unnamed: ...").
# Selecting by name instead of by position keeps this cell idempotent:
# re-running it no longer strips a real column such as `id`.
df_2 = df_2.loc[:, ~df_2.columns.str.startswith("Unnamed")].copy()
df_2.head()

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one
0,111161,1. The Shawshank Redemption (1994),9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733
1,68646,2. The Godfather (1972),9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128
2,71562,3. The Godfather: Part II (1974),9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469
3,468569,4. The Dark Knight (2008),9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345
4,50083,5. 12 Angry Men (1957),8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318


# General Information About the Second Dataset

In [18]:
check_df(df_2)

******************** HEAD ********************
       id                                 movieName  rating      ten    nine   eight   seven    six   five   four  three   two    one
0  111161  1.       The Shawshank Redemption (1994)    9.20  1295382  600284  273091   87368  26184  13515   6561   4704  4355  34733
1   68646             2.       The Godfather (1972)    9.10   837932  402527  199440   78541  30016  16603   8419   6268  5879  37128
2   71562    3.       The Godfather: Part II (1974)    9.00   486356  324905  175507   70847  26349  12657   6210   4347  3892  20469
3  468569           4.       The Dark Knight (2008)    9.00  1034863  649123  354610  137748  49483  23237  11429   8082  7173  30345
4   50083              5.       12 Angry Men (1957)    8.90   246765  225437  133998   48341  15773   6278   2866   1723  1478   8318
******************** TAIL ********************
          id                                 movieName  rating    ten   nine   eight  seven    six  fi

In [19]:
# Score every movie from its ten per-star vote counts, ordered from one star to ten.
df_2["bar_score"] = df_2[
    ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
].apply(bayesian_average_rating, axis=1)

In [20]:
df_2.sort_values("bar_score", ascending=False).head(10)
# Much more similar to the IMDB's current rating system.

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one,bar_score
0,111161,1. The Shawshank Redemption (1994),9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733,9.15
1,68646,2. The Godfather (1972),9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128,8.94
3,468569,4. The Dark Knight (2008),9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345,8.9
2,71562,3. The Godfather: Part II (1974),9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469,8.81
4,50083,5. 12 Angry Men (1957),8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318,8.77
6,167260,7. The Lord of the Rings: The Return of ...,8.9,703093,433087,270113,117411,44760,21818,10873,7987,6554,28990,8.75
5,108052,6. Schindler's List (1993),8.9,453906,383584,220586,82367,27219,12922,6234,4572,4289,19328,8.74
11,109830,12. Forrest Gump (1994),8.8,622104,553654,373644,151284,51140,22720,11692,7647,5941,12110,8.7
12,1375666,13. Inception (2010),8.7,724798,627987,408686,174229,60668,26910,13436,8703,6932,17621,8.69
10,137523,11. Fight Club (1999),8.8,637087,572654,371752,152295,53059,24755,12648,8606,6948,17435,8.67


# Using both Ex-IMDB Formula and Bayesian Average Rating

We have to merge the necessary features into one DataFrame to apply both formulas. However, the two datasets differ from each other in some respects, such as **different titles** and **different score columns**.



Example of the difference:

|       | title (first dataframe)                                            | movieName (second dataframe)                                                    |
|------:|:--------------------------------------------------|:--------------------------------------------------------------|
| 12481 | The Dark Knight                                   | 4.       The Dark Knight (2008)                               |
|   314 | The Shawshank Redemption                          | 1.       The Shawshank Redemption (1994)                      |
|  2843 | Fight Club                                        | 11.       Fight Club (1999)                                   |
| 15480 | Inception                                         | 13.       Inception (2010)                                    |
|   834 | The Godfather                                     | 2.       The Godfather (1972)                                 |
|   351 | Forrest Gump                                      | 12.       Forrest Gump (1994)                                 |
|  7000 | The Lord of the Rings: The Return of the King     | 7.       The Lord of the Rings: The Return of the King (2003) |

In [21]:
# Strip the leading rank ("12.") and the trailing release year ("(1999)") from
# the titles of the second dataframe. Anchored regexes only remove text that
# really is a rank or a year, so re-running this cell leaves already cleaned
# titles untouched (a fixed-width slice would keep cutting characters off).
df_2["movieName"] = (
    df_2["movieName"]
    .str.replace(r"^\s*\d+\.\s*", "", regex=True)
    .str.replace(r"\s*\(\d{4}\)\s*$", "", regex=True)
    .str.strip()
)
df_2.head()

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one,bar_score
0,111161,The Shawshank Redemption,9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733,9.15
1,68646,The Godfather,9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128,8.94
2,71562,The Godfather: Part II,9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469,8.81
3,468569,The Dark Knight,9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345,8.9
4,50083,12 Angry Men,8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318,8.77


Another problem is that the second dataframe lists movies under their original titles, whereas the first dataframe uses the English versions.

For example:

| title (first dataframe)                                            | movieName (second dataframe)                                                    |
|:--------------------------------------------------|:--------------------------------------------------------------|
| Life Is Beautiful                                   | La vita è bella                               |
| City of God                          | Cidade de Deus                      |
| Princess Mononoke                                        | Mononoke-hime                                  |

In [22]:
# Add the original-language titles from the raw frame. As far as the visible
# cells show, `df` still carries the row index of `movies_metadata`, so the
# assignment lines up row by row.
df["original_title"] = movies_metadata["original_title"]
df.sort_values("imdb_weighted_rating", ascending=False).head(10)

Unnamed: 0,title,vote_average,vote_count,vote_count_score,average_count_score,imdb_weighted_rating,original_title
12481,The Dark Knight,8.3,12269.0,8.85,73.42,7.85,The Dark Knight
314,The Shawshank Redemption,8.5,8358.0,6.34,53.93,7.84,The Shawshank Redemption
2843,Fight Club,8.3,9678.0,7.19,59.66,7.75,Fight Club
15480,Inception,8.1,14075.0,10.0,81.0,7.73,Inception
292,Pulp Fiction,8.3,8670.0,6.54,54.31,7.7,Pulp Fiction
834,The Godfather,8.5,6024.0,4.85,41.24,7.65,The Godfather
22879,Interstellar,8.1,11187.0,8.15,66.04,7.65,Interstellar
351,Forrest Gump,8.2,8147.0,6.21,50.92,7.59,Forrest Gump
7000,The Lord of the Rings: The Return of the King,8.1,8226.0,6.26,50.71,7.52,The Lord of the Rings: The Return of the King
4863,The Lord of the Rings: The Fellowship of the Ring,8.0,8892.0,6.69,53.49,7.48,The Lord of the Rings: The Fellowship of the Ring


In [23]:
# Add imdb_weighted_rating to the second dataframe by looking each movie up by
# its original title. drop_duplicates keeps the first row per title, which is
# the same row a per-title loop taking `.values[0]` would pick. Titles without
# a match stay NaN, and the column is created even if nothing matches.
rating_by_title = (
    df.drop_duplicates(subset="original_title")
    .set_index("original_title")["imdb_weighted_rating"]
)
df_2["imdb_weighted_rating"] = df_2["movieName"].map(rating_by_title)

In [24]:
df_2.head()

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one,bar_score,imdb_weighted_rating
0,111161,The Shawshank Redemption,9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733,9.15,7.84
1,68646,The Godfather,9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128,8.94,7.65
2,71562,The Godfather: Part II,9.0,486356,324905,175507,70847,26349,12657,6210,4347,3892,20469,8.81,7.17
3,468569,The Dark Knight,9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345,8.9,7.85
4,50083,12 Angry Men,8.9,246765,225437,133998,48341,15773,6278,2866,1723,1478,8318,8.77,6.81


In [25]:
# Even with these modifications, there are still 58 missing values
df_2.isnull().sum()

Unnamed: 0,0
id,0
movieName,0
rating,0
ten,0
nine,0
eight,0
seven,0
six,0
five,0
four,0


In [26]:
# Blend both scores into one rating: 70% Bayesian average rating plus 30% of
# the former IMDB weighted rating. Rows whose imdb_weighted_rating is missing
# end up NaN here as well, because NaN propagates through the sum.
df_2["total_weighted_rating"] = df_2['bar_score'] * 70/100 + df_2['imdb_weighted_rating'] * 30/100

In [27]:
df_2.sort_values("total_weighted_rating", ascending=False).head(20)

Unnamed: 0,id,movieName,rating,ten,nine,eight,seven,six,five,four,three,two,one,bar_score,imdb_weighted_rating,total_weighted_rating
0,111161,The Shawshank Redemption,9.2,1295382,600284,273091,87368,26184,13515,6561,4704,4355,34733,9.15,7.84,8.75
3,468569,The Dark Knight,9.0,1034863,649123,354610,137748,49483,23237,11429,8082,7173,30345,8.9,7.85,8.58
1,68646,The Godfather,9.1,837932,402527,199440,78541,30016,16603,8419,6268,5879,37128,8.94,7.65,8.55
12,1375666,Inception,8.7,724798,627987,408686,174229,60668,26910,13436,8703,6932,17621,8.69,7.73,8.4
10,137523,Fight Club,8.8,637087,572654,371752,152295,53059,24755,12648,8606,6948,17435,8.67,7.75,8.4
6,167260,The Lord of the Rings: The Return of the King,8.9,703093,433087,270113,117411,44760,21818,10873,7987,6554,28990,8.75,7.52,8.38
7,110912,Pulp Fiction,8.8,674884,541946,332876,140886,52091,26828,14203,10425,8912,25610,8.67,7.7,8.38
11,109830,Forrest Gump,8.8,622104,553654,373644,151284,51140,22720,11692,7647,5941,12110,8.7,7.59,8.37
28,816692,Interstellar,8.5,541682,412079,292240,149125,57253,24501,12271,7595,5618,12841,8.62,7.65,8.33
5,108052,Schindler's List,8.9,453906,383584,220586,82367,27219,12922,6234,4572,4289,19328,8.74,7.33,8.32
