In [49]:
import pandas as pd
import tqdm
import datetime

In [50]:
# import and format the data
df = pd.read_csv('/Users/andy/Desktop/Netflix 2020 w IMDB Ratings.csv')
df['CleanDate'] = pd.to_datetime(df['CleanDate'])
df['CleanDate'] = df['CleanDate'].dt.date
df['Originals'] = df['Originals'].astype('category')
df['DevDate'] = df['DevDate'].astype('category')
df['Type'] = df['Type'].astype('category')
df['Y'] = df['Y'].astype('category')
df['Country'] = df['Country'].astype('category')

In [51]:
# import a second dataframe with the other dataset that contains genres
genres = pd.read_csv('/Users/andy/Desktop/US Netflix Data - One Row Per Genre.csv')
genres['DATE'] = pd.to_datetime(genres['DATE'])
genres['DATE'] = genres['DATE'].dt.date
genres['GENRES'] = genres['GENRES'].astype('category')
genres['TYPE'] = genres['TYPE'].astype('category')

# rename the imdbtag column so its the same as the other dataframe
genres = genres.rename(columns={'IMDBTag': 'imdbtag'})

In [52]:
# add the genre column from the other dataframe to the main dataframe so that every movie/tv show has a genre (joining on imdbtag)
df = pd.merge(df,genres[['imdbtag','GENRES']],on='imdbtag')

In [53]:
# make the genre the same format as the other columns and put the column in the right place
df = df.rename(columns={'GENRES':'Genre'})
cols = ['Unnamed: 0', 'CleanDate', 'Y', 'Title', 'DevDate', 'Genre','imdbtag', 'Type',
       'Country', 'Originals', 'Rating', 'N_Ratings']
df = df.reindex(columns=cols)

In [55]:
# make sure the columns are formatted correctly
df.head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,Genre,imdbtag,Type,Country,Originals,Rating,N_Ratings
0,2,2020-01-24,2020.0,A Sun,2019,Dramas,tt10883506,movies,at,1,8.0,326.0
1,2,2020-01-24,2020.0,A Sun,2019,Independent Movies,tt10883506,movies,at,1,8.0,326.0
2,2,2020-01-24,2020.0,A Sun,2019,International Movies,tt10883506,movies,at,1,8.0,326.0
3,4145,NaT,,A Sun,2019,Dramas,tt10883506,movies,br,1,8.0,326.0
4,4145,NaT,,A Sun,2019,Independent Movies,tt10883506,movies,br,1,8.0,326.0
5,4145,NaT,,A Sun,2019,International Movies,tt10883506,movies,br,1,8.0,326.0
6,12553,2020-01-24,2020.0,A Sun,2019,Dramas,tt10883506,movies,dk,1,8.0,326.0
7,12553,2020-01-24,2020.0,A Sun,2019,Independent Movies,tt10883506,movies,dk,1,8.0,326.0
8,12553,2020-01-24,2020.0,A Sun,2019,International Movies,tt10883506,movies,dk,1,8.0,326.0
9,19748,2020-01-24,2020.0,A Sun,2019,Dramas,tt10883506,movies,fi,1,8.0,326.0


In [56]:
# get average ratings for all content, all original content, and all licensed content
avgREverything = df.Rating.mean()
avgRAllOriginals = df[df['Originals'] == 1].Rating.mean()
avgRAllLicensed = df[df['Originals'] == 0].Rating.mean()

In [57]:
# get average ratings for original vs. licensed movies and TV
avgROriginalMovies = df[(df['Originals'] == 1) & (df['Type'] == 'movies')].Rating.mean()
avgROriginalTV = df[(df['Originals'] == 1) & (df['Type'] == 'tvshows')].Rating.mean()
avgRLicensedMovies = df[(df['Originals'] == 0) & (df['Type'] == 'movies')].Rating.mean()
avgRLicensedTV = df[(df['Originals'] == 0) & (df['Type'] == 'tvshows')].Rating.mean()

In [58]:
# HYPOTHESIS 1: Netflix originals have higher IMDB ratings than other non-original shows on Netflix

In [59]:
# average rating of the entire dataset
avgREverything

6.630430920431134

In [60]:
# average rating for all Netflix original content
avgRAllOriginals

6.918253630673347

In [61]:
# average rating for all licensed content
avgRAllLicensed

6.500491043261357

In [62]:
# average rating for Netflix original movies
avgROriginalMovies

6.824673899792817

In [63]:
# average rating for Netflix original TV shows
avgROriginalTV

7.011255149018706

In [64]:
# average rating for licensed movies
avgRLicensedMovies

6.430578093306254

In [65]:
# average rating for licensed TV shows
avgRLicensedTV

6.582884121197618

In [66]:
# CONCLUSION #1: NETFLIX ORIGINALS HAVE HIGHER RATINGS THAN LICENSED CONTENT

In [67]:
# HYPOTHESIS 2: Netflix original content only has higher ratings because it tends to be within specific genres that are more popular

In [68]:
# see the average rating of each genre for licensed vs original content
df.groupby(['Genre','Originals']).agg({'Rating':['mean']})

Unnamed: 0_level_0,Unnamed: 1_level_0,Rating
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
Genre,Originals,Unnamed: 2_level_2
Action & Adventure,0,5.870451
Action & Adventure,1,6.543363
Anime Features,0,6.075294
Anime Series,0,7.487047
Anime Series,1,7.414146
British TV Shows,0,7.297482
British TV Shows,1,7.527891
Children & Family Movies,0,5.759858
Children & Family Movies,1,5.481518
Classic & Cult TV,0,7.364706


In [69]:
# if we wanted to be more thorough and scientific, we could run a regression analysis to see the effect that a genre has on a rating
# anecdotally though, it looks like Netflix original content is better than licensed content no matter the genre (with a few exceptions)

In [70]:
# CONCLUSION: NETFLIX CONTENT IS BETTER ACROSS MOST GENRES

In [None]:
# HYPOTHESIS 3: Netflix originals are more popular than non-original counterparts because they are made in line with changing genre preferences
# note: hypothesis 2 has already been disproven, so we will just look at how genre preferences have changed over time

In [71]:
df.groupby(['Genre','DevDate']).agg({'Rating':['mean']})

Unnamed: 0_level_0,Unnamed: 1_level_0,Rating
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
Genre,DevDate,Unnamed: 2_level_2
Action & Adventure,1975,8.200000
Action & Adventure,1978,6.300000
Action & Adventure,1979,6.600000
Action & Adventure,1983,6.050000
Action & Adventure,1985,5.500000
Action & Adventure,1990,7.600000
Action & Adventure,1995,6.860000
Action & Adventure,1996,6.500000
Action & Adventure,1997,6.569231
Action & Adventure,1998,6.200000


In [160]:
# put it in a format that's more intuitive to read
byYear = df.groupby(['Genre','DevDate'],as_index=False).agg({'Rating':['mean']})
byYear.columns = byYear.columns.droplevel(1)
byYear.pivot(index='Genre',columns='DevDate')

Unnamed: 0_level_0,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating,Rating
DevDate,1975,1978,1979,1983,1985,1990,1995,1996,1997,1998,...,1967,1972,1993,1962,1974,1981,1988,1989,1987,1991
Genre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Action & Adventure,8.2,6.3,6.6,6.05,5.5,7.6,6.86,6.5,6.569231,6.2,...,,,,,,,,,,
Anime Features,,,,,,,,,,,...,,,,,,,,,,
Anime Series,,,,,,,,,7.4,,...,,,,,,,,,,
British TV Shows,,,,,,,,,,,...,,,,,,,,,,
Children & Family Movies,,5.1,,,,,,6.4,,,...,,,,,,,,,,
Classic & Cult TV,,,,,,8.0,,,,,...,,,,,,,,,,
Classic Movies,8.2,,6.6,,,,,,,,...,7.1,8.6,8.0,,,,,,,
Comedies,8.2,,,,5.925,7.2,6.9,6.4,6.0,6.2,...,,,8.0,7.1,7.4,5.3,7.6,,,
Crime TV Shows,,,,,,,,,,,...,,,,,,,,,,
Cult Movies,,,6.6,,,,,,,7.1,...,,,8.0,,,,7.6,,,


In [None]:
# CONCLUSION: Genre preferences change quite a bit over time. anecdotally, no trends can be identified

In [None]:
# EXPERIMENTATION BELOW, NOT PART OF ORIGINAL HYPOTHESES

In [28]:
# what is the best content on Netflix worldwide?
df.sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(20)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
135895,135895,2013-02-08,2013.0,Breaking Bad,2013,tt0903747,tvshows,us,0,9.5,1303196.0
133226,133226,2018-11-06,2018.0,Planet Earth II,2016,tt5491994,tvshows,uk,0,9.5,83871.0
133353,133353,2018-03-30,2018.0,Planet Earth: The Complete Collection,2006,tt0795176,tvshows,uk,0,9.4,156531.0
132547,132547,2019-10-06,2019.0,Blue Planet II,2017,tt6769208,tvshows,uk,0,9.4,26376.0
54268,54268,2019-05-04,2019.0,Gezegenimiz,2019,tt9253866,movies,tr,1,9.3,21242.0
12624,12624,2020-01-01,2020.0,En verden udenfor,1994,tt0111161,movies,dk,0,9.3,2183286.0
82047,82047,2018-09-15,2018.0,Les Evadés,1994,tt0111161,tvshows,fr,0,9.3,2183286.0
12703,12703,2019-12-22,2019.0,Rick and Morty,2019,tt2861424,movies,dk,0,9.3,317362.0
17422,17422,2019-05-04,2019.0,Nuestro planeta,2019,tt9253866,movies,es,1,9.3,21242.0
82767,82767,2017-04-11,2017.0,Rick et Morty,2017,tt2861424,tvshows,fr,0,9.3,317362.0


In [29]:
# what is the best Netflix original content worldwide?
df[df['Originals'] == 1].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(20)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
81248,81248,2019-05-04,2019.0,Notre planète,2019,tt9253866,tvshows,fr,1,9.3,21242.0
107148,107148,2019-05-04,2019.0,Our Planet,2019,tt9253866,tvshows,se,1,9.3,21242.0
100352,100352,2019-05-04,2019.0,Nasza planeta,2019,tt9253866,tvshows,pl,1,9.3,21242.0
88964,88964,2019-05-04,2019.0,Nuestro planeta,2019,tt9253866,tvshows,mx,1,9.3,21242.0
66481,66481,2019-05-04,2019.0,Unser Planet,2019,tt9253866,tvshows,de,1,9.3,21242.0
54268,54268,2019-05-04,2019.0,Gezegenimiz,2019,tt9253866,movies,tr,1,9.3,21242.0
85180,85180,2019-05-04,2019.0,Il nostro pianeta,2019,tt9253866,tvshows,it,1,9.3,21242.0
5702,5702,2019-05-04,2019.0,Nosso Planeta,2019,tt9253866,movies,br,1,9.3,21242.0
77579,77579,2019-05-04,2019.0,Our Planet: Uskomaton planeettamme,2019,tt9253866,tvshows,fi,1,9.3,21242.0
40304,40304,2019-05-04,2019.0,Our Planet – Vårt utrolige hjem,2019,tt9253866,movies,no,1,9.3,21242.0


In [30]:
# what is the best licensed content worldwide?
df[df['Originals'] == 0].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(20)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
135895,135895,2013-02-08,2013.0,Breaking Bad,2013,tt0903747,tvshows,us,0,9.5,1303196.0
133226,133226,2018-11-06,2018.0,Planet Earth II,2016,tt5491994,tvshows,uk,0,9.5,83871.0
133353,133353,2018-03-30,2018.0,Planet Earth: The Complete Collection,2006,tt0795176,tvshows,uk,0,9.4,156531.0
132547,132547,2019-10-06,2019.0,Blue Planet II,2017,tt6769208,tvshows,uk,0,9.4,26376.0
67925,67925,2018-01-29,2018.0,Rick and Morty,2017,tt2861424,tvshows,de,0,9.3,317362.0
18480,18480,2018-03-19,2018.0,Rick y Morty,2017,tt2861424,movies,es,0,9.3,317362.0
26294,26294,2017-04-11,2017.0,Rick et Morty,2017,tt2861424,movies,fr,0,9.3,317362.0
35461,35461,NaT,,The Shawshank Redemption,1994,tt0111161,movies,nl,0,9.3,2183286.0
35509,35509,2019-12-22,2019.0,Rick en Morty,2019,tt2861424,movies,nl,0,9.3,317362.0
95557,95557,2020-01-01,2020.0,Frihetens regn,1994,tt0111161,tvshows,no,0,9.3,2183286.0


In [34]:
# US ONLY for the next few queries
# what is the best content on Netflix in the US?
df[df['Country'] == 'us'].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
135895,135895,2013-02-08,2013.0,Breaking Bad,2013,tt0903747,tvshows,us,0,9.5,1303196.0
134649,134649,2019-05-04,2019.0,Our Planet,2019,tt9253866,tvshows,us,1,9.3,21242.0
134269,134269,2019-01-10,2019.0,College Romance,2018,tt8809646,tvshows,us,1,9.2,19037.0
135055,135055,2018-01-09,2018.0,Age of Rebellion,2018,tt8931836,tvshows,us,0,9.2,30.0
135066,135066,2018-08-31,2018.0,Yeh Meri Family,2018,tt8595766,tvshows,us,0,9.2,16516.0
135311,135311,2018-01-01,2018.0,Fullmetal Alchemist: Brotherhood,2010,tt1355642,tvshows,us,0,9.1,99203.0
135592,135592,2017-04-15,2017.0,Leyla and Mecnun,2014,tt1831164,tvshows,us,0,9.1,84913.0
135143,135143,2018-06-20,2018.0,The Vietnam War: A Film by Ken Burns and Lynn ...,2017,tt1877514,tvshows,us,0,9.1,15456.0
126990,126990,2018-01-03,2018.0,Hans Zimmer: Live in Prague,2017,tt5732482,movies,us,0,9.1,2060.0
126742,126742,2018-01-06,2018.0,Natsamrat - Asa Nat Hone Nahi,2016,tt5311546,movies,us,0,9.1,4503.0


In [39]:
# what is the best Netflix original content on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 1)].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)
# NOTE: there are some errors in the data, e.g. Lord of the Rings is definitely not a Netflix original

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
134649,134649,2019-05-04,2019.0,Our Planet,2019,tt9253866,tvshows,us,1,9.3,21242.0
134269,134269,2019-01-10,2019.0,College Romance,2018,tt8809646,tvshows,us,1,9.2,19037.0
124447,124447,2020-01-21,2020.0,KD (A) Karuppudurai,2019,tt8747560,movies,us,1,9.0,171.0
134661,134661,2019-01-04,2019.0,Peasants Rebellion,2018,tt9011036,tvshows,us,1,9.0,57.0
134547,134547,2019-05-31,2019.0,When They See Us,2019,tt7137906,tvshows,us,1,9.0,59839.0
134404,134404,2019-01-08,2019.0,Regiment Diaries,2018,tt9642576,tvshows,us,1,8.9,27.0
124576,124576,2020-01-01,2020.0,The Lord of the Rings: The Return of the King,2003,tt0167260,movies,us,1,8.9,1553201.0
134161,134161,2019-08-11,2019.0,Greatest Events of WWII in Colour,2019,tt9103932,tvshows,us,1,8.8,1979.0
134217,134217,2019-10-25,2019.0,The Untamed,2019,tt10554898,tvshows,us,1,8.8,620.0
134467,134467,2019-04-07,2019.0,Stranger Things,2019,tt4574334,tvshows,us,1,8.8,701022.0


In [38]:
# what is the best licensed content on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 0)].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
135895,135895,2013-02-08,2013.0,Breaking Bad,2013,tt0903747,tvshows,us,0,9.5,1303196.0
135066,135066,2018-08-31,2018.0,Yeh Meri Family,2018,tt8595766,tvshows,us,0,9.2,16516.0
135055,135055,2018-01-09,2018.0,Age of Rebellion,2018,tt8931836,tvshows,us,0,9.2,30.0
126742,126742,2018-01-06,2018.0,Natsamrat - Asa Nat Hone Nahi,2016,tt5311546,movies,us,0,9.1,4503.0
126990,126990,2018-01-03,2018.0,Hans Zimmer: Live in Prague,2017,tt5732482,movies,us,0,9.1,2060.0
135592,135592,2017-04-15,2017.0,Leyla and Mecnun,2014,tt1831164,tvshows,us,0,9.1,84913.0
135570,135570,2017-05-15,2017.0,Sherlock,2017,tt1475582,tvshows,us,0,9.1,742814.0
135311,135311,2018-01-01,2018.0,Fullmetal Alchemist: Brotherhood,2010,tt1355642,tvshows,us,0,9.1,99203.0
135143,135143,2018-06-20,2018.0,The Vietnam War: A Film by Ken Burns and Lynn ...,2017,tt1877514,tvshows,us,0,9.1,15456.0
135006,135006,2018-01-10,2018.0,Still Game,2016,tt0281491,tvshows,us,0,9.0,4766.0


In [41]:
# what are the best Netflix original movies on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 1) & (df['Type'] == 'movies')].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
124447,124447,2020-01-21,2020.0,KD (A) Karuppudurai,2019,tt8747560,movies,us,1,9.0,171.0
124576,124576,2020-01-01,2020.0,The Lord of the Rings: The Return of the King,2003,tt0167260,movies,us,1,8.9,1553201.0
124527,124527,2020-01-01,2020.0,Inception,2010,tt1375666,movies,us,1,8.8,1915095.0
124877,124877,2019-04-11,2019.0,Oththa Seruppu Size 7,2019,tt10370116,movies,us,1,8.7,1350.0
124922,124922,2019-01-11,2019.0,The Matrix,1999,tt0133093,movies,us,1,8.7,1571978.0
124577,124577,2020-01-01,2020.0,The Lord of the Rings: The Two Towers,2002,tt0167261,movies,us,1,8.7,1402043.0
124624,124624,2019-12-31,2019.0,Gol Maal,1979,tt0079221,movies,us,1,8.6,16309.0
124501,124501,2020-01-01,2020.0,City of God,2002,tt0317248,movies,us,1,8.6,665034.0
124638,124638,2019-12-31,2019.0,Koshish,1972,tt0215911,movies,us,1,8.6,1337.0
124771,124771,2019-01-12,2019.0,The Departed,2006,tt0407887,movies,us,1,8.5,1116573.0


In [42]:
# what are the best licensed movies on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 0) & (df['Type'] == 'movies')].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
126742,126742,2018-01-06,2018.0,Natsamrat - Asa Nat Hone Nahi,2016,tt5311546,movies,us,0,9.1,4503.0
126990,126990,2018-01-03,2018.0,Hans Zimmer: Live in Prague,2017,tt5732482,movies,us,0,9.1,2060.0
125310,125310,2019-06-26,2019.0,Unbroken,2019,tt9663460,movies,us,0,9.0,445.0
126554,126554,2018-01-08,2018.0,Mahabharat,2013,tt3212600,movies,us,0,8.8,3275.0
125815,125815,2019-02-15,2019.0,Ayana,2017,tt6891660,movies,us,0,8.8,90.0
125969,125969,2019-01-01,2019.0,Merku Thodarchi Malai,2018,tt7794052,movies,us,0,8.8,1358.0
125644,125644,2019-01-04,2019.0,The World's Most Wanted Animal,2018,tt8653894,movies,us,0,8.8,31.0
126649,126649,2018-06-07,2018.0,Luciano Mellera: Infantiloide,2018,tt7607400,movies,us,0,8.7,1471.0
127914,127914,2017-02-28,2017.0,Be Here Now,2015,tt2473476,movies,us,0,8.7,2980.0
127218,127218,2017-01-12,2017.0,Eh Janam Tumhare Lekhe,2015,tt4280824,movies,us,0,8.7,333.0


In [43]:
# what are the best Netflix original TV shows on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 1) & (df['Type'] == 'tvshows')].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
134649,134649,2019-05-04,2019.0,Our Planet,2019,tt9253866,tvshows,us,1,9.3,21242.0
134269,134269,2019-01-10,2019.0,College Romance,2018,tt8809646,tvshows,us,1,9.2,19037.0
134547,134547,2019-05-31,2019.0,When They See Us,2019,tt7137906,tvshows,us,1,9.0,59839.0
134661,134661,2019-01-04,2019.0,Peasants Rebellion,2018,tt9011036,tvshows,us,1,9.0,57.0
134404,134404,2019-01-08,2019.0,Regiment Diaries,2018,tt9642576,tvshows,us,1,8.9,27.0
134467,134467,2019-04-07,2019.0,Stranger Things,2019,tt4574334,tvshows,us,1,8.8,701022.0
134023,134023,2020-04-01,2020.0,Go! Go! Cory Carson,2020,tt8115702,tvshows,us,1,8.8,31.0
134371,134371,2019-08-15,2019.0,Sacred Games,2019,tt6077448,tvshows,us,1,8.8,63593.0
134314,134314,2019-09-15,2019.0,The Universe,2009,tt1051155,tvshows,us,1,8.8,4756.0
134275,134275,2019-01-10,2019.0,Signal,2016,tt5332206,tvshows,us,1,8.8,2190.0


In [44]:
# what are the best licensed TV shows on Netflix in the US?
df[(df['Country'] == 'us') & (df['Originals'] == 0) & (df['Type'] == 'tvshows')].sort_values('Rating',ascending=False).drop_duplicates(['Title']).head(10)

Unnamed: 0.1,Unnamed: 0,CleanDate,Y,Title,DevDate,imdbtag,Type,Country,Originals,Rating,N_Ratings
135895,135895,2013-02-08,2013.0,Breaking Bad,2013,tt0903747,tvshows,us,0,9.5,1303196.0
135066,135066,2018-08-31,2018.0,Yeh Meri Family,2018,tt8595766,tvshows,us,0,9.2,16516.0
135055,135055,2018-01-09,2018.0,Age of Rebellion,2018,tt8931836,tvshows,us,0,9.2,30.0
135592,135592,2017-04-15,2017.0,Leyla and Mecnun,2014,tt1831164,tvshows,us,0,9.1,84913.0
135143,135143,2018-06-20,2018.0,The Vietnam War: A Film by Ken Burns and Lynn ...,2017,tt1877514,tvshows,us,0,9.1,15456.0
135311,135311,2018-01-01,2018.0,Fullmetal Alchemist: Brotherhood,2010,tt1355642,tvshows,us,0,9.1,99203.0
135570,135570,2017-05-15,2017.0,Sherlock,2017,tt1475582,tvshows,us,0,9.1,742814.0
135690,135690,2016-12-15,2016.0,Humsafar,2011,tt2403201,tvshows,us,0,9.0,3066.0
135698,135698,2016-12-15,2016.0,Zindagi Gulzar Hai,2012,tt2828240,tvshows,us,0,9.0,2534.0
135653,135653,2017-02-22,2017.0,The War: A Film by Ken Burns and Lynn Novick,2007,tt0996994,tvshows,us,0,9.0,4404.0


In [80]:
# what are the years that produced the best content worldwide?
df.groupby(['DevDate'])['Rating'].mean().nlargest(10)

DevDate
1941    8.300000
1958    8.300000
1974    8.290476
1972    8.270968
1939    8.050000
1968    8.036364
1955    8.000000
1936    7.900000
1952    7.900000
1979    7.809868
Name: Rating, dtype: float64

In [None]:
# NOTE: some years only have a handful of movies/TVshows, so we should only take years with >20 movies/TVshows. e.g. 1936 has 2 movies

In [73]:
# what are the years (with 20 or more entries) that produced the best content worldwide?
df.groupby('DevDate').filter(lambda x: len(x) >= 20).groupby(['DevDate'])['Rating'].mean().nlargest(10)

DevDate
1974    8.290476
1972    8.270968
1968    8.036364
1979    7.809868
1960    7.737500
1975    7.665476
1969    7.334783
1990    7.318824
1971    7.304762
1977    7.287629
Name: Rating, dtype: float64

In [74]:
# which countries have the best content?
df.groupby(['Country'])['Rating'].mean().nlargest(10)

Country
tr    6.680347
pt    6.675753
es    6.644062
at    6.631998
de    6.631145
pl    6.630285
fi    6.625428
dk    6.624187
it    6.623834
se    6.622116
Name: Rating, dtype: float64