In [21]:
#Setting up any and all dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import json
import scipy.stats as stats

import warnings
warnings.filterwarnings('ignore')

# Load the raw catalogue exports for both services.
# Both paths are relative to the notebook's working directory.
hulu_df = pd.read_csv("HuluRaw.csv")
netflix_df = pd.read_csv("netflix_titles.csv")

# A cell only renders its final expression automatically, so a bare
# `hulu_df.head()` in the middle of the cell is evaluated and thrown away.
# `display` (made available by the IPython/Jupyter kernel) shows the Hulu
# preview explicitly; the Netflix preview stays the cell's last expression.
display(hulu_df.head())
netflix_df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China","September 9, 2019",2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,"September 9, 2016",2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


In [15]:
# Tally how many catalogue rows fall under each value of the "type" column
num_of_types = (
    netflix_df
    .loc[:, "type"]
    .value_counts()
)
num_of_types

Movie      4265
TV Show    1969
Name: type, dtype: int64

In [16]:
# Keep just the rows whose "type" is exactly "TV Show"
is_tv_show = netflix_df["type"].eq("TV Show")
netflix_tv_df = netflix_df[is_tv_show]
netflix_tv_df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
5,80163890,TV Show,Apaches,,"Alberto Ammann, Eloy Azorín, Verónica Echegui,...",Spain,"September 8, 2017",2016,TV-MA,1 Season,"Crime TV Shows, International TV Shows, Spanis...",A young journalist is forced into a life of cr...
8,80117902,TV Show,Fire Chasers,,,United States,"September 8, 2017",2017,TV-MA,1 Season,"Docuseries, Science & Nature TV","As California's 2016 fire season rages, brave ..."
26,80244601,TV Show,Castle of Stars,,"Chaiyapol Pupart, Jintanutda Lummakanon, Worra...",,"September 7, 2018",2015,TV-14,1 Season,"International TV Shows, Romantic TV Shows, TV ...",As four couples with different lifestyles go t...


In [17]:
# Narrow the TV-show frame down to the five fields used from here on
wanted_columns = ["show_id", "title", "listed_in", "rating", "description"]
netflix_tv_df2 = netflix_tv_df.loc[:, wanted_columns]
netflix_tv_df2.head()

Unnamed: 0,show_id,title,listed_in,rating,description
2,70234439,Transformers Prime,Kids' TV,TV-Y7-FV,"With the help of three human allies, the Autob..."
3,80058654,Transformers: Robots in Disguise,Kids' TV,TV-Y7,When a prison ship crash unleashes hundreds of...
5,80163890,Apaches,"Crime TV Shows, International TV Shows, Spanis...",TV-MA,A young journalist is forced into a life of cr...
8,80117902,Fire Chasers,"Docuseries, Science & Nature TV",TV-MA,"As California's 2016 fire season rages, brave ..."
26,80244601,Castle of Stars,"International TV Shows, Romantic TV Shows, TV ...",TV-14,As four couples with different lifestyles go t...


In [18]:
# Translate the Netflix headers into the labels shared with the Hulu frame
netflix_to_common = {
    "show_id": "Show ID",
    "title": "Show Name",
    "listed_in": "Genre",
    "rating": "TV-Rating",
    "description": "Description",
}

# Rename first, then order the rows by show name (ascending is the default)
netflix_tv_df2 = (
    netflix_tv_df2
    .rename(columns=netflix_to_common)
    .sort_values("Show Name")
)
netflix_tv_df2.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description
1342,80234414,100% Hotter,"British TV Shows, International TV Shows, Real...",TV-14,"A stylist, a hair designer and a makeup artist..."
1674,80098046,12 Years Promise,"International TV Shows, Korean TV Shows, Roman...",TV-14,A pregnant teen is forced by her family to lea...
6151,80117470,13 Reasons Why,"Crime TV Shows, TV Dramas, TV Mysteries",TV-MA,"After a teenage girl's perplexing suicide, a c..."
6152,80996791,13 Reasons Why: Beyond the Reasons,"Crime TV Shows, Docuseries",TV-MA,"Cast members, writers, producers and mental he..."
1058,80205593,1983,"Crime TV Shows, International TV Shows, TV Dramas",TV-MA,"In this dark alt-history thriller, a naïve law..."


In [33]:
# Flag every row in this frame as a Netflix title.
# The column label contains spaces, so it is passed to assign() via ** unpacking.
netflix_tv_df2 = netflix_tv_df2.assign(**{"Available on Netflix": "Yes"})
netflix_tv_df2.head()

Rosario Tijeras    2
Top Boy            2
Life               2
Tunnel             2
Lovesick           2
Name: Show Name, dtype: int64

In [48]:
# De-duplicate on "Show ID", retaining the final occurrence of each ID
netflix_tv_df2 = netflix_tv_df2.drop_duplicates(subset=["Show ID"], keep="last")
netflix_tv_df2.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Netflix
1342,80234414,100% Hotter,"British TV Shows, International TV Shows, Real...",TV-14,"A stylist, a hair designer and a makeup artist...",Yes
1674,80098046,12 Years Promise,"International TV Shows, Korean TV Shows, Roman...",TV-14,A pregnant teen is forced by her family to lea...,Yes
6151,80117470,13 Reasons Why,"Crime TV Shows, TV Dramas, TV Mysteries",TV-MA,"After a teenage girl's perplexing suicide, a c...",Yes
6152,80996791,13 Reasons Why: Beyond the Reasons,"Crime TV Shows, Docuseries",TV-MA,"Cast members, writers, producers and mental he...",Yes
1058,80205593,1983,"Crime TV Shows, International TV Shows, TV Dramas",TV-MA,"In this dark alt-history thriller, a naïve law...",Yes


In [22]:
# Hulu fields standing in for show ID, show name, genre, TV rating and description
hulu_columns = [
    "show/id",
    "show/name",
    "show/genre",
    "show/show_rollups/subscriber/highest_rating",
    "show/description",
]

# Take an independent copy so later edits never touch the raw Hulu frame
hulu_df_2 = hulu_df.loc[:, hulu_columns].copy()
hulu_df_2.head()

Unnamed: 0,show/id,show/name,show/genre,show/show_rollups/subscriber/highest_rating,show/description
0,54,Family Guy,Animation and Cartoons,TV-MA,The adventures of an endearingly ignorant dad ...
1,6979,South Park,Comedy,TV-MA,"Underpants-stealing gnomes, a talking Christma..."
2,53,American Dad!,Comedy,TV-14,This screwball family full of radically differ...
3,389,Law & Order: Special Victims Unit,Drama,TV-14,This hard-hitting and emotional series from NB...
4,1603,Naruto Shippuden,Anime,TV-14,The Village Hidden in the Leaves is home to th...


In [24]:
# Translate the Hulu headers into the common labels
hulu_to_common = {
    "show/id": "Show ID",
    "show/name": "Show Name",
    "show/genre": "Genre",
    "show/show_rollups/subscriber/highest_rating": "TV-Rating",
    "show/description": "Description",
}

# Rename, then flag every row in this frame as a Hulu title
hulu_df_2 = (
    hulu_df_2
    .rename(columns=hulu_to_common)
    .assign(**{"Available on Hulu": "Yes"})
)
hulu_df_2.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Hulu
0,54,Family Guy,Animation and Cartoons,TV-MA,The adventures of an endearingly ignorant dad ...,Yes
1,6979,South Park,Comedy,TV-MA,"Underpants-stealing gnomes, a talking Christma...",Yes
2,53,American Dad!,Comedy,TV-14,This screwball family full of radically differ...,Yes
3,389,Law & Order: Special Victims Unit,Drama,TV-14,This hard-hitting and emotional series from NB...,Yes
4,1603,Naruto Shippuden,Anime,TV-14,The Village Hidden in the Leaves is home to th...,Yes


In [26]:
# Order the Hulu rows by show name; sort_values sorts ascending by default
hulu_df_2 = hulu_df_2.sort_values("Show Name")
hulu_df_2.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Hulu
700,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes
106,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes
502,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes
601,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes
205,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes


In [28]:
# Count how many rows carry each show name; a count above 1 is a repeated title
num_of_shows = (
    hulu_df_2
    .loc[:, "Show Name"]
    .value_counts()
)
num_of_shows

The Cleveland Show    10
Sleepy Hollow         10
The Voice             10
Once Upon a Time      10
Project Runway        10
Name: Show Name, dtype: int64

In [29]:
# De-duplicate on "Show ID", retaining the final occurrence of each ID
hulu_df_2 = hulu_df_2.drop_duplicates(subset=["Show ID"], keep="last")
hulu_df_2.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Hulu
403,22416,Adventure Time,Kids,TV-PG,It's one crazy adventure after another for hum...,Yes
51,969,America's Got Talent,Reality and Game Shows,TV-PG,America votes in the ultimate talent show to d...,Yes
42,8322,America's Next Top Model,Reality and Game Shows,TV-14,"Young women of various backgrounds, shapes and...",Yes
200,53,American Dad!,Comedy,TV-14,This screwball family full of radically differ...,Yes
931,3433,Archer,Animation and Cartoons,TV-MA,Archer chronicles an international spy agency ...,Yes


In [49]:
# Re-count rows per show name after de-duplication and preview the largest counts
num_of_shows2 = (
    hulu_df_2
    .loc[:, "Show Name"]
    .value_counts()
)
num_of_shows2.head()

The Cleveland Show    1
Grey's Anatomy        1
The Goldbergs         1
Brooklyn Nine-Nine    1
The Mindy Project     1
Name: Show Name, dtype: int64

In [58]:
# Author's recorded sizes: 109 TV shows for Hulu, 1969 for Netflix
# Stack the Netflix rows on top of the Hulu rows under a fresh 0..n-1 index,
# then put the columns in a fixed order with the two availability flags last.
# Each input frame carries only its own flag, so the other flag is NaN here.
# NOTE(review): rows are stacked, not matched by show name, so a title carried
# by both services would appear as two rows, each flagged for one service only.
# Confirm that is the intended shape of the combined table.
column_order = [
    "Show ID",
    "Show Name",
    "Genre",
    "TV-Rating",
    "Description",
    "Available on Hulu",
    "Available on Netflix",
]
df = pd.concat([netflix_tv_df2, hulu_df_2], ignore_index=True).loc[:, column_order]
df.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Hulu,Available on Netflix
0,80234414,100% Hotter,"British TV Shows, International TV Shows, Real...",TV-14,"A stylist, a hair designer and a makeup artist...",,Yes
1,80098046,12 Years Promise,"International TV Shows, Korean TV Shows, Roman...",TV-14,A pregnant teen is forced by her family to lea...,,Yes
2,80117470,13 Reasons Why,"Crime TV Shows, TV Dramas, TV Mysteries",TV-MA,"After a teenage girl's perplexing suicide, a c...",,Yes
3,80996791,13 Reasons Why: Beyond the Reasons,"Crime TV Shows, Docuseries",TV-MA,"Cast members, writers, producers and mental he...",,Yes
4,80205593,1983,"Crime TV Shows, International TV Shows, TV Dramas",TV-MA,"In this dark alt-history thriller, a naïve law...",,Yes


In [61]:
# Turn the missing availability flags into "No".
# Only the two flag columns are filled: a frame-wide fillna("No") would also
# overwrite genuinely missing Genre / TV-Rating / Description values with the
# string "No". Passing a dict to fillna limits the fill to the listed columns.
# The result is reassigned rather than filled in place.
df = df.fillna({"Available on Hulu": "No", "Available on Netflix": "No"})
df.head()

Unnamed: 0,Show ID,Show Name,Genre,TV-Rating,Description,Available on Hulu,Available on Netflix
0,80234414,100% Hotter,"British TV Shows, International TV Shows, Real...",TV-14,"A stylist, a hair designer and a makeup artist...",No,Yes
1,80098046,12 Years Promise,"International TV Shows, Korean TV Shows, Roman...",TV-14,A pregnant teen is forced by her family to lea...,No,Yes
2,80117470,13 Reasons Why,"Crime TV Shows, TV Dramas, TV Mysteries",TV-MA,"After a teenage girl's perplexing suicide, a c...",No,Yes
3,80996791,13 Reasons Why: Beyond the Reasons,"Crime TV Shows, Docuseries",TV-MA,"Cast members, writers, producers and mental he...",No,Yes
4,80205593,1983,"Crime TV Shows, International TV Shows, TV Dramas",TV-MA,"In this dark alt-history thriller, a naïve law...",No,Yes


In [62]:
# Persist the combined table as CSV in the working directory.
# index=True is the pandas default, spelled out here: the row index is written
# as the leading, unnamed column of the file.
output_csv = "netflix_hulu_tvshows.csv"
df.to_csv(output_csv, index=True)