In [1]:
#Dependencies
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pprint as pprint

from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager

import requests
import pymongo

In [2]:
#Get all tv shows by API calls to TV Maze Website 
base_url = "http://api.tvmaze.com/shows"
response = requests.get(base_url).json()
df = pd.DataFrame(response)


In [3]:
#Get Required columns
df = df[['id', 'url', 'name', 'type', 'language', 'genres', 'status', 'runtime','premiered', 'officialSite', 'schedule', 'network']]
df.head()

Unnamed: 0,id,url,name,type,language,genres,status,runtime,premiered,officialSite,schedule,network
0,1,https://www.tvmaze.com/shows/1/under-the-dome,Under the Dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60.0,2013-06-24,http://www.cbs.com/shows/under-the-dome/,"{'time': '22:00', 'days': ['Thursday']}","{'id': 2, 'name': 'CBS', 'country': {'name': '..."
1,2,https://www.tvmaze.com/shows/2/person-of-interest,Person of Interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60.0,2011-09-22,http://www.cbs.com/shows/person_of_interest/,"{'time': '22:00', 'days': ['Tuesday']}","{'id': 2, 'name': 'CBS', 'country': {'name': '..."
2,3,https://www.tvmaze.com/shows/3/bitten,Bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60.0,2014-01-11,http://bitten.space.ca/,"{'time': '22:00', 'days': ['Friday']}","{'id': 7, 'name': 'CTV Sci-Fi Channel', 'count..."
3,4,https://www.tvmaze.com/shows/4/arrow,Arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60.0,2012-10-10,http://www.cwtv.com/shows/arrow,"{'time': '21:00', 'days': ['Tuesday']}","{'id': 5, 'name': 'The CW', 'country': {'name'..."
4,5,https://www.tvmaze.com/shows/5/true-detective,True Detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60.0,2014-01-12,http://www.hbo.com/true-detective,"{'time': '21:00', 'days': ['Sunday']}","{'id': 8, 'name': 'HBO', 'country': {'name': '..."


In [4]:
#Cleaning the Data, replace the empty values with NA

#Replace the empty data values to 'NA' which are not avaiable 
df.replace(np.NAN,'NA', inplace=True)

#Replace the data values which are not relevant to 'NA' 
df.replace('[]','NA', inplace=True)

In [5]:
#Clean the dataframe to remove the duplicates TV shows 
duplicate = df[df.duplicated('name')]
if(duplicate.empty):
    print("No duplicate TV shows in the Dataframe")
else:
    print(duplicate)
    print('---------------------------------')
    
    

      id                                      url    name     type language  \
145  153  https://www.tvmaze.com/shows/153/utopia  Utopia  Reality  English   

    genres status runtime   premiered officialSite  \
145     []  Ended      60  2014-09-07           NA   

                                  schedule  \
145  {'time': '20:00', 'days': ['Friday']}   

                                               network  
145  {'id': 4, 'name': 'FOX', 'country': {'name': '...  
---------------------------------


In [6]:
print(f"Current Record Count:   {len(df)}")
print('Dropping the duplicates....')
df.drop_duplicates(subset=['name'], keep='first', inplace=True)
print(f"New Record Count:   {len(df)}")

Current Record Count:   240
Dropping the duplicates....
New Record Count:   239


In [7]:
#Changing the Index of a DataFrame
df = df.set_index('name')


In [8]:
#Read the schedule which was in a dictionary, transformed the data in to new columns- Time and Day
time_lst = []
day_lst=[]

schedule = df['schedule']

for sch in schedule:
    time_lst.append(sch['time'])
    day_lst.append(sch['days'])
    
#Add these two new columns to the existing DataFrame.
df['Time'] = time_lst
df['Day'] = day_lst

In [9]:
#Network column has data stored as a dictonary of dictionaries.
#Read the data fetched the dictionary items, seperated in new lists and  appended the 
nw_name_lst = []
country_name_lst=[]
country_code=[]
timezone_lst=[]

network = df['network']

for nw in network:
    if(nw!='NA'):
        nw_name_lst.append(nw['name'])
        country_name_lst.append(nw['country']['name'])
        country_code.append(nw['country']['code'])
        timezone_lst.append(nw['country']['timezone'])
    else:
         nw_name_lst.append('')
         country_name_lst.append('')
         country_code.append('')
         timezone_lst.append('')
    
df['Network'] = nw_name_lst
df['Country'] = country_name_lst   
df['Country Code'] = country_code
df['Timezone'] = timezone_lst
df.head()


Unnamed: 0_level_0,id,url,type,language,genres,status,runtime,premiered,officialSite,schedule,network,Time,Day,Network,Country,Country Code,Timezone
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Under the Dome,1,https://www.tvmaze.com/shows/1/under-the-dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60,2013-06-24,http://www.cbs.com/shows/under-the-dome/,"{'time': '22:00', 'days': ['Thursday']}","{'id': 2, 'name': 'CBS', 'country': {'name': '...",22:00,[Thursday],CBS,United States,US,America/New_York
Person of Interest,2,https://www.tvmaze.com/shows/2/person-of-interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60,2011-09-22,http://www.cbs.com/shows/person_of_interest/,"{'time': '22:00', 'days': ['Tuesday']}","{'id': 2, 'name': 'CBS', 'country': {'name': '...",22:00,[Tuesday],CBS,United States,US,America/New_York
Bitten,3,https://www.tvmaze.com/shows/3/bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60,2014-01-11,http://bitten.space.ca/,"{'time': '22:00', 'days': ['Friday']}","{'id': 7, 'name': 'CTV Sci-Fi Channel', 'count...",22:00,[Friday],CTV Sci-Fi Channel,Canada,CA,America/Halifax
Arrow,4,https://www.tvmaze.com/shows/4/arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60,2012-10-10,http://www.cwtv.com/shows/arrow,"{'time': '21:00', 'days': ['Tuesday']}","{'id': 5, 'name': 'The CW', 'country': {'name'...",21:00,[Tuesday],The CW,United States,US,America/New_York
True Detective,5,https://www.tvmaze.com/shows/5/true-detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60,2014-01-12,http://www.hbo.com/true-detective,"{'time': '21:00', 'days': ['Sunday']}","{'id': 8, 'name': 'HBO', 'country': {'name': '...",21:00,[Sunday],HBO,United States,US,America/New_York


In [10]:
#Drop the columns that are not further required 
del df['id']
del df['schedule']
del df['network']


In [11]:
#Rename the columns 
df.rename(columns = {'url':'Show URL','name':'Name','type':'Show Type','language':'Language','genres':'Genres','status':'Status','runtime':'Runtime','premiered':'Premiere Date','officialSite':'Official Site'}, inplace = True)


In [12]:
df.head()

Unnamed: 0_level_0,Show URL,Show Type,Language,Genres,Status,Runtime,Premiere Date,Official Site,Time,Day,Network,Country,Country Code,Timezone
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Under the Dome,https://www.tvmaze.com/shows/1/under-the-dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60,2013-06-24,http://www.cbs.com/shows/under-the-dome/,22:00,[Thursday],CBS,United States,US,America/New_York
Person of Interest,https://www.tvmaze.com/shows/2/person-of-interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60,2011-09-22,http://www.cbs.com/shows/person_of_interest/,22:00,[Tuesday],CBS,United States,US,America/New_York
Bitten,https://www.tvmaze.com/shows/3/bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60,2014-01-11,http://bitten.space.ca/,22:00,[Friday],CTV Sci-Fi Channel,Canada,CA,America/Halifax
Arrow,https://www.tvmaze.com/shows/4/arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60,2012-10-10,http://www.cwtv.com/shows/arrow,21:00,[Tuesday],The CW,United States,US,America/New_York
True Detective,https://www.tvmaze.com/shows/5/true-detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60,2014-01-12,http://www.hbo.com/true-detective,21:00,[Sunday],HBO,United States,US,America/New_York


In [13]:
#Generate an output as CSV from the API call
df.to_csv('Output/tvmaze_api_output.csv', encoding='utf-8')

In [14]:
# Get further data on TV shows by using Web Scrapping 

In [15]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)







[WDM] - Current google-chrome version is 90.0.4430
[WDM] - Get LATEST driver version for 90.0.4430
[WDM] - Driver [C:\Users\amita\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


In [16]:
#Visit every TV show website, web scrape the data, get the details of Show like -
#Created By, Full Image, Med Image, Rating, Rating Count

#Create empty list for the new datafrom web scrapping
show_sum = []
full_img=[]
med_img=[]
crtd_by = []
rtng = []
rtng_cnt=[]
url_list = df['Show URL']

#Pass each TV URl,call the URL, fetch the data
#There are around 239 TV shows, some may have the show-info details some may not, 
#so added checks and validations to fetch data for allshows.

for url in url_list:
    print(url)
    browser.visit(url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    gen_info = soup.find(id='general-information')
    show_sum.append(gen_info.p.text)
    med_img.append(gen_info.img['src'])
   
    full_img.append(soup.find("meta",  property="og:image")['content'])
    
    soup_info=soup.find(id="general-info-panel")
    all_div = soup_info.find_all('div')

    isPresent = False
    for tag in all_div:
        if "Created by:" not in tag.text: 
            continue
        else:
            crtd_by.append(tag.find('span').text)
            isPresent = True
            break
            
    if(isPresent==False):
        crtd_by.append('')
        
    rating = soup_info.find_all("span", itemprop="aggregateRating")

    for tag in rating:
        if(tag.find("b")):
            rtng.append(tag.b.text)
        else:
            rtng.append('')   
    
    for tag in rating:
        if(tag.find("span", itemprop="ratingCount")): 
            rtng_cnt.append(tag.find("span", itemprop="ratingCount").text)
        else:
            rtng_cnt.append('')
    
         
    


https://www.tvmaze.com/shows/1/under-the-dome
https://www.tvmaze.com/shows/2/person-of-interest
https://www.tvmaze.com/shows/3/bitten
https://www.tvmaze.com/shows/4/arrow
https://www.tvmaze.com/shows/5/true-detective
https://www.tvmaze.com/shows/6/the-100
https://www.tvmaze.com/shows/7/homeland
https://www.tvmaze.com/shows/8/glee
https://www.tvmaze.com/shows/9/revenge
https://www.tvmaze.com/shows/10/grimm
https://www.tvmaze.com/shows/11/gotham
https://www.tvmaze.com/shows/12/lost-girl
https://www.tvmaze.com/shows/13/the-flash
https://www.tvmaze.com/shows/14/continuum
https://www.tvmaze.com/shows/15/constantine
https://www.tvmaze.com/shows/16/penny-dreadful
https://www.tvmaze.com/shows/18/the-amazing-race
https://www.tvmaze.com/shows/19/supernatural
https://www.tvmaze.com/shows/20/the-strain
https://www.tvmaze.com/shows/21/the-last-ship
https://www.tvmaze.com/shows/22/true-blood
https://www.tvmaze.com/shows/23/once-upon-a-time-in-wonderland
https://www.tvmaze.com/shows/24/hawaii-five-0


https://www.tvmaze.com/shows/192/hello-ladies
https://www.tvmaze.com/shows/193/dads
https://www.tvmaze.com/shows/194/hannibal
https://www.tvmaze.com/shows/195/bates-motel
https://www.tvmaze.com/shows/196/the-following
https://www.tvmaze.com/shows/197/da-vincis-demons
https://www.tvmaze.com/shows/198/the-fosters
https://www.tvmaze.com/shows/199/mistresses
https://www.tvmaze.com/shows/200/the-tomorrow-people
https://www.tvmaze.com/shows/201/devious-maids
https://www.tvmaze.com/shows/202/almost-human
https://www.tvmaze.com/shows/203/the-carrie-diaries
https://www.tvmaze.com/shows/204/stargate-sg-1
https://www.tvmaze.com/shows/205/graceland
https://www.tvmaze.com/shows/206/stargate-atlantis
https://www.tvmaze.com/shows/207/stargate-universe
https://www.tvmaze.com/shows/208/the-chair
https://www.tvmaze.com/shows/209/survivors-remorse
https://www.tvmaze.com/shows/210/doctor-who
https://www.tvmaze.com/shows/211/666-park-avenue
https://www.tvmaze.com/shows/212/alcatraz
https://www.tvmaze.com/s

In [17]:
#Check if all lists return exactly 240 records
print(len(show_sum))
print(len(full_img))
print(len(med_img))
print(len(crtd_by))
print(len(rtng))
print(len(rtng_cnt))

239
239
239
239
239
239


In [18]:
#Clean the Data remove the unwanted characters from the list
crtd_by = [s.replace("\n", "") for s in crtd_by]
print(crtd_by)

['Stephen King', 'Jonathan Nolan', 'Grant RosenbergDaegan Fryklind', '', 'Nic Pizzolatto', '', 'Gideon Raff', 'Brad FalchukIan BrennanRyan Murphy', 'Mike Kelley', 'Stephen CarpenterDavid GreenwaltJim Kouf', 'Bruno Heller', 'Michelle Lovretta', '', 'Simon Barry', 'Daniel CeroneDavid S. Goyer', 'John Logan', 'Elise DoganieriBertram van Munster', 'Eric Kripke', 'Guillermo del ToroChuck Hogan', 'Hank SteinbergSteven Kane', 'Alan Ball', 'Edward KitsisAdam HorowitzJane EspensonZack Estrin', 'Leonard Freeman', '', 'Kohta Hirano', '', 'Tom Kapinos', 'Michael Hirst', 'Brad FalchukRyan Murphy', 'Joss WhedonJed WhedonMaurissa Tancharoen', 'Noah Hawley', 'Lee ShipmanBrian McGreevy', 'Cameron Porsandeh', '', 'Glen Morgan', 'Karl SchaeferCraig Engler', '', 'Takeshi ObataTsugumi Ooba', 'Jack Burditt', 'Len WisemanRoberto OrciAlex KurtzmanPhillip Iscove', '', '', 'Gary Glasberg', 'Matthew Miller', '', 'Barbara Hall', 'Dan GoorMichael Schur', 'Timothy J. Sexton', 'Jack AmielMichael Begler', 'Peter Nowa

In [19]:
#Add these new columns to the existing DataFrame.
df['Summary'] = show_sum
df['Full Img'] = full_img
df['Med img'] = med_img
df['Created By'] = crtd_by
df['Ratings'] = rtng
df['Ratings Count'] = rtng_cnt
df.head()

Unnamed: 0_level_0,Show URL,Show Type,Language,Genres,Status,Runtime,Premiere Date,Official Site,Time,Day,Network,Country,Country Code,Timezone,Summary,Full Img,Med img,Created By,Ratings,Ratings Count
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Under the Dome,https://www.tvmaze.com/shows/1/under-the-dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60,2013-06-24,http://www.cbs.com/shows/under-the-dome/,22:00,[Thursday],CBS,United States,US,America/New_York,Under the Dome is the story of a small town th...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Stephen King,6.6,218
Person of Interest,https://www.tvmaze.com/shows/2/person-of-interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60,2011-09-22,http://www.cbs.com/shows/person_of_interest/,22:00,[Tuesday],CBS,United States,US,America/New_York,You are being watched. The government has a se...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Jonathan Nolan,8.9,317
Bitten,https://www.tvmaze.com/shows/3/bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60,2014-01-11,http://bitten.space.ca/,22:00,[Friday],CTV Sci-Fi Channel,Canada,CA,America/Halifax,Based on the critically acclaimed series of no...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Grant RosenbergDaegan Fryklind,7.5,80
Arrow,https://www.tvmaze.com/shows/4/arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60,2012-10-10,http://www.cwtv.com/shows/arrow,21:00,[Tuesday],The CW,United States,US,America/New_York,"After a violent shipwreck, billionaire playboy...",http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,,7.4,488
True Detective,https://www.tvmaze.com/shows/5/true-detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60,2014-01-12,http://www.hbo.com/true-detective,21:00,[Sunday],HBO,United States,US,America/New_York,Touch darkness and darkness touches you back. ...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Nic Pizzolatto,8.3,298


In [20]:
#Again Replace the empty data values to for new columns to 'NA', which were not avaiable 
df.replace('','NA', inplace=True)


In [21]:
#Generate the output of api + webscrapping
df.to_csv('Output/tvmaze_api_webscrape_output.csv', encoding='utf-8')

In [22]:
#Read a CSV with data of TV shows 

In [23]:
df_csv = pd.read_csv('Resources/TV_Shows.csv',encoding = "utf-8")
df_csv = df_csv[['name','release_year','duration','cast']]
df_csv.head()

Unnamed: 0,name,release_year,duration,cast
0,3%,2020,4 Seasons,"João Miguel, Bianca Comparato, Michel Gomes, R..."
1,7:19,2016,93 min,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ..."
2,23:59,2011,78 min,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ..."
3,9,2009,80 min,"Elijah Wood, John C. Reilly, Jennifer Connelly..."
4,21,2008,123 min,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar..."


In [24]:
df.head()

Unnamed: 0_level_0,Show URL,Show Type,Language,Genres,Status,Runtime,Premiere Date,Official Site,Time,Day,Network,Country,Country Code,Timezone,Summary,Full Img,Med img,Created By,Ratings,Ratings Count
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Under the Dome,https://www.tvmaze.com/shows/1/under-the-dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60,2013-06-24,http://www.cbs.com/shows/under-the-dome/,22:00,[Thursday],CBS,United States,US,America/New_York,Under the Dome is the story of a small town th...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Stephen King,6.6,218
Person of Interest,https://www.tvmaze.com/shows/2/person-of-interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60,2011-09-22,http://www.cbs.com/shows/person_of_interest/,22:00,[Tuesday],CBS,United States,US,America/New_York,You are being watched. The government has a se...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Jonathan Nolan,8.9,317
Bitten,https://www.tvmaze.com/shows/3/bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60,2014-01-11,http://bitten.space.ca/,22:00,[Friday],CTV Sci-Fi Channel,Canada,CA,America/Halifax,Based on the critically acclaimed series of no...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Grant RosenbergDaegan Fryklind,7.5,80
Arrow,https://www.tvmaze.com/shows/4/arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60,2012-10-10,http://www.cwtv.com/shows/arrow,21:00,[Tuesday],The CW,United States,US,America/New_York,"After a violent shipwreck, billionaire playboy...",http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,,7.4,488
True Detective,https://www.tvmaze.com/shows/5/true-detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60,2014-01-12,http://www.hbo.com/true-detective,21:00,[Sunday],HBO,United States,US,America/New_York,Touch darkness and darkness touches you back. ...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Nic Pizzolatto,8.3,298


In [25]:
#Merge this data with the existing api and scrapping data
#New columns (Netflix release year, Duration, cast) from the CSV are fetched
#These details are merged with previous DF on 'Name of the Show'

merged_df = pd.merge(df, df_csv, on='name', how='left')
merged_df.head()

Unnamed: 0,name,Show URL,Show Type,Language,Genres,Status,Runtime,Premiere Date,Official Site,Time,...,Timezone,Summary,Full Img,Med img,Created By,Ratings,Ratings Count,release_year,duration,cast
0,Under the Dome,https://www.tvmaze.com/shows/1/under-the-dome,Scripted,English,"[Drama, Science-Fiction, Thriller]",Ended,60,2013-06-24,http://www.cbs.com/shows/under-the-dome/,22:00,...,America/New_York,Under the Dome is the story of a small town th...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Stephen King,6.6,218,,,
1,Person of Interest,https://www.tvmaze.com/shows/2/person-of-interest,Scripted,English,"[Action, Crime, Science-Fiction]",Ended,60,2011-09-22,http://www.cbs.com/shows/person_of_interest/,22:00,...,America/New_York,You are being watched. The government has a se...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Jonathan Nolan,8.9,317,2016.0,5 Seasons,"Jim Caviezel, Michael Emerson, Taraji P. Henso..."
2,Bitten,https://www.tvmaze.com/shows/3/bitten,Scripted,English,"[Drama, Horror, Romance]",Ended,60,2014-01-11,http://bitten.space.ca/,22:00,...,America/Halifax,Based on the critically acclaimed series of no...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Grant RosenbergDaegan Fryklind,7.5,80,2016.0,3 Seasons,"Laura Vandervoort, Greyston Holt, Greg Bryk, S..."
3,Arrow,https://www.tvmaze.com/shows/4/arrow,Scripted,English,"[Drama, Action, Science-Fiction]",Ended,60,2012-10-10,http://www.cwtv.com/shows/arrow,21:00,...,America/New_York,"After a violent shipwreck, billionaire playboy...",http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,,7.4,488,2019.0,8 Seasons,"Stephen Amell, Katie Cassidy, David Ramsey, Wi..."
4,True Detective,https://www.tvmaze.com/shows/5/true-detective,Scripted,English,"[Drama, Crime, Thriller]",Running,60,2014-01-12,http://www.hbo.com/true-detective,21:00,...,America/New_York,Touch darkness and darkness touches you back. ...,http://static.tvmaze.com/uploads/images/origin...,//static.tvmaze.com/uploads/images/medium_port...,Nic Pizzolatto,8.3,298,,,


In [26]:
#Again Replace the empty data values to for new columns to 'NA', which were not avaiable 
merged_df.replace(np.NAN,'NA', inplace=True)


In [27]:
merged_df.to_csv('Output/TVShows_final_output.csv', encoding='utf-8')

In [28]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [29]:
# Define database and collection
db = client.TVShows_db
collection = db.items

In [30]:
#Check if collection exists, if does then drop
results = collection.find()
if results.count() >= 1:
    collection.drop()


  if results.count() >= 1:


In [31]:
collection.insert_many(merged_df.to_dict('records'))


<pymongo.results.InsertManyResult at 0x1c889788180>

In [32]:
results = collection.find()
for result in results:
    print(result)

{'_id': ObjectId('6084e305273769203de55296'), 'name': 'Under the Dome', 'Show URL': 'https://www.tvmaze.com/shows/1/under-the-dome', 'Show Type': 'Scripted', 'Language': 'English', 'Genres': ['Drama', 'Science-Fiction', 'Thriller'], 'Status': 'Ended', 'Runtime': 60.0, 'Premiere Date': '2013-06-24', 'Official Site': 'http://www.cbs.com/shows/under-the-dome/', 'Time': '22:00', 'Day': ['Thursday'], 'Network': 'CBS', 'Country': 'United States', 'Country Code': 'US', 'Timezone': 'America/New_York', 'Summary': "Under the Dome is the story of a small town that is suddenly and inexplicably sealed off from the rest of the world by an enormous transparent dome. The town's inhabitants must deal with surviving the post-apocalyptic conditions while searching for answers about the dome, where it came from and if and when it will go away.", 'Full Img': 'http://static.tvmaze.com/uploads/images/original_untouched/81/202627.jpg', 'Med img': '//static.tvmaze.com/uploads/images/medium_portrait/81/202627.j