# Get Data Using API - no key

API stands for Application Programming Interface. On a basic level, it allows one program to "talk" to another program or server and request information. The API receives the request and responds with the information.

In [None]:
import requests  #similar to urllib, this library allows a computer to ping a website
import json      #library to handle JSON formatted data

### The Walking Dead Episode Data via TVMaze API

In this example, we do not need an API key (a method of authentication) in order to request data. So think of this method as being similar to web scraping but from the back end.

In [None]:
#URL to TVMaze API (HTTPS so the request and response are encrypted in transit)
#q= is the show to search for; embed=episodes adds the episode list to the response
url = r"https://api.tvmaze.com/singlesearch/shows?q=the-walking-dead&embed=episodes"

In [None]:
#requests.get sends an HTTP GET request to the URL and returns a Response object
#displaying the Response shows its status code; 200 means that we're good
#https://www.restapitutorial.com/httpstatuscodes.html
#timeout (in seconds) keeps the call from waiting forever if the server never answers
resp = requests.get(url, timeout=10)
resp

In [None]:
#resp already holds the server's reply; no new request is sent here
#.text gives the body of the response decoded to a string (JSON-formatted text for this API)
str_data = resp.text
str_data

In [None]:
#json.loads parses the JSON string into Python objects (dictionaries, lists, strings, numbers)
#on Python 3.7+ the keys keep the order they have in the JSON text;
#older versions may display them in a different order since dictionaries were unordered then
WDdata = json.loads(str_data)
WDdata

In [None]:
#verify that the top level of the parsed JSON is one big dictionary
type(WDdata)

In [None]:
#first-level keys of the parsed JSON dictionary
WDdata.keys()

In [None]:
#json.dumps converts the dictionary back into a JSON-formatted string
#indent=4 pretty-prints it so the nesting of the objects is easy to read
print(json.dumps(WDdata,indent=4))

In [None]:
#a single episode (the first item in the list of episodes)
#dict['key']['key'][index]
#dictionary name, dictionary key, dictionary key, then list index
WDdata['_embedded']['episodes'][0]

In [None]:
#check which keys we can get information from per episode, using the first episode as the example
WDdata['_embedded']['episodes'][0].keys()

In [None]:
#store the list of episodes in a variable
#the list is cycled (iterated) through later to get the value of each key per episode
episodes = WDdata['_embedded']['episodes']

In [None]:
#one empty list per episode feature; each list is filled while looping over the episodes

epnamels, seasonls, epnumls = [], [], []  #episode name, season number, episode number
datels, timels, runls = [], [], []        #airdate, airtime, runtime
epsumls = []                              #summary

In [None]:
#make a function to remove HTML tags (such as <p> and </p>) from summary text

def cleanText(text):
    """Return text with every HTML-style tag removed.

    A tag is anything from a '<' up to the next '>'. The character class
    [^>] also matches newlines, so a tag that is split across lines is
    removed too (the pattern '<.*?>' would leave such a tag in place).

    text: the string to clean; a non-string such as None raises TypeError
    """
    import re  #imported here so the function works on its own

    return re.sub(r'<[^>]*>', '', text)

In [None]:
#test cleanText function on the summary of the first episode
teststr = WDdata['_embedded']['episodes'][0]['summary']

cleanText(teststr)

In [None]:
#fill lists with data: one value per episode is appended to each feature list

for episode in episodes:

    epnamels.append(episode['name'])
    seasonls.append(episode['season'])
    epnumls.append(episode['number'])
    datels.append(episode['airdate'])
    datels_unused = None if False else None  #placeholder removed below
    timels.append(episode['airtime'])
    runls.append(episode['runtime'])

    #some episodes do not have a summary in them; causes error when cleaning the text
    #a summary of None makes cleanText raise TypeError, a missing key raises KeyError
    #only those two errors are caught so that real bugs (e.g. a misspelled name) still show up
    try:
        text = cleanText(episode['summary'])
    except (KeyError, TypeError):
        text = None

    #append None when there is no summary so every list keeps the same length
    epsumls.append(text)

In [None]:
#look at the first five summaries to confirm that cleanText removed the tags
epsumls[:5]

In [None]:
#verify that each list has same number of items
#prints one length per line, in this order: name, season, number, airdate, airtime, runtime, summary
print(*map(len, (epnamels, seasonls, epnumls, datels, timels, runls, epsumls)), sep='\n')

In [None]:
#zip all lists together and make one big list of tuples (one tuple per episode)
TWDlist = list(zip(epnamels, seasonls, epnumls, datels, timels, runls, epsumls))

#column names, in the same order as the lists passed to zip
colnames = ['title', 'season', 'number', 'airdate', 'airtime', 'runtime', 'summary']

In [None]:
#make list into a dataframe: one row per episode, one column per name in colnames

import pandas as pd

df = pd.DataFrame(TWDlist, columns=colnames)

#preview the first rows of the dataframe
df.head()