In [145]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import html5lib

Okay, so we have noticed that there is a spike in presidential baby names during each president's campaign years (there's nothing like showing your political affiliation by naming your firstborn after your candidate crush!), but this trend tends to fade over the course of the presidency. You know what else goes down during a presidency? Approval ratings!  

Does the decline (or *incline*) in baby names associated with a president track the decline (or *incline*) in their approval rating? So, first things first, I have to scrape some approval rating data. Looks like `http://www.presidency.ucsb.edu/data/popularity.php?` is a good candidate for this information. 

In [72]:
def theurl(name):
    """Return the presidency.ucsb.edu approval-ratings URL for a president.

    Parameters
    ----------
    name : str
        One of the keys listed in ``pres_ids`` below (for example
        ``"Obama"``, ``"Bush G"`` or ``"Bush GHW"``).

    Returns
    -------
    str
        The ``popularity.php`` URL with the matching ``pres`` query value,
        sorted by time in descending order.

    Raises
    ------
    KeyError
        If ``name`` is not one of the supported keys.
    """
    # Value of the page's `pres` query parameter for each supported key.
    pres_ids = {
        "Obama": 44,
        "Bush G": 43,
        "Clinton": 42,
        "Bush GHW": 41,
        "Reagan": 40,
        "Carter": 39,
        "Ford": 38,
        "Nixon": 37,
        "Johnson": 36,
        "Kennedy": 35,
        "Eisenhower": 34,
        "Truman": 33,
        "Roosevelt": 32,
    }
    template = ("http://www.presidency.ucsb.edu/data/popularity.php"
                "?pres={}&sort=time&direct=DESC&Submit=DISPLAY")
    return template.format(pres_ids[name])

In [3]:
def make_soup(url, timeout=30):
    """Download ``url`` and parse the response body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` can block indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response content parsed with the ``"lxml"`` parser.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here, with a clear message, rather than parsing an error page and
    # tripping over a missing table further downstream.
    response.raise_for_status()
    return BeautifulSoup(response.content, "lxml")

We need a way to extract individual data frames for each president listed in the function defined above. That's what the following function does.

In [69]:
def presidentialDataFramer(soup):
    """Scrape one president's approval-rating table into a DataFrame.

    Parameters
    ----------
    soup : str
        Despite its name, this is a president key understood by ``theurl``
        (e.g. ``"Clinton"``), not a parsed document. The name is kept so
        existing keyword callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per table row from the seventh ``<tr>`` onward, with string
        cells, columns in sorted header order and the first of those
        columns removed.

    Raises
    ------
    KeyError
        If ``soup`` is not a key known to ``theurl``.
    IndexError
        If the page has fewer than eleven tables or the table has fewer
        than five rows.
    """
    page = make_soup(theurl(soup))

    # find all tables, then specify which one to read
    # NOTE(review): index 10 presumably matches the page layout at the time
    # of writing -- confirm against the live page.
    tables = page.findChildren('table')
    table = tables[10]

    # extract the text of every cell, row by row
    data = [[td.text for td in row.select('td')]
            for row in table.findAll('tr')]

    # the fifth row carries the column headers
    headers = data[4]

    # everything from the seventh row on is data; transpose rows to columns
    body = data[6:]
    cols = zip(*body)
    tbl_d = {name: col for name, col in zip(headers, cols)}

    df = pd.DataFrame(tbl_d)
    # Older pandas sorted dict keys when building a frame; newer pandas keeps
    # insertion order. Sort explicitly so "the first column" is the same
    # column on every pandas version.
    df = df[sorted(df.columns)]

    # trim the first column off
    trimmed = df.drop(df.columns[[0]], axis=1)
    return trimmed

Here is one example of how it works with none other than William Clinton.

In [146]:
# Preview only the first ten rows instead of rendering the whole frame.
presidentialDataFramer("Clinton").head(10)

Unnamed: 0,Approving,Disapproving,End Date,President,Start Date,unsure/no data
0,66,29,01/14/2001,William J. Clinton,01/10/2001,5
1,63,31,01/07/2001,,01/05/2001,4
2,71,26,12/17/2000,,12/15/2000,1
3,59,35,12/04/2000,,12/02/2000,5
4,63,32,11/15/2000,,11/13/2000,4
5,56,38,10/26/2000,,10/25/2000,5
6,58,36,10/09/2000,,10/06/2000,4
7,60,34,09/13/2000,,09/11/2000,5
8,62,35,09/03/2000,,08/29/2000,2
9,61,35,08/19/2000,,08/18/2000,3


It would be simplest if all of these data frames were accessible in one data frame. In order to do this, we will have to loop through the presidents who have data available and concatenate their frames together. While we are at it, let's wrap this in a function defined below.

In [147]:
def approvalCombinater():
    """Build one DataFrame of approval ratings for every listed president.

    Calls ``presidentialDataFramer`` once per president (one page download
    each), concatenates the results, turns non-breaking-space cells into
    missing values and forward-fills them.

    Returns
    -------
    pandas.DataFrame
        The combined frame with a fresh 0..n-1 index; ``reset_index`` keeps
        each row's previous per-president index as a column.
    """
    presidents = ["Obama", "Bush G", "Clinton", "Bush GHW", "Reagan", "Carter", "Ford",
                  "Nixon", "Johnson", "Kennedy", "Eisenhower", "Truman", "Roosevelt"]

    # one data frame of approval ratings per president
    frames = [presidentialDataFramer(president) for president in presidents]

    combined = pd.concat(frames)

    # cells holding only a non-breaking space are treated as missing
    frame = combined.replace('\xa0', np.nan)

    # Carry the last seen value downward so rows without a president name
    # inherit it from the row above. `.ffill()` replaces the deprecated
    # `fillna(method="ffill")` spelling with identical results.
    # NOTE(review): this fills every column, not only the president name.
    final = frame.ffill()
    return final.reset_index()


In [148]:
# Fetches every president's page, so this cell needs network access.
pres_approval = approvalCombinater()

Voilà! Now let's save it as a CSV and call it a day.

In [149]:
# Persist the combined ratings; the path is relative to the notebook's working directory.
pres_approval.to_csv(path_or_buf="data/presApproval.csv")