## Gather Employment-to-Population Ratio Data for Several Demographic Groups

First identify the BLS series codes for several demographic groups from the BLS series list, then use the BLS API to retrieve each group's employment-to-population ratio.

In [1]:
import pandas as pd
import math
import requests
import json
import config

In [2]:
# Location of the BLS flat file that is loaded as the series catalogue
# (the filter cell below reads its series_id, series_title and *_code columns).
series_url = 'https://download.bls.gov/pub/time.series/ln/ln.series'

In [3]:
# Load the tab-delimited series catalogue into a DataFrame.
df = pd.read_csv(series_url, sep='\t')

In [4]:
# Number of series placed in the first request batch; the remainder goes into
# the second batch (presumably to stay under the API's per-request series
# limit -- confirm against the BLS API documentation).
BATCH_SIZE = 25

# Row filter on the catalogue's code columns. Judging by the titles of the
# selected series, lfst_code 23 is the employment-population ratio, seasonal
# 'U' is unadjusted and ages_code 10 is ages 16-24; the remaining codes appear
# to be the "all"/total categories -- confirm against the BLS code tables.
wanted = (
    (df['lfst_code'] == 23)
    & (df['seasonal'] == 'U')
    & (df['periodicity_code'] == 'M')
    & (df['ages_code'].isin([10]))
    & (df['sexs_code'].isin([1, 2]))
    & (df['orig_code'].isin([0]))
    & (df['vets_code'].isin([0]))
    & (df['race_code'].isin([0]))
)
kept_columns = ['series_id', 'series_title', 'ages_code', 'sexs_code',
                'orig_code', 'race_code']

# .copy() makes df1 an independent frame, so adding the 'series' column below
# does not write into a slice of df (avoids SettingWithCopyWarning).
df1 = df.loc[wanted, kept_columns].copy()

# Disabled alternative filter, kept for reference:
#df1 = df1[~(df1['race_code'] == 0) | ~(df1['orig_code'] == 0)]

# Series ids are padded with spaces in the catalogue; strip them so they can
# be used as request ids and dictionary keys.
df1['series'] = df1['series_id'].str.strip(' ')

# Map series id -> series title, split positionally into two request batches.
titles_by_id = pd.Series(df1['series_title'].values, index=df1['series'])
series_dict1 = titles_by_id.iloc[:BATCH_SIZE].to_dict()
series_dict2 = titles_by_id.iloc[BATCH_SIZE:].to_dict()

In [5]:
def year_chunks(start_year, end_year, span=10):
    """Split an inclusive range of years into consecutive request windows.

    Parameters
    ----------
    start_year, end_year : int
        First and last year to cover; both are included.
    span : int, optional
        Maximum number of years per window (default 10).

    Returns
    -------
    list of (str, str)
        ``(first_year, last_year)`` pairs as strings, in ascending order.
        The final window is shortened so it ends exactly at ``end_year``.

    Raises
    ------
    ValueError
        If ``end_year`` is before ``start_year`` or ``span`` is less than 1.
    """
    if span < 1:
        raise ValueError('span must be at least 1 year')
    if end_year < start_year:
        raise ValueError('end_year must not be earlier than start_year')

    chunks = []
    # Stepping by `span` over the inclusive range counts the end year itself,
    # so an 11-year range yields two windows and a 1-year range yields one.
    for first in range(start_year, end_year + 1, span):
        last = min(first + span - 1, end_year)
        chunks.append((str(first), str(last)))
    return chunks


# Include the start and end year here
date_range = (1993, 2012)

# Divide the date range into BLS-API-friendly length requests
dates = year_chunks(date_range[0], date_range[1])
req_no = len(dates)  # number of requests needed per batch of series

dates

[('1993', '2002'), ('2003', '2012')]

In [7]:
# Request settings for the BLS API: JSON content-type header, the v2
# time-series endpoint, and the registration key (read from the local config
# module) formatted as a query string to append to the endpoint.
headers = {'Content-type': 'application/json'}
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
key = '?registrationkey={api_key}'.format(api_key=config.bls_key)

# Empty frames that the download loop fills in, one per batch of series
df3, df4 = pd.DataFrame(), pd.DataFrame()

In [8]:
def bls_series_to_pandas(series, title):
    """Convert one entry of the API response's series list to a pandas Series.

    Parameters
    ----------
    series : dict
        One element of ``response['Results']['series']``. Its ``'data'`` list
        holds observations with string ``'year'``, ``'period'`` and ``'value'``
        fields.
    title : str
        Name for the returned Series (it becomes the column label later).

    Returns
    -------
    pandas.Series
        Float values indexed by a DatetimeIndex named ``'date'``, in the
        reverse of the order the API returned them. Empty (but correctly
        named and typed) when the API returned no observations.
    """
    observations = series['data']
    if not observations:
        # Placeholder so series of different lengths can still be combined
        # column-wise; explicit dtype avoids the empty-Series dtype warning.
        return pd.Series(dtype=float, name=title,
                         index=pd.DatetimeIndex([], name='date'))

    # Reverse the order of the observations returned by the API
    frame = pd.DataFrame(observations).iloc[::-1]

    # Convert BLS API dates to readable format (YYYY-MM-DD).
    # Assumes monthly period codes of the form 'M01'..'M12' (the catalogue
    # filter keeps periodicity 'M' only); any other code raises ValueError
    # here instead of being guessed at.
    stamps = pd.to_datetime(frame['year'] + '-' + frame['period'].str[1:],
                            format='%Y-%m')

    # Keep only date and series values, as floats, named after the series
    return pd.Series(frame['value'].astype(float).values,
                     index=pd.DatetimeIndex(stamps, name='date'),
                     name=title)


endpoint = '{}{}'.format(url, key)

# One list of per-date-range frames for each batch of series
batch_frames = ([], [])

for batch_no, sd in enumerate((series_dict1, series_dict2)):
    if not sd:
        # Nothing to request for this batch (e.g. fewer series than one batch)
        continue

    for start, end in dates:

        # The data sent in the post request includes a start and end year.
        # list(sd) is required: dict.keys() is not JSON-serializable.
        data = json.dumps({"seriesid": list(sd), "startyear": start, "endyear": end})
        response = requests.post(endpoint, headers=headers, data=data, timeout=60)
        response.raise_for_status()
        p = response.json()

        results = p.get('Results') or {}
        if 'series' not in results:
            # Surface the API's own explanation rather than a bare KeyError
            raise RuntimeError('BLS API returned no series for {}-{}: {}'.format(
                start, end, p.get('message')))

        d = {}  # series id -> pandas Series for this date range
        for series in results['series']:
            s = series['seriesID']  # Shorten name to 's'
            d[s] = bls_series_to_pandas(series, sd[s])

        # One column per requested series, in the order of the batch dict
        batch_frames[batch_no].append(pd.concat([d[k] for k in sd], axis=1))

# Combine the dataframes for each range of years into one by stacking the rows
df3 = pd.concat(batch_frames[0]) if batch_frames[0] else pd.DataFrame()
df4 = pd.concat(batch_frames[1]) if batch_frames[1] else pd.DataFrame()

In [9]:
# Put the columns of both batches side by side, aligned on df3's index
full_data = df3.join(df4)

In [10]:
# Save the combined data; this file is read back in further down
full_data.to_csv('full_data_young.csv')

In [11]:
# Column-wise mean of each series over two sub-periods (both end points are
# included by label slicing). The period names presumably refer to tax
# regimes -- confirm with the analysis this feeds.
# .loc replaces .ix, which was removed from pandas.
high_tax = full_data.loc['1993-01-01':'2000-12-01'].mean()
low_tax = full_data.loc['2001-01-01':'2012-12-01'].mean()

In [12]:
# Show the two period means side by side (column 0: high_tax, column 1: low_tax)
pd.concat((high_tax, low_tax), axis='columns')

Unnamed: 0,0,1
"(Unadj) Employment-Population Ratio - 16-24 yrs., Men",60.819792,52.465972
"(Unadj) Employment-Population Ratio - 16-24 yrs., Women",55.901042,50.351389


In [13]:
# Reload the saved data with the 'date' column parsed straight into a
# DatetimeIndex. Doing it at read time keeps the raw date strings out of the
# data columns, so later numeric aggregations only see the series values.
df = pd.read_csv('full_data_young.csv', index_col='date', parse_dates=True)

In [14]:
# Annual averages: one row per calendar year, labelled with that year's first
# date in the data.
# Only numeric columns are averaged (a leftover text column cannot be).
numeric = df.select_dtypes(include='number')
yearly_mean = numeric.groupby(numeric.index.year).transform('mean')

# Keep the first row of each year by its year label. De-duplicating on the
# values instead would silently drop any year whose averages happen to equal
# those of an earlier year.
first_of_year = ~pd.Index(numeric.index.year).duplicated()
annual = yearly_mean[first_of_year]

In [15]:
# Display the annual averages
annual

Unnamed: 0_level_0,"(Unadj) Employment-Population Ratio - 16-24 yrs., Men","(Unadj) Employment-Population Ratio - 16-24 yrs., Women"
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1993-01-01,60.141667,54.316667
1994-01-01,60.991667,55.266667
1995-01-01,61.458333,55.066667
1996-01-01,60.075,55.175
1997-01-01,60.141667,55.9
1998-01-01,60.783333,57.166667
1999-01-01,61.033333,56.941667
2000-01-01,61.933333,57.375
2001-01-01,59.383333,56.025
2002-01-01,57.091667,54.275


In [16]:
# Save the annual averages
annual.to_csv('full_data_ann_young.csv')