In [1]:
import requests
import pandas as pd 
import json
import config
import math

%matplotlib inline

ModuleNotFoundError: No module named 'config'

In [None]:
# Endpoint of version 2 of the BLS public API
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

# Query-string fragment carrying the registration key.
# The key itself is read from config.py, which contains: bls_key = 'key'
key = f'?registrationkey={config.bls_key}'

# The request body is sent as JSON
headers = {'Content-type': 'application/json'}

# Maps each requested BLS series ID to the column name used for it in df
series_dict = dict(LNS12300060='EPOP')

# Empty frame that the download loop fills in later
df = pd.DataFrame()

In [None]:
# Include the start and end year here (both years are inclusive)
date_range = (1994, 2017)


def year_chunks(first_year, last_year, span=10):
    """Split an inclusive range of years into consecutive request windows.

    Parameters
    ----------
    first_year, last_year : int
        First and last year to cover; both are included in the result.
    span : int, default 10
        Maximum number of years covered by a single window.

    Returns
    -------
    list of (str, str)
        One ``(start_year, end_year)`` pair per window, in chronological
        order. The last window is truncated so it ends at ``last_year``.

    Raises
    ------
    ValueError
        If ``span`` is smaller than 1 or ``last_year`` precedes ``first_year``.
    """
    if span < 1:
        raise ValueError('span must be at least 1 year')
    if last_year < first_year:
        raise ValueError('last_year must not be earlier than first_year')
    # Stepping the window start by `span` up to and including last_year counts
    # years inclusively, so a one-year range yields exactly one window and no
    # window ever covers more than `span` years.
    return [(str(lo), str(min(lo + span - 1, last_year)))
            for lo in range(first_year, last_year + 1, span)]


# Divide the date range into BLS-API-friendly length requests
dates = year_chunks(date_range[0], date_range[1])
req_no = len(dates)  # number of requests that will be sent

dates

In [None]:
def _series_to_pandas(series, name):
    """Turn one entry of the response's ``series`` list into a named pandas Series.

    Parameters
    ----------
    series : dict
        One element of ``p['Results']['series']``; its ``'data'`` list holds
        records with ``'period'``, ``'year'`` and ``'value'`` string fields.
    name : str
        Name given to the returned Series (used as the column name later).

    Returns
    -------
    pandas.Series
        Float values indexed by a ``'date'`` index, in the reverse of the
        order the API returned them. Empty (but still datetime-indexed)
        when the API returned no observations for this window.
    """
    # Reverse the order in which the API returned the observations
    observations = pd.DataFrame(series['data']).iloc[::-1]
    if observations.empty:
        # Keep a datetime index even when blank so that combining it with
        # non-empty series does not degrade the combined index.
        return pd.Series(dtype=float, index=pd.DatetimeIndex([], name='date'), name=name)

    # Convert BLS API dates (period + year strings) to timestamps
    timestamps = pd.to_datetime(observations['period'] + ' ' + observations['year'])

    # Keep only date and series values, as floating point numbers
    return (observations.assign(date=timestamps)
            .set_index('date')['value']
            .astype(float)
            .rename(name))


chunks = []  # one DataFrame per (start, end) request window
for start, end in dates:

    # The data sent in the post request includes a start and end year.
    # list(...) is required: a dict keys view is not JSON serializable on Python 3.
    data = json.dumps({"seriesid": list(series_dict), "startyear": start, "endyear": end})
    p = requests.post('{}{}'.format(url, key), headers=headers, data=data).json()

    # Parsed series for this window, keyed by BLS series ID
    d = {series['seriesID']: _series_to_pandas(series, series_dict[series['seriesID']])
         for series in p['Results']['series']}

    # Put the series side by side, in the column order given by series_dict
    chunks.append(pd.concat([d[k] for k in series_dict], axis=1))

# Combine the frames for each range of years with a single concat.
# DataFrame.append no longer exists in pandas 2.x, and rebuilding df from the
# collected chunks means re-running this cell does not duplicate rows.
if chunks:
    df = pd.concat(chunks)

In [None]:
# Average df over 3-month bins that are closed on their left edge, then write
# the result to epop.csv
df.resample(rule='3M', closed='left').mean().to_csv(path_or_buf='epop.csv')

In [None]:
# Mean of df over 3-month bins (closed on the left); [:-1] drops the final bin
epop = df.resample('3M', closed='left').mean()[:-1]

# Parse the 'date' column into timestamps while reading. Without this the index
# would hold plain strings, which cannot match epop's datetime index when the
# two frames are joined on their indexes.
eci = pd.read_csv('ECI.csv', parse_dates=['date']).set_index('date')

In [None]:
# Attach eci's columns to epop by index (DataFrame.join defaults to a left join)
df2 = epop.join(other=eci)

# UPOP is defined as 100 minus EPOP, computed on the raw values
df2['UPOP'] = 100 - df2['EPOP'].to_numpy()

In [None]:
# Display the last five rows of the joined frame
df2.tail(n=5)

In [None]:
# Scatter plot of ECI (vertical axis) against UPOP (horizontal axis)
df2[['ECI', 'UPOP']].plot.scatter(x='UPOP', y='ECI')

In [None]:
import statsmodels.api as sm

In [None]:
# Regress ECI (output / dependent variable) on UPOP (input / independent variable)
X = df2['UPOP']
y = df2['ECI']
X = sm.add_constant(X)  # add an intercept (beta_0) to the model

# Note the argument order: sm.OLS(output, input).
# df2 comes from a left join, so rows without a matching ECI value hold NaN.
# missing='drop' excludes incomplete rows from the fit; the statsmodels default
# ('none') performs no NaN handling at all.
model = sm.OLS(y, X, missing='drop').fit()

# Fitted values for every row of X (rows with a NaN input stay NaN)
predictions = model.predict(X)

# Print out the statistics
model.summary()