Generate State Value Groupings for Chartbook Maps
=======

*Brian Dew*<br>
*September 17, 2017*<br>
*@bd_econ*

In [1]:
# Import preliminaries
import requests
import pandas as pd
import config   ## File with API key
api_key = config.key

# State names with their postal abbreviations, indexed by the 'State' column
states = pd.read_csv('state_abbrevs.csv', index_col='State')

## Mapping state-level changes to real GDP

Data source: U.S. Bureau of Economic Analysis

https://www.bea.gov/newsreleases/regional/gdp_state/qgsp_newsrelease.htm

In [2]:
# Pieces of the BEA API request. Every fragment after `base` carries its own
# leading '&', so the pieces can be concatenated directly in order.
base = 'https://www.bea.gov/api/data/?&UserID={}'.format(api_key)
m = '&method=GetData'
dsn = '&datasetname=RegionalProduct'
ind = '&IndustryId=1'
comp = '&Component=RGDP_SQN'
geo = '&GeoFIPS=STATE'
year = '&Year=LAST5'
fmt = '&ResultFormat=json'

# Full request url: base address followed by each parameter fragment
url = ''.join([base, m, dsn, ind, comp, geo, year, fmt])

In [3]:
# Request the data and reshape it into a date-by-state table of numeric values
resp = requests.get(url, timeout=60)
resp.raise_for_status()  # surface an HTTP error here rather than as a KeyError below
r = resp.json()
df = pd.DataFrame(r['BEAAPI']['Results']['Data'])

# Keep only the areas listed in `states`; copy so that the column added
# below is written to an independent frame rather than a slice
df = df[df['GeoName'].isin(states.index)].copy()
df['date'] = pd.to_datetime(df['TimePeriod'])
df = df.sort_values(['GeoName', 'date'])
df = df.set_index(['GeoName', 'date'])['DataValue']

# Entries that cannot be parsed as numbers become NaN; index level 0
# (GeoName) is then pivoted into columns, leaving dates as the row index
df = pd.to_numeric(df, errors='coerce').unstack(0)

In [4]:
# Growth rate for every state column at once: the one-period rate of change
# compounded over four periods, expressed in percent
prev = df.shift(1)
df = ((1 + (df - prev) / prev) ** 4 - 1) * 100

In [5]:
# Text label for the most recent period in the table, formatted 'YYYY Qn'
latest = df.index[-1]
date = '{} Q{}'.format(latest.year, latest.quarter)

# Most recent growth rate per state: one row per state, value in column 0
df1 = df.tail(1).stack().reset_index()[['GeoName', 0]]
df1['Abbrev'] = df1['GeoName'].map(states.to_dict()['Abbreviation'])
df1 = df1.set_index('GeoName')

# Michigan is written out as two comma-separated labels.
# NOTE(review): presumably 'SP' is a second Michigan region on the map -- confirm.
# `.at` replaces DataFrame.set_value, which was deprecated and later removed
# from pandas.
df1.at['Michigan', 'Abbrev'] = 'MI, SP'

# Growth-rate bins as (lower, upper) pairs, in percent
gdp_rng = [(-10, -5), (-5, -2), (-2, 0), (0, 2), (2, 5), (5, 10)]

In [6]:
# One output line per bin in gdp_rng, listing the abbreviations of states
# whose growth rate satisfies lower < rate <= upper (blank line if none)
for lower, upper in gdp_rng:
    in_bin = (df1[0] > lower) & (df1[0] <= upper)
    print(', '.join(df1[in_bin]['Abbrev'].values))


IA, NE, SD
HI, KS, MN, MT
AL, AK, AZ, AR, CA, CO, CT, DE, DC, FL, GA, IL, IN, KY, LA, ME, MA, MI, SP, MS, MO, NV, NH, NJ, NY, NC, ND, OH, OK, OR, PA, RI, SC, TN, UT, VT, WY
ID, MD, NM, TX, VA, WA, WV, WI



## Employment-Population Ratio by State

In [10]:
# Location of the text file read in the next cell
data_url = 'https://www.bls.gov/web/laus/ststdsadata.txt'

# Column labels assigned to the parsed table, in order
cols = 'State Pop LF LFsh Empl EPOP Unempl Unrate'.split()

# Consecutive (lower, upper) bins built from a single list of edges
epop_edges = [50, 55, 60, 65, 70, 80]
epop_rng = list(zip(epop_edges[:-1], epop_edges[1:]))

In [11]:
# Read the file one text line per row and keep only the final 54 lines.
# NOTE(review): presumably that is the latest month's date line followed by
# its area rows -- confirm against the file layout.
df = pd.read_table(data_url, header=None).iloc[-54:].reset_index()[0]

# First retained line, whitespace-trimmed; the remaining lines are data rows
month = df[0].strip()
df = df[1:]

# Split each row on runs of two or more whitespace characters. The pattern
# is a raw string because '\s' is not a valid escape in a normal literal.
df = df.str.strip().str.split(r'\s\s+', expand=True)
df.columns = cols

# Drop everything from the first ' .' onward in the area name
df['State'] = [name.split(' .')[0] for name in df['State']]

# Exclude rows whose name contains 'city' or 'County'; copy so the columns
# added below are written to an independent frame rather than a slice
is_city = df['State'].str.contains('city')
is_county = df['State'].str.contains('County')
df = df[~is_city & ~is_county].copy()

df['Abbr'] = df['State'].map(states.to_dict()['Abbreviation'])
# Michigan is written out as two comma-separated labels
df['Abbr'] = df['Abbr'].str.replace('MI', 'MI, SP')
df['EPOP'] = df['EPOP'].astype(float)

In [12]:
# One output line per bin in epop_rng, listing the abbreviations of rows
# with lower < EPOP <= upper (blank line if none)
for lower, upper in epop_rng:
    in_bin = (df['EPOP'] > lower) & (df['EPOP'] <= upper)
    print(', '.join(df[in_bin]['Abbr'].values))

AL, LA, MS, NM, WV
AZ, AR, CA, DE, FL, KY, MI, SP, NV, NY, NC, OH, OK, PA, SC, TN
AK, CT, GA, HI, ID, IL, IN, KS, ME, MA, MO, MT, NJ, OR, RI, TX, VT, VA, WA, WY
CO, DC, IA, MD, MN, NE, NH, SD, UT, WI
ND


## Unemployment Rate by State

In [13]:
# Base directory of the files used below, plus the two files read from it
lau_url = 'https://download.bls.gov/pub/time.series/la/'
data_url = '{}la.data.3.AllStatesS'.format(lau_url)
state_url = '{}la.state_region_division'.format(lau_url)

# Lookup built from the parsed file: frame index -> 'srd_code' column value.
# The keys are formatted into series ids here; the values are used as state
# names when the data are labelled in a later cell.
state_codes = pd.read_table(state_url)['srd_code'].to_dict()

# One series id per key, with the key zero-padded to two digits
s = ['LASST{:02}0000000000003'.format(code) for code in state_codes]

# Consecutive (lower, upper) bins built from a single list of edges
urate_edges = [0, 3, 4, 5, 6, 7, 20]
urate_rng = list(zip(urate_edges[:-1], urate_edges[1:]))

In [14]:
# Read the whitespace-delimited data file. The separator is a raw string
# because '\s' is not a valid escape sequence in a normal string literal.
df = pd.read_table(data_url, sep=r'\s+')

# Period and year taken from the last row of the first series in `s`
latest = df.loc[df['series_id'] == s[0], ['period', 'year']].iloc[-1]
month, year = latest['period'], latest['year']

# First day of that period; the leading character of the period code is
# dropped and the remainder is used as the month number
date = pd.to_datetime('{}-{}-01'.format(year, month[1:]))

In [15]:
# Rows for the series ids in `s` at the period and year selected above
is_latest = (df['series_id'].isin(s)
             & (df['period'] == month)
             & (df['year'] == year))
data = df[is_latest].reset_index()

# Characters 5-6 of the series id are the numeric key into state_codes
data['state'] = [state_codes[int(s_id[5:7])] for s_id in data['series_id']]
data['abbr'] = data['state'].map(states.to_dict()['Abbreviation'])
# Michigan is written out as two comma-separated labels
data['abbr'] = data['abbr'].str.replace('MI', 'MI, SP')
data['value'] = data['value'].astype(float)

# Drop rows with no abbreviation (names not found in `states`) or no value.
# Only these two columns are checked, so that a missing entry in a column
# that is not used for the output cannot discard an otherwise valid row.
data = data.dropna(subset=['abbr', 'value'])

In [16]:
# One output line per bin in urate_rng, listing the abbreviations of states
# with lower < rate <= upper. str.join copes with zero or one entries on its
# own, so an empty bin prints a blank line instead of raising, as calling
# .item() on an empty array would.
for lower, upper in urate_rng:
    in_bin = (data['value'] > lower) & (data['value'] <= upper)
    print(', '.join(data[in_bin]['abbr'].values))

CO, HI, ID, NE, NH, ND, VT
AR, FL, IN, IA, KS, ME, MD, MI, SP, MN, MO, MT, SC, SD, TN, UT, VA, WI, WY
AL, AZ, CT, DE, GA, IL, MA, NV, NJ, NY, NC, OK, OR, PA, RI, TX, WA, WV
CA, KY, LA, MS, OH
DC, NM
AK


#### Convert to .tex input (legacy Python 2 code, currently commented out)



In [7]:
# Tex strings 
#c_str = '\\tikzset{set state val/.style args={#1}{#1='
#s_str = '\\tikzset{set state val/.list={'

# Color categories
#C1 = '{fill=blue!50!green}}}'
#C2 = '{fill=green!80!blue}}}'
#C3 = '{fill=yellow!70!green}}}'
#C4 = '{fill=orange!60!yellow}}}'
#C5 = '{fill=red!40!orange!80}}}'
#C6 = '{fill=red!80!black!80}}}'

In [8]:
#S1 = ', '.join(map(str, df1[df1[0] >= 5]['Abbrev'].values))
#S2 = ', '.join(map(str, df1[(df1[0] < 5) & (df1[0] >= 2)]['Abbrev'].values))
#S3 = ', '.join(map(str, df1[(df1[0] < 2) & (df1[0] >= 0)]['Abbrev'].values))
#S4 = ', '.join(map(str, df1[(df1[0] < 0) & (df1[0] >= -2)]['Abbrev'].values))
#S5 = ', '.join(map(str, df1[(df1[0] < -2) & (df1[0] >= -5)]['Abbrev'].values))
#S6 = ', '.join(map(str, df1[df1[0] < -5]['Abbrev'].values))

In [9]:
#d = {C1: S1, C2: S2, C3: S3, C4: S4, C5: S5, C6: S6}
#for key, value in d.iteritems():
#    print '{}{}\n{}{}}}}}'.format(c_str, key, s_str, value)