Generate State Value Groupings for Chartbook Maps
=======

*Brian Dew*<br>
*September 17, 2017*<br>
*@bd_econ*

In [1]:
# Import preliminaries
import requests
import pandas as pd
import config   ## File with API key
api_key = config.key

# Table of state names and postal abbreviations, indexed by the 'State' column
states = pd.read_csv('state_abbrevs.csv', index_col='State')

## Mapping state-level changes to real GDP

Data source: U.S. Bureau of Economic Analysis

https://www.bea.gov/newsreleases/regional/gdp_state/qgsp_newsrelease.htm

In [2]:
# Pieces of the BEA API query string; each piece carries its own leading '&'
# NOTE(review): parameter meanings (industry 1, component RGDP_SQN) are not
# documented here -- confirm against the BEA API user guide.
base = 'https://www.bea.gov/api/data/?&UserID=' + str(api_key)
m = '&method=GetData'
dsn = '&datasetname=RegionalProduct'
ind = '&IndustryId=1'
comp = '&Component=RGDP_SQN'
geo = '&GeoFIPS=STATE'
year = '&Year=LAST5'
fmt = '&ResultFormat=json'

# Full request url: the pieces above glued together in order
url = ''.join([base, m, dsn, ind, comp, geo, year, fmt])

In [3]:
# Request the data and reshape it into a table with one row per date and
# one column per state (nothing is displayed by this cell).
resp = requests.get(url, timeout=30)
resp.raise_for_status()   # stop on an HTTP error instead of failing later while parsing
r = resp.json()
df = pd.DataFrame(r['BEAAPI']['Results']['Data'])

# Keep only rows whose GeoName appears in the states table; .copy() gives an
# independent frame so adding the 'date' column does not warn about a slice.
df = df[df['GeoName'].isin(states.index)].copy()
df['date'] = pd.to_datetime(df['TimePeriod'])
df = df.sort_values(['GeoName', 'date'])
df = df.set_index(['GeoName', 'date'])['DataValue']

# Convert the whole series at once; values that cannot be parsed become NaN.
# unstack(0) moves GeoName from the row index to the columns.
df = pd.to_numeric(df, errors='coerce').unstack(0)

In [4]:
# Change in level, by state, between the last row and the row at position 8
df.iloc[-1].sub(df.iloc[8])

GeoName
Alabama                   4592
Alaska                     422
Arizona                  18977
Arkansas                  1982
California              139322
Colorado                 22036
Connecticut               3304
Delaware                  2624
District of Columbia      4059
Florida                  44151
Georgia                  22898
Hawaii                    2344
Idaho                     3377
Illinois                 22227
Indiana                  13598
Iowa                      4313
Kansas                    3206
Kentucky                  9236
Louisiana                  589
Maine                     1754
Maryland                 14440
Massachusetts            21652
Michigan                 22721
Minnesota                13009
Mississippi               1240
Missouri                  7693
Montana                    558
Nebraska                  2299
Nevada                    9045
New Hampshire             2408
New Jersey               12227
New Mexico                1684


In [5]:
# Growth rate for every state column at once: the one-period change is
# compounded over four periods and expressed in percent.
prev = df.shift(1)
df = ((1 + (df - prev) / prev) ** 4 - 1) * 100

In [6]:
# Label for the most recent period in the index, formatted as '<year> Q<quarter>'
date = '{} Q{}'.format(df.index[-1].year, df.index[-1].quarter)

# Last row of df reshaped to one row per state; the value sits in column 0
df1 = df.tail(1).stack().reset_index()[['GeoName', 0]]
df1['Abbrev'] = df1['GeoName'].map(states.to_dict()['Abbreviation'])
df1 = df1.set_index('GeoName')

# DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0;
# .at is the supported scalar setter.
# NOTE(review): 'SP' is presumably a second map region drawn with Michigan -- confirm.
df1.at['Michigan', 'Abbrev'] = 'MI, SP'

# Growth-rate buckets in percent, given as (lower, upper) bounds
gdp_rng = [(-10, -5), (-5, -2), (-2, 0), (0, 2), (2, 5), (5, 10)]

  after removing the cwd from sys.path.


In [7]:
# Display the per-state value (column 0) and map abbreviation
df1

Unnamed: 0_level_0,0,Abbrev
GeoName,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,1.23222,AL
Alaska,0.983114,AK
Arizona,2.539475,AZ
Arkansas,-0.014454,AR
California,1.477127,CA
Colorado,3.027893,CO
Connecticut,1.64745,CT
Delaware,1.262049,DE
District of Columbia,2.013588,DC
Florida,2.530964,FL


In [8]:
# One output line per bucket: abbreviations whose value falls in (low, high]
for low, high in gdp_rng:
    in_bucket = df1[0].gt(low) & df1[0].le(high)
    print(', '.join(df1.loc[in_bucket, 'Abbrev'].values))



AR, ND
AL, AK, CA, CT, DE, GA, ID, IL, IN, KS, ME, MD, MA, MI, SP, MN, MS, NE, NV, NH, NJ, NM, NY, OH, OK, OR, RI, SC, WV, WI
AZ, CO, DC, FL, HI, IA, KY, LA, MO, MT, NC, PA, SD, TN, TX, UT, VT, VA, WA, WY



In [9]:
# Prints the abbreviations per growth bucket, lower bound exclusive and
# upper bound inclusive (this cell repeats the grouping printed just above).
for bounds in gdp_rng:
    lower, upper = bounds
    members = df1.loc[(df1[0] > lower) & (df1[0] <= upper), 'Abbrev']
    print(', '.join(members.values))



AR, ND
AL, AK, CA, CT, DE, GA, ID, IL, IN, KS, ME, MD, MA, MI, SP, MN, MS, NE, NV, NH, NJ, NM, NY, OH, OK, OR, RI, SC, WV, WI
AZ, CO, DC, FL, HI, IA, KY, LA, MO, MT, NC, PA, SD, TN, TX, UT, VT, VA, WA, WY



## Employment-Population Ratio by State

In [2]:
# Text file the employment-population data is read from
data_url = 'https://www.bls.gov/web/laus/ststdsadata.txt'

# Column names applied, in order, to the parsed table
cols = 'State Pop LF LFsh Empl EPOP Unempl Unrate'.split()

# Consecutive (low, high) bins built from the list of bin edges
epop_edges = [50, 55, 60, 65, 70, 80]
epop_rng = list(zip(epop_edges[:-1], epop_edges[1:]))

In [3]:
# Read the file as one text column and keep only its last 54 lines.
# NOTE(review): presumably these are the latest month's heading followed by
# one line per area -- confirm against the file layout.
lines = pd.read_table(data_url, header=None).iloc[-54:].reset_index()[0]

# First kept line is the month label; the remaining lines are data rows
month = lines[0].strip()

# Split each row on runs of two or more whitespace characters (raw string so
# '\s' is a regex class rather than an invalid string escape).
df = lines[1:].str.strip().str.split(r'\s\s+', expand=True)
df.columns = cols

# Keep the text before the first ' .' in each area name
df['State'] = [name.split(' .')[0] for name in df['State']]

# Drop rows naming a city or County; .copy() so the columns added below go
# onto an independent frame rather than a slice.
df = df[(~df['State'].str.contains('city')) &
        (~df['State'].str.contains('County'))].copy()
df['Abbr'] = df['State'].map(states.to_dict()['Abbreviation'])
df['Abbr'] = df['Abbr'].str.replace('MI', 'MI, SP')
df['EPOP'] = df['EPOP'].astype(float)

In [4]:
# For each bin, print its position and then the abbreviations whose EPOP
# is above the lower bound and at or below the upper bound.
for i, (low, high) in enumerate(epop_rng):
    print(i)
    in_bin = df['EPOP'].gt(low) & df['EPOP'].le(high)
    print(', '.join(df.loc[in_bin, 'Abbr'].values))

0
AL, MS, NM, WV
1
AZ, AR, CA, FL, KY, LA, MI, SP, MT, NV, NJ, NY, NC, OH, OK, PA, SC, TN
2
AK, CT, DE, GA, HI, ID, IL, IN, KS, ME, MD, MA, MO, OR, RI, TX, VA, WA, WY
3
CO, DC, IA, MN, NE, NH, ND, SD, UT, VT, WI
4



In [5]:
# Month label taken from the first of the lines kept from the source file
month

'July 2018'

## Unemployment Rate by State

In [6]:
# Base location of the LAUS time-series files and the two files used here
lau_url = 'https://download.bls.gov/pub/time.series/la/'
data_url = lau_url + 'la.data.3.AllStatesS'
state_url = lau_url + 'la.state_region_division'

# Dict built from the 'srd_code' column, keyed by the parsed frame's index.
# NOTE(review): later cells use the keys as numeric codes and the values as
# state names, which suggests the code column is parsed as the index -- confirm.
state_codes = pd.read_table(state_url)['srd_code'].to_dict()

# One series id per key, with the key zero-padded to two digits
s = ['LASST{:02}0000000000003'.format(code) for code in state_codes]

# Unemployment-rate bins as (lower, upper) bounds
urate_rng = [(0,3), (3,4), (4,5), (5,6), (6,7), (7,20)]

In [10]:
# Whitespace-delimited file; the separator is a raw string so '\s' reaches
# the regex engine instead of being an invalid string escape.
df = pd.read_table(data_url, sep=r'\s+')

# Period and year from the last row of the first series in s
month, year = df[df['series_id'] == s[0]][['period', 'year']].iloc[-1]

# First day of that month; month[1:] drops the leading letter of the period code
date = pd.to_datetime('{}-{}-01'.format(year, month[1:]))

In [11]:
# Display the table read from data_url
df

Unnamed: 0,series_id,year,period,value,footnote_codes
0,LASST010000000000003,1976,M01,6.7,s
1,LASST010000000000003,1976,M02,6.7,s
2,LASST010000000000003,1976,M03,6.6,s
3,LASST010000000000003,1976,M04,6.5,s
4,LASST010000000000003,1976,M05,6.4,s
5,LASST010000000000003,1976,M06,6.5,s
6,LASST010000000000003,1976,M07,6.6,s
7,LASST010000000000003,1976,M08,6.8,s
8,LASST010000000000003,1976,M09,6.9,s
9,LASST010000000000003,1976,M10,7.0,s


In [12]:
# Rows for the series in s at the latest period and year found above
data = df[(df['series_id'].isin(s)) &
          (df['period'] == month) &
          (df['year'] == year)].reset_index()

# Characters 5-6 of the series id are the numeric code looked up in state_codes
data['state'] = [state_codes[float(s_id[5:7])]
                 for s_id in data['series_id']]
data['abbr'] = data['state'].map(states.to_dict()['Abbreviation'])

# Drop only rows lacking an abbreviation or a value. A bare dropna() would
# also discard any row with a blank in another column such as footnote_codes.
data = data.dropna(subset=['abbr', 'value']).copy()
data['abbr'] = data['abbr'].str.replace('MI', 'MI, SP')
data['value'] = data['value'].astype(float)

In [13]:
# For each bin, print its position and then the abbreviations whose rate is
# above the lower bound and at or below the upper bound.
for i, (low, high) in enumerate(urate_rng):
    print(i)
    in_bin = (data['value'] > low) & (data['value'] <= high)
    matches = data.loc[in_bin, 'abbr'].values
    # join gives the lone item for one match and an empty line for none
    print(', '.join(matches))

0
CO, HI, ID, IA, ME, MN, NE, NH, ND, VT, WI
1
AR, DE, FL, GA, IN, KS, MA, MO, MT, OK, OR, SC, SD, TN, TX, UT, VA, WY
2
AL, AZ, CA, CT, IL, KY, LA, MD, MI, SP, MS, NV, NJ, NM, NY, NC, OH, PA, RI, WA
3
DC, WV
4
AK
5



In [None]:
# Display the latest-period unemployment rate table
data

#### Convert to .tex input



In [None]:
# Tex strings 
#c_str = '\\tikzset{set state val/.style args={#1}{#1='
#s_str = '\\tikzset{set state val/.list={'

# Color categories
#C1 = '{fill=blue!50!green}}}'
#C2 = '{fill=green!80!blue}}}'
#C3 = '{fill=yellow!70!green}}}'
#C4 = '{fill=orange!60!yellow}}}'
#C5 = '{fill=red!40!orange!80}}}'
#C6 = '{fill=red!80!black!80}}}'

In [None]:
#S1 = ', '.join(map(str, df1[df1[0] >= 5]['Abbrev'].values))
#S2 = ', '.join(map(str, df1[(df1[0] < 5) & (df1[0] >= 2)]['Abbrev'].values))
#S3 = ', '.join(map(str, df1[(df1[0] < 2) & (df1[0] >= 0)]['Abbrev'].values))
#S4 = ', '.join(map(str, df1[(df1[0] < 0) & (df1[0] >= -2)]['Abbrev'].values))
#S5 = ', '.join(map(str, df1[(df1[0] < -2) & (df1[0] >= -5)]['Abbrev'].values))
#S6 = ', '.join(map(str, df1[df1[0] < -5]['Abbrev'].values))

In [None]:
#d = {C1: S1, C2: S2, C3: S3, C4: S4, C5: S5, C6: S6}
#for key, value in d.iteritems():
#    print '{}{}\n{}{}}}}}'.format(c_str, key, s_str, value)