# Data_Collector: Jobs Report
------

Updated: January 23, 2018

Status: Collects the series; calculations and export still need to be done.

Descr: Python code for reading the BLS jobs report, which is released on the first Friday of the month at 8:30 am. [PDF release](https://www.bls.gov/news.release/pdf/empsit.pdf)

To Do: Text calculations. Make charts. Combine charts and save as a picture.

------




#### Packages - Run early

In [1]:
import sys # Check which version of python is being used
print(f'python {sys.version_info[0]}.{sys.version_info[1]}')
import pandas as pd    # Pandas to organize and make calcs
print(f'pandas {pd.__version__}')
from pandas_datareader.data import DataReader as pdr 
import requests        # Make requests to econ data APIs
import json            # Work with JSON data
import config          # Local file config.py with BLS API key

python 3.6
pandas 0.22.0


#### Define custom functions - run early

In [2]:
# Define some tools to use later in calculations
def pr(series):
    '''Express series in percent terms, rounded to one decimal place'''
    scaled = series * 100
    return scaled.round(1)

def agr(series):
    '''Return the 12-period percent change of series, in percent

    With monthly data this is the change from the same month one year
    earlier; scaling and rounding are done by pr.
    '''
    year_over_year = series.pct_change(12)
    return pr(year_over_year)

def gr3(series):
    '''Annualized growth of the trailing 3-period total over the prior 3 periods

    The ratio of two rolling 3-period sums equals the ratio of the matching
    3-period averages, so this is moving-average growth. The 3-period change
    is compounded four times and returned in percent via pr.
    '''
    trailing_total = series.rolling(3).sum()
    three_period_growth = trailing_total.pct_change(3)
    annualized = (three_period_growth + 1) ** 4 - 1
    return pr(annualized)

# Write text for chart annotations
def wt(filename, filetext):
    '''Write string to txt file, replacing any existing contents

    filename: path of the file to create or overwrite
    filetext: string to store in the file

    The encoding is fixed to UTF-8 so the output does not depend on the
    platform's default locale and non-ASCII characters cannot raise
    UnicodeEncodeError. Plain ASCII text is written exactly as before.
    '''
    with open(filename, 'w', encoding='utf-8') as text_file:
        text_file.write(filetext)

#### Parameters - Inspect and run early

In [3]:
# Endpoint of the BLS public API (v2) for time series data
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
# Query string with the registration key; config.py holds: bls_key = 'key'
key = f'?registrationkey={config.bls_key}'
# One (FRED ID, BLS ID) pair per series of interest
s = [
    ('PAYEMS', 'CES0000000001'),
    ('UNRATE', 'LNS14000000'),
    ('U6RATE', 'LNS13327709'),
    ('LNS12300060', 'LNS12300060'),
    ('AHETPI', 'CES0500000008'),
    ('LNS12032197', 'LNS12032197'),
    ('LNS12032200', 'LNS12032200'),
    ('LNS12035019', 'LNS12035019'),
    ('LNS13025703', 'LNS13025703'),
]

# Ask for the reply in json format
headers = {'Content-type': 'application/json'}
# Request body: the BLS IDs from s and the span of years to return
param = {
    "seriesid": [bls_id for _, bls_id in s],
    "startyear": "2016",
    "endyear": "2018",  # Hard-coded; update when the year changes
}

#### Use BLS text table with series info to collect the name of each series in s - run early

In [4]:
d = {}    # Dictionary to fill with series names: {series_id: series_title}
bls_ids = [bls_id for _, bls_id in s]  # BLS IDs of the series in s
# Search the 'ln' and 'ce' series listings on download.bls.gov
for prefix in ['ln', 'ce']:
    # Deliberately not named `url`: that global holds the API endpoint used
    # by the data request later on and must not be overwritten here
    series_url = f'https://download.bls.gov/pub/time.series/{prefix}/{prefix}.series'
    # Split on tabs; the regex also drops whitespace padding before each tab
    t = pd.read_table(series_url, sep=r'\s*\t', engine='python')
    # Keep only the rows for the series of interest, as series_id -> title
    t = t[t['series_id'].isin(bls_ids)].set_index('series_id')['series_title']
    d.update(t.items())

#### Get long-term values from FRED - Inspect and run early

In [5]:
# Date bounds as timestamps: rng[0] and rng[1] bracket the FRED request
# below; rng[2] is a later end date used for the CPI request
rng = pd.to_datetime(['1973-01-01', '2015-12-31', '2017-12-31'])
# Request every series by its FRED ID (first element of each pair in s)
dft = pdr([fred_id for fred_id, _ in s], 'fred', start=rng[0], end=rng[1])
# Relabel the columns with the BLS IDs so they match the BLS API data
dft.columns = [bls_id for _, bls_id in s]

#### Request Jobs Report data from BLS API - run at 8:31

In [None]:
# Post a single request covering every series ID listed in param
body = json.dumps(param)
reply = requests.post(f'{url}{key}', data=body, headers=headers).json()
print(f"Status: {reply['status']}") # Report the status field of the reply
p = reply['Results']['series']  # Keep only the list of series results

# Parse the period/year labels of the first series into datetime values
date_labels = [f"{obs['period']} {obs['year']}" for obs in p[0]['data']]
idx = pd.to_datetime(date_labels)
print(f'Latest: {idx[0]:%B %Y}') # First entry, shown as month and year

#### Clean up and make calculations for series of interest

In [None]:
# Build dataframe df from the API results, p: one column per series ID
df = pd.DataFrame()
# The loop variable is not named `s`, so the list of series IDs defined in
# the parameters cell stays intact if earlier cells are re-run
for series in p:
    values = [obs['value'] for obs in series['data']]
    # All series use the dates in idx; reverse the row order before storing
    df[series['seriesID']] = pd.Series(index=idx, data=values,
                                       dtype='float').iloc[::-1]
# Stack previous data on top of the new BLS data (pd.concat gives the same
# result as DataFrame.append, which newer pandas versions no longer provide)
df = pd.concat([dft, df])

# Calculate indicators of interest (pr, gr3, agr defined above)
df['ptec'] = pr(df['LNS12032197'] / df['LNS12035019']) # PT economic
df['ptnec'] = pr(df['LNS12032200'] / df['LNS12035019']) # PT non-econ
df['payroll'] = df['CES0000000001'].diff() # Payroll growth
df['ahe'] = gr3(df['CES0500000008'])     # AHE 3m/3m annualized
df['ahe2'] = agr(df['CES0500000008'])    # AHE 12m pct change
df['cpi'] = agr(pdr('CPIAUCSL', 'fred', rng[0], rng[2]))  # CPI 12m pct change

#### Chart data and annotation text

In [7]:
dft

Unnamed: 0_level_0,CES0000000001,LNS14000000,LNS13327709,LNS12300060,CES0500000008,LNS12032197,LNS12032200,LNS12035019,LNS13025703
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1973-01-01,75621,4.9,,69.7,4.03,2043,9701,79705,9.5
1973-02-01,76017,5.0,,70.1,4.04,2182,10202,80497,8.4
1973-03-01,76285,4.9,,70.2,4.06,2172,10303,80983,8.9
1973-04-01,76455,5.0,,70.3,4.08,2150,10462,81152,7.5
1973-05-01,76646,4.9,,70.4,4.10,2216,10412,81272,8.0
1973-06-01,76887,4.9,,70.5,4.12,2543,10094,81676,7.5
1973-07-01,76911,4.8,,70.7,4.15,2490,9915,81759,6.5
1973-08-01,77166,4.8,,70.5,4.16,2444,10287,81779,7.7
1973-09-01,77276,4.8,,70.7,4.19,2424,10620,82146,6.8
1973-10-01,77606,4.6,,70.9,4.21,2424,10738,82563,8.0


#### Use BLS text files to look up series IDs

In [None]:
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')

In [21]:
ln[(ln['series_title'].str.contains('Unemployment Rate')) & 
   (ln['series_title'].str.contains('Black')) & 
   (ln['periodicity_code'] == 'M') & 
   (ln['seasonal'] == 'S')][['series_id', 'series_title']]

Unnamed: 0,series_id,series_title
915,LNS14000006,(Seas) Unemployment Rate - Black or African Am...
917,LNS14000007,(Seas) Unemployment Rate - Black or African Am...
919,LNS14000008,(Seas) Unemployment Rate - Black or African Am...
935,LNS14000018,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
937,LNS14000019,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
939,LNS14000020,"(Seas) Unemployment Rate - 16-19 yrs., Black o..."
959,LNS14000030,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."
961,LNS14000031,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."
963,LNS14000032,"(Seas) Unemployment Rate - 20 yrs. & over, Bla..."


In [15]:
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')

In [None]:
ce['series_id'].str

In [9]:
ce_series = 'https://download.bls.gov/pub/time.series/ce/ce.series'
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'

In [None]:
s[0][1]

In [None]:
d = {}  # Series names keyed by BLS series ID
bls_ids = [bls_id for _, bls_id in s]  # BLS IDs of the series in s
for prefix in ['ln', 'ce']:
    # Deliberately not named `url`, so the API endpoint stored in that
    # global is not overwritten
    series_url = f'https://download.bls.gov/pub/time.series/{prefix}/{prefix}.series'
    # Split on tabs; the regex also drops whitespace padding before each tab
    t = pd.read_table(series_url, sep=r'\s*\t', engine='python')
    t = t[t['series_id'].isin(bls_ids)].set_index('series_id')['series_title']
    d.update(t.items())

In [None]:
t[t['series_id'].isin([i[1] for i in s])].set_index('series_id')['series_title']

In [None]:
    
ce_series = 'https://download.bls.gov/pub/time.series/ce/ce.series'
ln_series = 'https://download.bls.gov/pub/time.series/ln/ln.series'
# Read both series listings, keeping only the ID and title columns
cols = ['series_id', 'series_title']
ce = pd.read_table(ce_series, sep=r'\s*\t', engine='python')[cols]
ln = pd.read_table(ln_series, sep=r'\s*\t', engine='python')[cols]

# Rows for the series of interest, ce matches first and ln matches after.
# pd.concat gives the same result as DataFrame.append, which newer pandas
# versions no longer provide.
bls_ids = [bls_id for _, bls_id in s]
pd.concat([ce[ce['series_id'].isin(bls_ids)], ln[ln['series_id'].isin(bls_ids)]])

In [None]:
ce['series_id'][0]

In [None]:
ce[ce['series_id'].isin([i[1] for i in s])].append(ln[ln['series_id'].isin([i[1] for i in s])])

In [None]:
ce['series_id'][0]

In [None]:
ce.iloc[0]['series_title']

In [None]:
ce

In [None]:
df['ahe'] = df['CES0500000008'].rolling(3).sum().pct_change(3)
df['ahe'] = ((((df['ahe']+1)**4)-1)*100).round(1) # annualize

In [None]:
(((df['CES0500000008'].rolling(3).sum().pct_change(3)+1)**4)-1)*100 

In [None]:
df['ahe3'] = agr(df['CES0500000008'])
df['ahe2'] = (df['CES0500000008'].pct_change(12) * 100).round(1)

df[['ahe2', 'ahe3']].tail()

In [None]:
d

In [None]:
cpi.columns = ['cpi']

In [None]:
cpi

In [None]:
cpi = pdr(['CPIAUCSL'], 'fred', rng[0], pd.to_datetime('2017-12-31'))

In [None]:
cpi

In [None]:
df = df.join(cpi.pct_change(12) * 100)

In [None]:
df['ahe2'] = df['CES0500000008'].pct_change(12) * 100

In [None]:
%matplotlib inline
df.loc['1995-01-01':,['LNS14000000', 'LNS13327709']].plot()

In [None]:
df.loc['1995-01-01':,'LNS12300060'].plot()

In [None]:
df.loc['1995-01-01':,['ahe2', 'ahe', 'CWSR0000SA0']].plot()

In [None]:
df.loc['1994-01-01':,['ptnec', 'ptec']].rolling(12).mean().plot()

In [None]:
df.loc['1995-01-01':,'payroll'].plot()

In [None]:
df = df_prev.append(df)

In [None]:
(df['CES0500000003'].rolling(3).mean().pct_change(3)
             .add(1).pow(4).subtract(1).multiply(100).round(1))

In [None]:
df_prev.columns = [s_id for name, s_id in s]

In [None]:
df_prev

In [None]:
df

In [None]:
pd.to_datetime(['1973-01-01', '2015-12-31'])[0]

### Turn json data into a pandas dataframe

In [None]:
# Build dataframe df from the API results, p
df = pd.DataFrame() # Empty dataframe to fill with values
# The loop variable is not named `s`, so the list of series IDs is preserved
for series in p:
    values = [obs['value'] for obs in series['data']]
    # Convert the values to float and reverse the row order before storing
    df[series['seriesID']] = pd.Series(index=idx, data=values
        ).astype(float).iloc[::-1]

### Calculate series for graphs

In [None]:
print(f"Latest: {idx[0]:%B %Y}") 

In [None]:
# datetime is not imported in the packages cell, so import it here
import datetime

# Record and display the date on which this cell was run
today = datetime.datetime.now()
print(f'Cell run: {today:%b %d, %Y}')

In [None]:
['{} {}'.format(i['period'], i['year']) for i in r]

In [None]:
idx = pd.to_datetime([f"{i['period']} {i['year']}" for i in p[0]['data']])

In [None]:
dd = {}
for x, y in s:
    if 
    dd[x]
    data = {param['seriesid'][0]: [float(i['value']) for i in r]}

In [None]:
# Build one pandas series per API result in p, keyed by series ID.
# A dict comprehension keeps its loop variable local, so the list of
# series IDs named `s` is not overwritten.
dd = {
    series['seriesID']: pd.Series(
        index=idx, data=[obs['value'] for obs in series['data']]
    ).astype(float).iloc[::-1]
    for series in p
}

# Combine series into one pandas dataframe
df = pd.DataFrame(dd)

In [None]:
p

In [None]:
# List comprehensions - clean json data and convert the
# date, value, and footnote into pandas dataframe columns
dates = ['{} {}'.format(i['period'], i['year']) for i in r]
index = pd.to_datetime(dates)
data = {param['seriesid'][0]: [float(i['value']) for i in r]}
# .iloc[::-1] reverses the sorting/order of the dataframe
df = pd.DataFrame(index=index, data=data).iloc[::-1]