In [1]:
import pandas as pd
import numpy as np
import os
import requests
import json
import config

# Switch the working directory so that the relative file names used later in
# this notebook ('cpi.csv', 'fmw.csv', 'cps.ft') resolve inside this folder.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
os.chdir('/home/brian/Documents/CPS/data/clean/')

In [None]:
# Code to update CPI as needed
api_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

# API key in config.py which contains: bls_key = 'key'
key = '?registrationkey={}'.format(config.bls_key)

# Series stored as a dictionary: BLS series id -> output column name
series_dict = {'CUSR0000SA0': 'ALL'}

# Start year and end year (inclusive)
date_r = (1994, 2018)

# Split the range into consecutive (start, end) windows of at most ten
# calendar years. Both entries are strings in every window, so each request
# payload has the same shape.
dates = [(str(first), str(min(first + 9, date_r[1])))
         for first in range(date_r[0], date_r[1] + 1, 10)]

# Collected observations: {column name: {timestamp: value}}.
# The frame is built once at the end instead of being grown cell by cell.
observations = {}

for start, end in dates:
    # Submit the list of series as data
    payload = json.dumps({
        "seriesid": list(series_dict.keys()),
        "startyear": start, "endyear": end})

    # Post request for the data
    p = requests.post(
        '{}{}'.format(api_url, key),
        headers={'Content-type': 'application/json'},
        data=payload).json()

    # Report the status of every window, before its results are read, so a
    # failure in an early window is visible rather than only the last status.
    print('Post Request Status: {}'.format(p['status']))

    for s in p['Results']['series']:
        col = series_dict[s['seriesID']]
        for r in s['data']:
            # e.g. 'January 1994' -> Timestamp('1994-01-01')
            date = pd.to_datetime('{} {}'.format(
                r['periodName'], r['year']))
            observations.setdefault(col, {})[date] = float(r['value'])

df = pd.DataFrame(observations).sort_index()

# Ratio of the latest observation to each observation, i.e. the factor that
# converts each period's values to the latest period's price level
cpi = (df.iloc[-1] / df)
cpi.to_csv('cpi.csv')

In [None]:
# Reload the CPI table from 'cpi.csv', using the first CSV column as the
# index and parsing it as dates
cpi = pd.read_csv('cpi.csv', index_col=[0], parse_dates=True)

In [None]:
# Federal minimum wage--update monthly
# Read 'fmw.csv', using the first CSV column as the index and parsing it as dates
fmw = pd.read_csv('fmw.csv', index_col=[0], parse_dates=True)

In [None]:
fed_min_wage = 7.25

# Width of every wage bin; also the scale of the within-bin interpolation
bin_width = 0.25

# Rows with an hourly wage of at least fed_min_wage, ordered by wage, with
# each row's wage bin (WAGE_RANGE) and the running sum of weights (CS).
# read_feather is called without `nthreads`: that keyword was dropped from
# pandas, and the default reader settings return the same frame.
data = (pd.read_feather('cps.ft')
          .filter(items=['HRWAGE', 'PWORWGT', 'HRMONTH'])
          .query(f'HRWAGE >= {fed_min_wage}')
          .sort_values('HRWAGE')
          .assign(WAGE_RANGE = lambda x: pd.cut(x['HRWAGE'],
                                                list(np.arange(bin_width / 2, 200, bin_width)),
                                                include_lowest=True),
                  CS = lambda x: x['PWORWGT'].cumsum()))

# Half of the total weight: the cumulative weight at which the median sits
midpt = data['PWORWGT'].sum() * 0.5

# The median bin is the bin of the first row whose cumulative weight reaches
# the midpoint. Picking the row *nearest* to the midpoint can land in the bin
# below it, which leaves highval < midpt and an interpolation fraction > 1.
mid_interval = data.loc[data['CS'] >= midpt, 'WAGE_RANGE'].iloc[0]

# Observed bins in ascending order (data is sorted by wage)
wage_bins = list(data['WAGE_RANGE'].unique())

mi_loc = wage_bins.index(mid_interval)
# Cumulative weight below the median bin. When the median bin is the first
# observed bin there is nothing below it; indexing wage_bins[-1] would wrap
# around to the highest bin instead.
lowval = (data[data['WAGE_RANGE'] == wage_bins[mi_loc-1]].iloc[-1].CS
          if mi_loc > 0 else 0.0)
# Cumulative weight at the top of the median bin
highval = data[data['WAGE_RANGE'] == wage_bins[mi_loc]].iloc[-1].CS

# Grouped-median interpolation: left edge + share of the bin's weight needed
# to reach the midpoint, scaled by the bin width
binned_med = ((midpt - lowval) / (highval - lowval)) * bin_width + mid_interval.left

print(f'binned, weighted median: ${binned_med: .2f}')

In [None]:
# Display the wage bin of the row whose cumulative weight (CS) is closest to midpt
data.iloc[(data['CS']-midpt).abs().argsort()[:1]].WAGE_RANGE.values[0]

In [None]:
# Wage bin of the row whose cumulative weight (CS) is closest to midpt,
# looked up once and reused below
near_bin = data.iloc[(data['CS']-midpt).abs().argsort()[:1]].WAGE_RANGE.values[0]
n = wage_bins.index(near_bin)

# Cumulative weight at the end of the preceding observed bin; zero when this
# is the first bin, because wage_bins[-1] would wrap around to the last bin
lowval = data[data['WAGE_RANGE'] == wage_bins[n-1]].iloc[-1].CS if n > 0 else 0.0
# Cumulative weight at the end of this bin
highval = data[data['WAGE_RANGE'] == wage_bins[n]].iloc[-1].CS

# Interpolate inside the bin, scaled by the bin's own width instead of a
# fixed 0.5 that does not match the width of the WAGE_RANGE bins
binned_med = ((midpt - lowval) / (highval - lowval)) * near_bin.length + near_bin.left

In [None]:
# Display the entry of wage_bins just before position n
wage_bins[n-1]

In [None]:
# Preview the first rows of `data`
data.head()

In [None]:
# Cumulative weight at the end of the preceding observed bin; zero when n is
# the first bin, because wage_bins[-1] would wrap around to the last bin
lowval = data[data['WAGE_RANGE'] == wage_bins[n-1]].iloc[-1].CS if n > 0 else 0.0
# Cumulative weight at the end of bin n
highval = data[data['WAGE_RANGE'] == wage_bins[n]].iloc[-1].CS

# Wage bin of the row whose cumulative weight (CS) is closest to midpt
near_bin = data.iloc[(data['CS']-midpt).abs().argsort()[:1]].WAGE_RANGE.values[0]

# Interpolate inside the bin, scaled by the bin's own width instead of a
# fixed 0.5 that does not match the width of the WAGE_RANGE bins
binned_med = ((midpt - lowval) / (highval - lowval)) * near_bin.length + near_bin.left

In [None]:
# Wage bin of the row whose cumulative weight (CS) is closest to midpt
near_bin = data.iloc[(data['CS']-midpt).abs().argsort()[:1]].WAGE_RANGE.values[0]

# Interpolate inside the bin, scaled by the bin's own width instead of a
# fixed 0.5 that does not match the width of the WAGE_RANGE bins
binned_med = ((midpt - lowval) / (highval - lowval)) * near_bin.length + near_bin.left

In [None]:
# Display the current value of binned_med
binned_med

In [None]:
# Display the wage bin of the row whose cumulative weight (CS) is closest to midpt
data.iloc[(data['CS']-midpt).abs().argsort()[:1]].WAGE_RANGE.values[0]

In [None]:
def _binned_weighted_median(frame, bin_width=0.5, upper=1000):
    """Weighted median of HRWAGE, interpolated inside fixed-width wage bins.

    Parameters
    ----------
    frame : DataFrame with 'HRWAGE' (value) and 'PWORWGT' (weight) columns.
        It is not modified; a sorted copy is built internally.
    bin_width : width of each wage bin; bin edges start at bin_width / 2.
    upper : exclusive upper limit for the bin edges.

    Returns
    -------
    float : left edge of the median bin plus the share of that bin's weight
        needed to reach half of the total weight, scaled by bin_width.

    Raises
    ------
    IndexError : if `frame` has no rows, or the median wage falls outside
        the bin edges.
    """
    edges = list(np.arange(bin_width / 2, upper, bin_width))
    # Building the columns with assign on a sorted copy avoids writing into a
    # filtered slice of the caller's frame (chained assignment)
    ranked = (frame.sort_values('HRWAGE')
                   .assign(wage_range=lambda x: pd.cut(x['HRWAGE'], edges,
                                                       include_lowest=True),
                           cs=lambda x: x['PWORWGT'].cumsum()))
    midpt = ranked['PWORWGT'].sum() * 0.5

    # Median bin: bin of the first row whose cumulative weight reaches the
    # midpoint. The row nearest to the midpoint may sit in the bin below.
    median_bin = ranked.loc[ranked['cs'] >= midpt, 'wage_range'].iloc[0]
    in_bin = ranked['wage_range'] == median_bin

    # Cumulative weight at the top of the median bin, and just below it.
    # Subtracting the bin's own weight works for the first bin as well, where
    # looking up "the previous bin" by index -1 would wrap to the last bin.
    highval = ranked.loc[in_bin, 'cs'].iloc[-1]
    lowval = highval - ranked.loc[in_bin, 'PWORWGT'].sum()

    return ((midpt - lowval) / (highval - lowval)) * bin_width + median_bin.left


# One binned, weighted median per survey month, in order of first appearance
for month in data['HRMONTH'].unique():
    binned_med = _binned_weighted_median(data[data['HRMONTH'] == month])
    print(month)
    print(f'binned, weighted median: ${binned_med: .2f}')