In [2]:
import pandas as pd
import numpy as np

In [3]:
def binned_wage(group, wage_var='WKWAGE', percentile=0.1, bins=list(np.arange(25, 3000, 50.0)), bin_size=50.0):
    """Return BLS-styled binned decile/percentile wage"""
    
    import pandas as pd
    import numpy as np
    
    # Use ORG weight since wage defined only for ORG sample
    weight = 'PWORWGT'
    
    # Cut wage series according to bins of bin_size
    bin_cut = lambda x: pd.cut(x[wage_var], bins, include_lowest=True)
    
    # Calculate cumulative sum for weight variable
    cum_sum = lambda x: x[weight].cumsum()
    
    # Sort wages then apply bin_cut and cum_sum
    df = (group.sort_values(wage_var)
               .assign(WAGE_BIN = bin_cut, CS = cum_sum))
    
    # Find the weight at the percentile of interest
    pct_wgt = df[weight].sum() * percentile

    # Find wage bin for person nearest to weighted percentile
    pct_bin = df.iloc[df['CS'].searchsorted(pct_wgt)].WAGE_BIN
    
    # Weight at bottom and top of bin
    wgt_btm, wgt_top = (df.loc[df['WAGE_BIN'] == pct_bin, 'CS']
                          .iloc[[0, -1]].values)
    
    # Find where in the bin the percentile is and return that value
    pct_value = ((((pct_wgt - wgt_btm) / 
                   (wgt_top - wgt_btm)) * bin_size) + pct_bin.left)
    
    return pct_value

In [14]:
path = '/home/brian/Documents/CPS/data/clean'
data1, data2 = pd.Series(), pd.Series()
columns = ['MONTH', 'YEAR', 'AGE', 'PWORWGT', 'WKWAGE', 'HRSUSL1']
for year in range(1989, 2020):
    df = (pd.read_feather(f'{path}/cps{year}.ft', columns=columns)
        .query('WKWAGE > 0 and HRSUSL1 >= 35'))
    data = df.groupby(['YEAR', 'MONTH']).apply(binned_wage)
    data.index = [pd.to_datetime(f'{ti[0]}-{ti[1]}-01') for ti in data.index]
    data1 = data1.append(data)
    
df = pd.DataFrame({'All': data1})

In [6]:
%matplotlib inline

In [15]:
df.loc['2015']

Unnamed: 0,All
2015-01-01,388.204247
2015-02-01,386.799389
2015-03-01,386.708309
2015-04-01,386.238328
2015-05-01,394.42423
2015-06-01,392.124004
2015-07-01,392.735574
2015-08-01,383.872513
2015-09-01,395.233653
2015-10-01,399.011286
