In [3]:
import os
os.chdir('/home/brian/Documents/CPS/data/clean/')

In [33]:
import pandas as pd
import numpy as np
import itertools
import datetime as dt1
import matplotlib as mpl
import matplotlib.pyplot as plt
import wquantiles

age_tuple = (18, 64)
path_feather = ''

# Age and sex demographics in March 2001
filter_cols = ['AGE','BASICWGT', 'MONTH','IND03D','OCC03M','HRSUSL1','WORKFT', 'USLFT', 'PWORWGT','HRWAGE', 'PRICEADJ']

# Python function returns percentile using interpolation
def binned_wage2(df, wage_var='HRWAGE', perc=0.5, 
                bins=np.arange(-0.25, 300, 0.5)):
    '''
    Returns wage estimate based on linear interpolation through 
    the bin containing the wage.
    
    perc = percentile of interest (0.5 is median)
    bins = list of bin start locations
    '''
    cdf = (df.groupby(pd.cut(df[wage_var], bins))
             .PWORWGT.sum().cumsum() / df.PWORWGT.sum())
    
    return np.interp(perc, cdf, bins[1:])

rhrwage = lambda x: x.HRWAGE * x.PRICEADJ

ind_dict={'Agriculture':1,
 'Forestry_logging':2,
 'Mining':3,
 'Construction':4}

occ_dict={'prodn_occpn':9,'sales_related_occpn':4}
ind_occ_list=list(itertools.product(list(ind_dict.keys()),list(occ_dict.keys())))
results = {}
for idx,val in enumerate(ind_occ_list):
    results[val[0]]={}
    results[val[0]][val[1]]=pd.DataFrame()
    for year in range(2019, 2021):
        annual_data = (pd.read_feather(f'{path_feather}//cps{year}.ft', columns=filter_cols)
                        .query(f'{age_tuple[0]} <= AGE <= {age_tuple[1]} and HRSUSL1 >= 35 and OCC03M == 9 and HRWAGE > 0 and WORKFT == 1 and PWORWGT > 0 and IND03D == {ind_dict[val[0]]}')
                        .assign(RHRWAGE = rhrwage))
        for month, df in annual_data.groupby('MONTH'):
            # df=annual_data.query(f"'MONTH'=={month}")        
            date = pd.to_datetime(f'{year}-{month}-01')
            results[val[0]][val[1]].at[date, 'wq_wage'] = wquantiles.median(df['HRWAGE'], df['PWORWGT'])
            results[val[0]][val[1]].at[date, 'bw'] = binned_wage2(df)

In [34]:
results

{'Agriculture': {'sales_related_occpn':               wq_wage         bw
  2019-01-01  19.020000  19.000000
  2019-02-01   7.211500   7.000000
  2019-03-01  16.259642  17.924459
  2019-04-01  16.565003  15.191605
  2019-05-01  16.000000  16.031366
  2019-06-01  23.059999  23.000000
  2019-07-01  42.000000  42.000000
  2019-08-01  15.588489  15.825183
  2019-11-01  17.000000  17.000000
  2019-12-01  16.965986  16.212837
  2020-01-01  12.000000  12.000000
  2020-02-01  24.839750  25.000000
  2020-03-01  21.500000  21.500000
  2020-04-01  16.220330  16.783271
  2020-05-01  13.490110  12.185347
  2020-06-01  16.850000  17.000000
  2020-07-01  16.698788  16.793628
  2020-08-01  14.185547  15.792685
  2020-11-01  19.446164  19.058897
  2020-12-01  14.000000  14.000000},
 'Forestry_logging': {'sales_related_occpn':               wq_wage         bw
  2019-04-01  12.449462   1.655970
  2019-05-01  18.500000  18.500000
  2019-06-01  20.539100  20.049289
  2019-08-01  23.332766  27.388371
  2019-

In [8]:
df = pd.read_feather('cps2019.ft')

In [9]:
for key in df.keys():
    print(key)

HHID
MONTH
YEAR
FAMINC
HHWGT
MIS
HHID2
CBSA
GTCBSASZ
CSA
PARENT
SPOUSE
PRDTRACE
LINENO
FAMNUM
PRFAMREL
HRSUSL1
HRSUSL2
HRSUSLT
HRSACT1
HRSACT2
HRSACTT
UNEMPDUR
PRWKSTAT
PRAGNA
IND03D
IND203D
OCC03D
OCC203D
IND03M
IND203M
OCC03M
OCC203M
PTWK
PWFMWGT
PWLGWGT
PWORWGT
PWSSWGT
PRCHLD
PRNMCHLD
WKEARNFLG
QSTNUM
OCCURNUM
BASICWGT
IND12
OCC10
IND212
OCC210
AGE
FEMALE
STATE
REGION
EDUCDT
EDUC
SCHENR
SCHOOL
RETIRED
MARRIED
WBHAO
WBAO
HISPANIC
VETERAN
FORBORN
CITIZEN
UNEMPTYPE
JLTYPE
LAYOFF
PTECON
USLFT
WORKFT
FTLF
SAMEEMP
CHDUTIES
SAMEACT
CHJOBACT
NOTATWORK
ABSTYPE
ABSPAID
PTREASON
WANTFT
DWTYPE
PAIDHRLY
PROXY
LFS
COW1
COW2
INDGRP
MANAGER
MJH
NUMJOBS
NILFREASON
NLFFAM
WKEARN
HRWAGE
OTCAMT
PRICEADJ
MINWAGE
CTYBIRTH
WBHAOM
HISPDT03
HISPDT
ATLFLG
INDM
ASIANDT
CERT
DISABILITY
CPSID
COUNTY
METSTAT
MPCSTAT
WKEARNADJ
HRSUSL1I
HRWAGEADJ
UNION
UNIONMEM


In [28]:
df.query('USLFT == -4')

Unnamed: 0,AGE,BASICWGT,MONTH,IND03D,OCC03M,HRSUSL1,WORKFT,USLFT,PWORWGT,HRWAGE
