In [57]:
# Import python packages
import pandas as pd
import numpy as np
import calendar

In [80]:
# Generate a list of file names matching the Census microdata files
latest_month = '7/1/2018'
#latest_month = '12/1/2015'


def cps_file_names(latest_month, periods=24):
    """Return monthly CPS file names for the `periods` months ending at `latest_month`.

    Names follow the pattern ``<mon><yy>pub.dat`` (e.g. ``jul18pub.dat``):
    lower-case three-letter month abbreviation plus a two-digit year.

    Parameters
    ----------
    latest_month : str or datetime-like
        Last month to include; passed to ``pd.date_range`` as ``end``.
    periods : int, default 24
        Number of consecutive months (month-start frequency) to generate.

    Returns
    -------
    list of str
        File names in chronological order.
    """
    months = pd.date_range(end=latest_month, periods=periods, freq='MS')
    # :02d keeps the leading zero for 2000-2009 (e.g. 'jan09', not 'jan9').
    return [f'{calendar.month_abbr[m.month].lower()}{m.year % 100:02d}pub.dat'
            for m in months]


cps_files = cps_file_names(latest_month)

In [81]:
# The microdata files are fixed-width records, so each variable is read
# from a fixed character range. Entries are (name, start, stop), where
# start/stop are the slice bounds taken from the data dictionary.
cps_vars = [
    ('GTCBSA', 95, 100),      # CBSA (geo area)
    ('PRTAGE', 121, 123),     # Person's age
    ('PRDTOCC1', 475, 477),   # Person's occupation 1
    ('PRDTOCC2', 477, 479),   # Person's occupation 2
    ('PWCMPWGT', 845, 855),   # Person's composite weight
]

# Location of microdata files
path = 'E:/08_Other/Archive/data'

In [82]:
# Read every monthly file into one flat list of records.
d = []  # list of tuples: one tuple of ints per record, in cps_vars order
for file in cps_files:
    # The context manager closes each file handle once it has been read.
    # Files are opened in binary mode; int() parses the sliced byte fields.
    with open(f'{path}/{file}', 'rb') as raw:
        d.extend(tuple(int(line[start:stop]) for _name, start, stop in cps_vars)
                 for line in raw)

# Keep records with a positive composite weight, a positive CBSA code,
# and an age below 65.
df = (pd.DataFrame(d, columns=[name for name, _start, _stop in cps_vars])
      .query('PWCMPWGT > 0 and GTCBSA > 0 and PRTAGE < 65'))

In [83]:
# Sample-size check: the goal is at least 500 observations in every CBSA.
# Count the records per CBSA once, then read off the smallest and largest.
obs_per_cbsa = df.groupby('GTCBSA')['PRDTOCC1'].agg('count')
min_obs = obs_per_cbsa.min()
max_obs = obs_per_cbsa.max()

print(f'Between {min_obs} and {max_obs} observations per CBSA')

Between 535 and 81063 observations per CBSA


In [84]:
# Occupation codes whose share is computed below; they are matched against
# both PRDTOCC1 and PRDTOCC2.
# NOTE(review): "bc" presumably stands for blue-collar, and the commented-out
# list is presumably a broader definition -- confirm the codes against the
# data dictionary.
#bc = [14, 18, 19, 20, 21, 22]
bc = [21]

In [85]:
# Calculate the weighted share of persons whose first or second occupation
# code is in `bc`, by CBSA
def bcw(x):
    """Return the PWCMPWGT-weighted share of rows in `x` with an occupation in `bc`.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows for one group; must have PRDTOCC1, PRDTOCC2 and PWCMPWGT columns.

    Returns
    -------
    float
        Weighted average of a 0/1 indicator that is 1 when either
        occupation column holds a code listed in `bc`.
    """
    in_bc = x['PRDTOCC1'].isin(bc) | x['PRDTOCC2'].isin(bc)
    return np.average(np.where(in_bc, 1, 0), weights=x['PWCMPWGT'])


bcw_list = df.groupby('GTCBSA').apply(bcw)
# Convert the CBSA index labels to strings.
bcw_list.index = bcw_list.index.map(str)

In [86]:
# Store the per-CBSA shares sorted in ascending order.
# NOTE(review): the "2016" suffix does not match latest_month = '7/1/2018'
# set earlier in the notebook -- confirm which period this series covers.
bcw2016 = bcw_list.sort_values()

In [88]:
# Display the per-CBSA shares in ascending order (bcw2016 was already sorted
# when it was assigned, so this sort does not change the order).
bcw2016.sort_values()

GTCBSA
12700    0.006852
47900    0.011372
29700    0.012323
15680    0.014355
37860    0.014407
27980    0.014508
42020    0.014910
15180    0.015730
23540    0.016031
25940    0.016221
12580    0.017460
27260    0.017486
17020    0.017586
14500    0.017601
14010    0.017610
15980    0.017747
27340    0.018057
38940    0.018202
34940    0.018761
35620    0.018982
44100    0.019001
36740    0.019248
35840    0.019707
12420    0.019835
25420    0.019940
10740    0.020521
42140    0.020603
12100    0.020911
33100    0.021017
45940    0.021085
           ...   
16540    0.081663
47220    0.081884
29540    0.082079
22420    0.082132
40420    0.082529
24340    0.084133
43900    0.084231
24660    0.085426
49620    0.085643
15940    0.086007
45820    0.086267
28700    0.087040
15500    0.087939
17420    0.089247
39540    0.090603
23580    0.092723
22900    0.092809
34740    0.098418
48140    0.099132
45780    0.102667
27100    0.102879
43780    0.103935
14540    0.104444
11540    0.110446
129

In [87]:
# Change in share between the two series, by CBSA; CBSAs missing from either
# series are dropped, and the result is sorted ascending.
# NOTE(review): bcw2014 is never assigned in the visible notebook -- presumably
# it was created by an earlier run with a different latest_month; this cell
# will fail on a fresh kernel until that step is added.
(bcw2016-bcw2014).dropna().sort_values()

GTCBSA
24140   -0.052003
11100   -0.049115
27740   -0.043996
45460   -0.043470
21780   -0.042114
24780   -0.038354
23540   -0.034839
41420   -0.034223
24580   -0.033293
22520   -0.033233
47940   -0.030339
46540   -0.029296
14020   -0.028351
40420   -0.027374
30980   -0.027255
13140   -0.026157
28420   -0.025073
33780   -0.024458
48700   -0.023470
17900   -0.022660
12020   -0.021874
18580   -0.021624
32780   -0.021415
44140   -0.020687
48660   -0.020000
46340   -0.019645
22660   -0.017832
25420   -0.017799
10180   -0.017258
47380   -0.016400
           ...   
25860    0.016724
26980    0.016983
38220    0.017033
31420    0.017919
20100    0.018240
42200    0.018603
22900    0.018796
11700    0.018877
34820    0.019455
20500    0.020077
17300    0.020560
39140    0.020864
24020    0.021028
28700    0.021337
13460    0.021673
46700    0.021882
28660    0.022041
17420    0.022437
27780    0.023241
45820    0.023837
42220    0.024433
25180    0.024538
14540    0.025664
27100    0.025886
289

In [None]:
# Build a frame from `data` and keep records with positive weight and CBSA code.
# NOTE(review): `dd_sel_var` is not defined anywhere in the visible notebook and
# `data` is only assigned in a later cell, so this cell relies on hidden kernel
# state; it also overwrites `df`. Confirm whether it is still needed.
df = pd.DataFrame(data, columns=[v[0] for v in dd_sel_var]).query('PWCMPWGT > 0 and GTCBSA > 0')

In [11]:
# Number of non-null PEMLR values per CBSA, then the count for CBSA 41540.
# NOTE(review): PEMLR is not among the variables listed in cps_vars, so `df`
# here presumably came from a different variable list -- confirm.
pemlr_counts = df.groupby('GTCBSA')['PEMLR'].agg('count').sort_values()
pemlr_counts.loc[41540]

7971

In [25]:
# Inspect the accumulated record list.
# NOTE(review): the saved output shows generator objects rather than tuples,
# which looks like it came from an earlier version of the loading loop --
# confirm and clear the stale output.
d

[<generator object <genexpr> at 0x0000021141D69200>,
 <generator object <genexpr> at 0x00000211257F18E0>,
 <generator object <genexpr> at 0x0000021141D69360>,
 <generator object <genexpr> at 0x0000021141D69468>,
 <generator object <genexpr> at 0x0000021141D69518>,
 <generator object <genexpr> at 0x0000021141D695C8>,
 <generator object <genexpr> at 0x0000021141D69678>,
 <generator object <genexpr> at 0x0000021141D69728>,
 <generator object <genexpr> at 0x0000021141D697D8>,
 <generator object <genexpr> at 0x0000021141D69888>,
 <generator object <genexpr> at 0x0000021141D69938>,
 <generator object <genexpr> at 0x0000021141D699E8>,
 <generator object <genexpr> at 0x0000021141D69A98>,
 <generator object <genexpr> at 0x0000021141D69B48>,
 <generator object <genexpr> at 0x0000021141D69BF8>,
 <generator object <genexpr> at 0x0000021141D69CA8>,
 <generator object <genexpr> at 0x0000021141D69D58>,
 <generator object <genexpr> at 0x0000021141D69E08>,
 <generator object <genexpr> at 0x0000021141D6

In [12]:
# Loop variable left over from the file-loading loop; it holds the last file
# name that loop processed.
file

'jul18pub.dat'

In [14]:
# Convert raw data into a list of tuples (one tuple of ints per record,
# fields in cps_vars order).
# `file` is whatever the file-loading loop last assigned, so this cell depends
# on that loop having been run first.
with open(f'{path}/{file}', 'rb') as raw:  # closes the handle after reading
    data = [tuple(int(line[start:stop]) for _name, start, stop in cps_vars)
            for line in raw]

In [15]:
# Display the parsed records.
# NOTE(review): the saved output shows 4-field tuples while cps_vars lists
# 5 variables, so the output appears to predate the current variable list.
data

[(26620, -1, -1, -1),
 (26620, 61, 1, 19878448),
 (26620, 64, 5, 17050404),
 (33860, -1, -1, -1),
 (13820, 63, 5, 17050404),
 (13820, 71, 5, 19244555),
 (33860, 31, 1, 23135183),
 (33860, 34, 1, 28380323),
 (33860, 4, -1, 0),
 (33860, 64, 1, 20909703),
 (33860, 66, 5, 20343360),
 (33860, 65, 5, 19060049),
 (26620, 30, 1, 22542404),
 (26620, 66, 5, 22263811),
 (26620, 68, 5, 17662854),
 (26620, 2, -1, 0),
 (26620, 8, -1, 0),
 (26620, 61, 5, 19871268),
 (26620, 56, 1, 24826396),
 (26620, 54, 7, 24404325),
 (26620, 24, 1, 26522598),
 (33660, -1, -1, -1),
 (33660, 64, 5, 23188004),
 (33660, 38, 1, 20753495),
 (33660, -1, -1, -1),
 (13820, 62, 5, 21378000),
 (13820, 66, 5, 24541066),
 (13820, 36, 7, 18437927),
 (13820, 85, 5, 18435188),
 (13820, 60, 5, 20593828),
 (13820, -1, -1, -1),
 (33660, -1, -1, -1),
 (13820, -1, -1, -1),
 (13820, 44, 1, 21136198),
 (13820, 44, 4, 24528416),
 (13820, 16, 7, 22727516),
 (13820, 21, 4, 27162948),
 (13820, 1, -1, 0),
 (13820, -1, -1, -1),
 (33860, -1, -1