In [10]:
import pandas as pd
import numpy as np
import microdf as mdf

p = pd.read_csv("../data/asec_2019_ipums.csv.gz")

# Preprocess
p.columns = p.columns.str.lower()

# Replace NIU codes.
NIU_CODES = {
    "adjginc": 99999999,
    "offtotval": 9999999999,
    # Secondary individuals under 15 have NIU for cutoff.
    # We set them to zero as to give them no UBI
    # (they're still part of SPM units with others who get UBI).
    "cutoff": 999999,
}
for column, niu in NIU_CODES.items():
    p[column].replace({niu: 0}, inplace=True)

p["fam"] = p.marbasecidh.astype(str) + "-" + p.famid.astype(str)

In [17]:
p[p.offcutoff == 999999]
# p.offcutoff.max()

Unnamed: 0,year,serial,month,cpsid,asecflag,asecwth,marbasecidh,pernum,cpsidp,asecwt,...,spmcaphous,spmwt,spmeitc,spmwic,spmheat,spmsnap,spmtotres,spmthresh,spmfamunit,fam
546,2020,463,3,20200302817600,1,1327.45,1120070965,3,20200302817604,1153.39,...,0.0,1327.45,2210,0.0,0.0,0,69233.0,27420.0,463001,1120070965-3
547,2020,463,3,20200302817600,1,1327.45,1120070965,4,20200302817603,1109.87,...,0.0,1327.45,2210,0.0,0.0,0,69233.0,27420.0,463001,1120070965-4
1969,2020,1485,3,20190303971100,1,537.29,1120099024,7,20190303971107,668.92,...,0.0,537.29,0,0.0,0.0,0,128727.0,51020.0,1091001,1120099024-2
3726,2020,2619,3,20190106442600,1,197.20,1120158841,4,20190106442604,185.53,...,0.0,197.20,5923,0.0,0.0,0,34773.0,28450.0,2619001,1120158841-2
4589,2020,3276,3,0,1,360.93,20002814,5,0,403.53,...,0.0,360.93,0,0.0,0.0,0,114229.0,40710.0,2959001,20002814-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155866,2020,90419,3,20190301965800,1,842.48,1120048939,7,20190301965807,734.94,...,0.0,842.48,0,0.0,0.0,0,83039.0,50180.0,90347001,1120048939-4
155867,2020,90419,3,20190301965800,1,842.48,1120048939,8,20190301965806,787.46,...,0.0,842.48,0,0.0,0.0,0,83039.0,50180.0,90347001,1120048939-5
156086,2020,90546,3,0,1,643.99,20099557,4,0,455.16,...,0.0,643.99,0,0.0,0.0,0,41576.0,35600.0,90474001,20099557-2
156254,2020,90642,3,20181201914600,1,520.05,1120047632,6,20181201914606,632.25,...,0.0,520.05,0,0.0,0.0,0,110685.0,38990.0,90642001,1120047632-2


In [29]:
p[p.fam == "1120070965-3"][["offtotval", "offcutoff", "cutoff", "spmfamunit", "spmtotres", "spmthresh", "adjginc", "cpsidp", "cpsid", "spmsnap", "age"]]

Unnamed: 0,offtotval,offcutoff,cutoff,spmfamunit,spmtotres,spmthresh,adjginc,cpsidp,cpsid,spmsnap,age
546,0,999999,999999,463001,69233.0,27420.0,0,20200302817604,20200302817600,0,11


In [34]:
p[p.cpsid == 20190301965800][["offtotval", "offcutoff", "cutoff", "spmfamunit", "spmtotres", "spmthresh", "adjginc", "cpsidp", "cpsid", "spmsnap", "age", "famid"]]

Unnamed: 0,offtotval,offcutoff,cutoff,spmfamunit,spmtotres,spmthresh,adjginc,cpsidp,cpsid,spmsnap,age,famid
155860,42200,26017,26017,90347001,83039.0,50180.0,41487,20190301965801,20190301965800,0,31,1
155861,42200,26017,26017,90347001,83039.0,50180.0,0,20190301965805,20190301965800,0,12,1
155862,42200,26017,26017,90347001,83039.0,50180.0,0,20190301965803,20190301965800,0,5,1
155863,42200,26017,26017,90347001,83039.0,50180.0,0,20190301965802,20190301965800,0,4,1
155864,50000,13300,13300,90347001,83039.0,50180.0,32000,20190301965804,20190301965800,0,30,2
155865,0,999999,999999,90347001,83039.0,50180.0,0,20190301965808,20190301965800,0,14,3
155866,0,999999,999999,90347001,83039.0,50180.0,0,20190301965807,20190301965800,0,13,4
155867,0,999999,999999,90347001,83039.0,50180.0,0,20190301965806,20190301965800,0,5,5


In [30]:
(p[["offcutoff", "cutoff"]] == 999999).mean()

offcutoff    0.00188
cutoff       0.00188
dtype: float64

In [15]:
f = p.groupby(["fam", "offcutoff", "offtotval"]).adjginc.sum().reset_index()
f.groupby("fam").size().max()

1

In [5]:
p[p.fam == "20100952-1"]

Unnamed: 0,year,serial,month,cpsid,asecflag,asecwth,marbasecidh,pernum,cpsidp,asecwt,...,spmcaphous,spmwt,spmeitc,spmwic,spmheat,spmsnap,spmtotres,spmthresh,spmfamunit,fam
157957,2020,91500,3,0,1,423.84,20100952,1,0,423.84,...,0.0,423.84,0,0.0,0.0,0,77447.0,27260.0,91500001,20100952-1
157958,2020,91500,3,0,1,423.84,20100952,2,0,761.16,...,0.0,423.84,0,0.0,0.0,0,77447.0,27260.0,91500001,20100952-1


In [2]:
p.columns

Index(['year', 'serial', 'month', 'cpsid', 'asecflag', 'asecwth',
       'marbasecidh', 'pernum', 'cpsidp', 'asecwt', 'age', 'race',
       'marbasecidp', 'famid', 'inctot', 'incwelfr', 'adjginc', 'offtotval',
       'offcutoff', 'cutoff', 'spmlunch', 'spmcaphous', 'spmwt', 'spmeitc',
       'spmwic', 'spmheat', 'spmsnap', 'spmtotres', 'spmthresh', 'spmfamunit',
       'fam'],
      dtype='object')

In [None]:
def ubi_fpg()

In [59]:
c.groupby("fam").offcutoff.nunique().max()

1

In [55]:
c.groupby("fam").spmfamunit.nunique().max()

1

In [57]:
c.groupby("spmfamunit").fam.nunique().max()

8

In [29]:
c.columns

Index(['year', 'serial', 'month', 'cpsid', 'asecflag', 'asecwth', 'pernum',
       'cpsidp', 'asecwt', 'age', 'race', 'famid', 'inctot', 'incwelfr',
       'adjginc', 'offtotval', 'offcutoff', 'cutoff', 'spmlunch', 'spmcaphous',
       'spmwt', 'spmeitc', 'spmwic', 'spmheat', 'spmsnap', 'spmtotres',
       'spmthresh', 'spmfamunit'],
      dtype='object')

In [None]:
pd.read_csv()

In [60]:
c[c.cpsid == 20200307030400][["fam", "offcutoff", "offtotval"]]

Unnamed: 0,fam,offcutoff,offtotval
118006,1120175155-1,17622,45015
118007,1120175155-1,17622,45015
118008,1120175155-2,13300,35007


* Aggregate to family and SPM unit
* Determine 

In [22]:
c.groupby("cpsidp").size().max()

53081

In [7]:
c.groupby("offcutoff").size().sort_index()

offcutoff
12261      7520
13300     18105
15453     13728
17120     17512
17555       140
17622      4172
19998     10284
20578     13035
20598      4389
25926     20400
26017      2336
26370      4816
26801      6720
30044       840
30510     10030
31275      4465
31800      1415
32263      2645
33522       294
34161      3756
35239      1968
35965      1878
36576       312
36721       798
36757        91
38262      1092
39635       707
40811       735
41442       483
41709        56
42066       384
42085        84
42348       245
43470       232
44818       304
45881       200
46630       176
47069        24
47485        56
49426       162
51406       245
51727       189
53025       252
54460       163
55503        93
56139        75
56621        19
56895        37
999999      297
dtype: int64

In [9]:
c.groupby("cutoff").size().sort_index()

cutoff
12261      7520
13300     18105
15453     13728
17120     17512
17555       140
17622      4172
19998     10284
20578     13035
20598      4389
25926     20400
26017      2336
26370      4816
26801      6720
30044       840
30510     10030
31275      4465
31800      1415
32263      2645
33522       294
34161      3756
35239      1968
35965      1878
36576       312
36721       798
36757        91
38262      1092
39635       707
40811       735
41442       483
41709        56
42066       384
42085        84
42348       245
43470       232
44818       304
45881       200
46630       176
47069        24
47485        56
49426       162
51406       245
51727       189
53025       252
54460       163
55503        93
56139        75
56621        19
56895        37
999999      297
dtype: int64