In [1]:
from taxcalc import *
from taxcalc.utils import *
from bokeh.io import show, output_notebook
from bokeh.charts import Bar, Scatter
from bokeh.layouts import column
from bokeh.plotting import figure
from collections import OrderedDict
import copy
from notebookfunctions import distribution, index_list, percentile
output_notebook()

### Missing Variables

In [2]:
cps = pd.read_csv('../taxdata/cps_data/cps_ben_full.csv')
usable = open('taxcalc/records_variables.json')
usable_vars = json.load(usable)
usable.close()
missing = 0
for item in usable_vars['read'].keys():
    if item not in cps.columns:
        print '{}: {}'.format(item, usable_vars['read'][item]['desc'])
        missing += 1
print '\nTotal Variables Missing: {}'.format(missing)

p23250: Sch D: Net long-term capital gains/losses
p25470: Sch E: Royalty depletion and/or rental depreciation
e09800: Unreported payroll taxes from Form 4137 or 8919
e02000: Sch E rental, royalty, S-corp, etc, income/loss
e62900: Alternative Minimum Tax foreign tax credit from Form 6251
p08000: Other tax credits (but not including Sch R credit)
e58990: Investment income elected amount from Form 4952
e00700: Taxable refunds of state and local income taxes
e03290: Health savings account deduction from Form 8889
e07240: Retirement savings contributions credit from Form 8880
e19200: Sch A: Interest paid
e27200: Sch E: Farm rent net income or loss
e01200: Other net gain/loss from Form 4797
e03500: Alimony paid
e07260: Residential energy credit from Form 5695
p22250: Sch D: Net short-term capital gains/losses
e03220: Educator expenses
e07400: General business credit from Form 3800
f6251: 1 if Form 6251 (AMT) attached to return; otherwise 0
e03230: Tuition and fees from Form 8917
e03400: Pena

In [3]:
# Data from IRS-SOI Tax Stats. Used for comparison
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)  # Equivalent to tax-calc diagnostic table
soi_income = pd.read_csv('soi_income_stats.csv')  # Distribution of income items
soi_deductions = pd.read_csv('soi_deductions.csv', index_col=0)  # Itemized deductions
soi_deductions['index'] = soi_deductions.index

In [4]:
# Create calculator from the PUF
calc_puf = Calculator(records=Records(), policy=Policy())
calc_puf.advance_to_year(2014)

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [5]:
# Calculator the CPS
wt = pd.read_csv('../Dropbox/cps_weights.csv')
# wt.drop('SEQUENCE', inplace=True, axis=1)
# wt = wt.loc[:,:'WT2026']
wt *= 100
recs_cps = Records(data=cps,
                   weights=wt,
                   adjust_ratios=None,
                   start_year=2014)
pol_cps = Policy(start_year=2014, num_years=11)
calc_cps = Calculator(records=recs_cps, policy=pol_cps)
calc_cps.advance_to_year(2014)

In [6]:
calc_puf.calc_all()
calc_cps.calc_all()

### CPS Distribution Table

In [7]:
create_distribution_table(calc_cps.records, groupby='weighted_deciles', result_type='weighted_avg')

Unnamed: 0,expanded_income,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,num_returns_AMT,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined
0,-182.0,16318429.0,-542.0,9212955.0,6107.0,14925.0,7.0,4916.0,26.0,1.0,-547.0,0.0,0.0,1.0,0.0,0.0,90.0,-90.0,160.0,70.0
1,8802.0,16320544.0,4791.0,11461211.0,7374.0,155301.0,93.0,5630.0,116.0,8.0,4755.0,17179.0,1.0,8.0,2.0,0.0,718.0,-711.0,626.0,-85.0
2,15277.0,16319424.0,8267.0,11972754.0,7567.0,879226.0,607.0,6004.0,1315.0,124.0,7993.0,34689.0,1.0,125.0,22.0,0.0,983.0,-881.0,1094.0,214.0
3,21865.0,16319273.0,14491.0,12924656.0,7570.0,2041912.0,1266.0,6657.0,4069.0,414.0,13822.0,32681.0,2.0,416.0,69.0,0.0,1173.0,-826.0,1969.0,1143.0
4,29317.0,16319617.0,21952.0,12693373.0,7585.0,3122581.0,1962.0,7199.0,8603.0,982.0,20856.0,32825.0,3.0,985.0,139.0,0.0,1022.0,-176.0,3025.0,2849.0
5,38837.0,16318693.0,30962.0,12373953.0,7665.0,3758391.0,2620.0,7626.0,15245.0,1817.0,29596.0,25077.0,4.0,1821.0,237.0,0.0,590.0,994.0,4277.0,5272.0
6,51862.0,16320896.0,44180.0,11716124.0,7463.0,4533853.0,3701.0,8015.0,25900.0,3266.0,42326.0,30054.0,3.0,3270.0,365.0,0.0,148.0,2757.0,6029.0,8786.0
7,71353.0,16318688.0,64672.0,10828694.0,7180.0,5484529.0,5081.0,8792.0,43899.0,6218.0,62418.0,28158.0,4.0,6222.0,506.0,0.0,13.0,5703.0,8843.0,14546.0
8,103024.0,16320295.0,94946.0,8161910.0,5773.0,8156266.0,7924.0,9903.0,71451.0,10880.0,91934.0,46130.0,7.0,10887.0,597.0,0.0,2.0,10288.0,13090.0,23378.0
9,245340.0,16319911.0,231860.0,4918038.0,3643.0,11401342.0,14692.0,9765.0,203807.0,46844.0,228039.0,2942303.0,628.0,47471.0,122.0,374.0,0.0,47723.0,21897.0,69621.0


### PUF Distribution Table

In [8]:
create_distribution_table(calc_puf.records, groupby='weighted_deciles', result_type='weighted_avg')

Unnamed: 0,expanded_income,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,num_returns_AMT,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined
0,-6904.0,16222114.0,-7252.0,11094216.0,5964.0,6150.0,18.0,4135.0,44.0,4.0,-7601.0,0.0,0.0,4.0,0.0,6.0,141.0,-131.0,353.0,222.0
1,8443.0,16220773.0,6016.0,13627903.0,7057.0,48948.0,29.0,4913.0,262.0,23.0,5990.0,20950.0,0.0,24.0,0.0,4.0,774.0,-747.0,821.0,74.0
2,15025.0,16223499.0,10098.0,13243287.0,7608.0,279753.0,180.0,6493.0,1482.0,145.0,9974.0,27189.0,1.0,146.0,19.0,6.0,1650.0,-1517.0,1428.0,-89.0
3,21963.0,16222202.0,15120.0,14169419.0,7787.0,683754.0,548.0,6822.0,3991.0,420.0,14767.0,31485.0,2.0,422.0,82.0,13.0,1460.0,-1107.0,2039.0,932.0
4,30413.0,16223284.0,22440.0,14142144.0,7921.0,1516658.0,1287.0,7187.0,8668.0,999.0,21647.0,18616.0,2.0,1002.0,217.0,17.0,1067.0,-265.0,2881.0,2615.0
5,40936.0,16222555.0,32564.0,13206631.0,7687.0,2863096.0,2661.0,7339.0,16532.0,2017.0,30986.0,15840.0,2.0,2020.0,362.0,26.0,548.0,1136.0,3965.0,5101.0
6,54706.0,16222223.0,46478.0,11339850.0,7067.0,4849894.0,5036.0,7734.0,27243.0,3573.0,43467.0,10617.0,2.0,3575.0,506.0,42.0,164.0,2947.0,5336.0,8283.0
7,74668.0,16222702.0,67782.0,9028824.0,6082.0,7185021.0,8156.0,8574.0,45098.0,6519.0,63224.0,46267.0,5.0,6524.0,681.0,62.0,80.0,5825.0,7708.0,13533.0
8,110558.0,16222104.0,102839.0,5871767.0,4308.0,10349258.0,13730.0,10142.0,74728.0,11702.0,95552.0,184541.0,18.0,11720.0,889.0,100.0,95.0,10835.0,12479.0,23314.0
9,338763.0,16223164.0,321658.0,2019136.0,1497.0,14190613.0,36479.0,9580.0,274512.0,69165.0,306721.0,4448053.0,2351.0,71516.0,1789.0,2138.0,55.0,71811.0,21957.0,93768.0


### Diagnostic Table Comparison

In [9]:
cps_diag = create_diagnostic_table(calc_cps)

In [10]:
puf_diag = create_diagnostic_table(calc_puf)

In [11]:
diag_data = pd.DataFrame()
diag_data['SOI'] = soi_stats['Value']
diag_data['CPS'] = cps_diag[2014]
diag_data['PUF'] = puf_diag[2014]
diag_data['% Change'] = ((cps_diag[2014] / puf_diag[2014]) - 1) * 100

In [12]:
diag_data

Unnamed: 0,SOI,CPS,PUF,% Change
Returns (#m),148.6,163.2,162.2,0.6
AGI ($b),9771.0,8414.2,10021.5,-16.0
Itemizers (#m),44.0,39.5,42.0,-5.8
Itemized Deduction ($b),1206.7,619.4,1105.2,-44.0
Standard Deduction Filers (#m),117.4,106.3,107.7,-1.4
Standard Deduction ($b),876.2,965.8,925.2,4.4
Personal Exemption ($b),1121.6,1110.2,1114.0,-0.3
Taxable Income ($b),6997.9,6110.6,7341.8,-16.8
Regular Tax ($b),,1151.4,1534.2,-24.9
AMT Income ($b),,8179.4,9485.9,-13.8


In [13]:
# Total value of missing itemized deductions
in_billions = 1e-9
state = (calc_puf.records.e18400 * calc_puf.records.s006).sum()
print 'State and Local: {} ($b)'.format(round(state * in_billions, 2))
int_paid = (calc_puf.records.e19200 * calc_puf.records.s006).sum()
print 'Interest Paid: {} ($b)'.format(round(int_paid * in_billions, 2))
net_cas = (calc_puf.records.g20500 * calc_puf.records.s006).sum()
print 'Net Casualty or Theft Loss: {} ($b)'.format(round(net_cas * in_billions, 2))
print '-----------------'
print 'Total: {} ($b)'.format((round((state + int_paid + net_cas) * in_billions, 2)))

State and Local: 321.94 ($b)
Interest Paid: 325.15 ($b)
Net Casualty or Theft Loss: 4.62 ($b)
-----------------
Total: 651.72 ($b)


### Income Levels

In [14]:
inc_dict = OrderedDict()
inc_dict['CPS'] = [] 
inc_dict['PUF'] = []
inc_dict['Diff'] = []
inc_dict['Pct Diff'] = []
inc_list = ['WAS', 'Taxable Interest', 'Ordinary Dividends', 'Qualified Dividends', 'Business Income']
was_cps = (calc_cps.records.e00200 * calc_cps.records.s006).sum()
inc_dict['CPS'].append(was_cps)
was_puf = (calc_puf.records.e00200 * calc_puf.records.s006).sum()
inc_dict['PUF'].append(was_puf)
was_diff = (was_cps - was_puf)
inc_dict['Diff'].append(was_diff)
inc_dict['Pct Diff'].append((was_diff / was_puf) * 100)

int_cps = (calc_cps.records.e00300 * calc_cps.records.s006).sum()
inc_dict['CPS'].append(int_cps)
int_puf = (calc_puf.records.e00300 * calc_puf.records.s006).sum()
inc_dict['PUF'].append(int_puf)
int_diff = (int_cps - int_puf)
inc_dict['Diff'].append(int_diff)
inc_dict['Pct Diff'].append((int_diff / int_puf) * 100)

odiv_cps = (calc_cps.records.e00600 * calc_cps.records.s006).sum()
inc_dict['CPS'].append(odiv_cps)
odiv_puf = (calc_puf.records.e00600 * calc_puf.records.s006).sum()
inc_dict['PUF'].append(odiv_puf)
odiv_diff = (odiv_cps - odiv_puf)
inc_dict['Diff'].append(odiv_diff)
inc_dict['Pct Diff'].append((odiv_diff / odiv_puf) * 100)

qdiv_cps = (calc_cps.records.e00650 * calc_cps.records.s006).sum()
inc_dict['CPS'].append(qdiv_cps)
qdiv_puf = (calc_puf.records.e00650 * calc_puf.records.s006).sum()
inc_dict['PUF'].append(qdiv_puf)
qdiv_diff = (qdiv_cps - qdiv_puf)
inc_dict['Diff'].append(qdiv_diff)
inc_dict['Pct Diff'].append((qdiv_diff / qdiv_puf) * 100)

biz_cps = (calc_cps.records.e00900 * calc_cps.records.s006).sum()
inc_dict['CPS'].append(biz_cps)
biz_puf = (calc_puf.records.e00900 * calc_puf.records.s006).sum()
inc_dict['PUF'].append(biz_puf)
biz_diff = (biz_cps - biz_puf)
inc_dict['Diff'].append(biz_diff)
inc_dict['Pct Diff'].append((biz_diff / biz_puf) * 100)

inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = inc_list
inc_df

Unnamed: 0,CPS,PUF,Diff,Pct Diff
WAS,6648838107939.7,6820912371695.9,-172074263756.2,-2.5
Taxable Interest,91960641825.4,97246196658.4,-5285554833.1,-5.4
Ordinary Dividends,245424347487.0,270137086776.2,-24712739289.2,-9.1
Qualified Dividends,185442636961.2,198124994913.5,-12682357952.3,-6.4
Business Income,309555316495.2,309444223964.1,111092531.1,0.0


### Distribution of Income Variables

In [15]:
# Generate data for distribution plots
cps_dist = pd.DataFrame()
puf_dist = pd.DataFrame()
cps_was = distribution(calc_cps.records.e00200, calc_cps.records.s006, calc_cps.records.c00100)
puf_was = distribution(calc_puf.records.e00200, calc_puf.records.s006, calc_puf.records.c00100)
cps_int = distribution(calc_cps.records.e00300, calc_cps.records.s006, calc_cps.records.c00100)
puf_int = distribution(calc_puf.records.e00300, calc_puf.records.s006, calc_puf.records.c00100)
cps_odiv = distribution(calc_cps.records.e00600, calc_cps.records.s006, calc_cps.records.c00100)
puf_odiv = distribution(calc_puf.records.e00600, calc_puf.records.s006, calc_puf.records.c00100)
cps_qdiv = distribution(calc_cps.records.e00650, calc_cps.records.s006, calc_cps.records.c00100)
puf_qdiv = distribution(calc_puf.records.e00650, calc_puf.records.s006, calc_puf.records.c00100)
cps_biz = distribution(calc_cps.records.e00900, calc_cps.records.s006, calc_cps.records.c00100)
puf_biz = distribution(calc_puf.records.e00900, calc_puf.records.s006, calc_puf.records.c00100)
cps_dist['WAS'] = cps_was[1]
puf_dist['WAS'] = puf_was[1]
cps_dist['INT'] = cps_int[1]
puf_dist['INT'] = puf_int[1]
cps_dist['ODIV'] = cps_odiv[1]
puf_dist['ODIV'] = puf_odiv[1]
cps_dist['QDIV'] = cps_qdiv[1]
puf_dist['QDIV'] = puf_qdiv[1]
cps_dist['BIZ'] = cps_biz[1]
puf_dist['BIZ'] = puf_biz[1]
cps_dist['AGI Bin'] = index_list()
puf_dist['AGI Bin'] = index_list()
cps_dist['label'] = 'CPS'
puf_dist['label'] = 'PUF'
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
soi_dist = pd.DataFrame()
for item in items_tups:
    soi_dist[item[0]] = (soi_income[item[0]] / soi_income[item[0]].sum()) * 100
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'
scatter_data = pd.concat([cps_dist, puf_dist, soi_dist])
scatter_list = list()  # list for scatter plot objects
for item in items_tups:
    title = 'Percent of Total {} by AGI Bin'.format(item[1])
    scatter = Scatter(scatter_data, x='AGI Bin', y=item[0], color='label', ylabel='Percent',
                      title=title, tooltips=[('PCT', '@{}'.format(item[0]))])
    scatter_list.append(scatter)

In [16]:
show(column(scatter_list))

In [17]:
cps_tot = pd.DataFrame()
puf_tot = pd.DataFrame()
cps_tot['WAS'] = cps_was[0]
puf_tot['WAS'] = puf_was[0]
cps_tot['INT'] = cps_int[0]
puf_tot['INT'] = puf_int[0]
cps_tot['ODIV'] = cps_odiv[0]
puf_tot['ODIV'] = puf_odiv[0]
cps_tot['QDIV'] = cps_qdiv[0]
puf_tot['QDIV'] = puf_qdiv[0]
cps_tot['BIZ'] = cps_biz[0]
puf_tot['BIZ'] = puf_biz[0]
cps_tot['AGI Bin'] = index_list()
puf_tot['AGI Bin'] = index_list()
cps_tot['label'] = 'CPS'
puf_tot['label'] = 'PUF'
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'
total_data = pd.concat([cps_tot, puf_tot, soi_income])
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
total_list = list()  # list for scatter plot objects
for item in items_tups:
    title = 'Total {} by AGI Bin'.format(item[1])
    scatter = Scatter(total_data, x='AGI Bin', y=item[0], color='label', ylabel='Total',
                      title=title, tooltips=[('Total', '@{}'.format(item[0]))])
    total_list.append(scatter)

In [18]:
show(column(total_list))

### Itemized Deduction Amounts

In [19]:
deductions_cps = {'Medical Expenses': (calc_cps.records.e17500[calc_cps.records.c04470 > 0] *
                                       calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'State and Local Taxes':  (calc_cps.records.e18400[calc_cps.records.c04470 > 0] *
                                             calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Real Estate Taxes':  (calc_cps.records.e18500[calc_cps.records.c04470 > 0] *
                                         calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Interest Paid':  (calc_cps.records.e19200[calc_cps.records.c04470 > 0] *
                                     calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Charitable Cash Contributions': (calc_cps.records.e19800[calc_cps.records.c04470 > 0] *
                                                    calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Charitable Non-Cash Contributions': (calc_cps.records.e20100[calc_cps.records.c04470 > 0] *
                                                        calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Total Misc. Expenses':  (calc_cps.records.e20400[calc_cps.records.c04470 > 0] *
                                            calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum(),
                  'Net Casualty or Loss': (calc_cps.records.g20500[calc_cps.records.c04470 > 0] *
                                           calc_cps.records.s006[calc_cps.records.c04470 > 0]).sum()}
ded_cps_df = pd.DataFrame.from_dict(deductions_cps, 'index')
ded_cps_df.columns = ['Total']
ded_cps_df['source'] = 'CPS'

deductions_puf = {'Medical Expenses': (calc_puf.records.e17500[calc_puf.records.c04470 > 0] *
                                       calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'State and Local Taxes':  (calc_puf.records.e18400[calc_puf.records.c04470 > 0] *
                                             calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Real Estate Taxes':  (calc_puf.records.e18500[calc_puf.records.c04470 > 0] *
                                         calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Interest Paid':  (calc_puf.records.e19200[calc_puf.records.c04470 > 0] *
                                     calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Charitable Cash Contributions': (calc_puf.records.e19800[calc_puf.records.c04470 > 0] *
                                                    calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Charitable Non-Cash Contributions': (calc_puf.records.e20100[calc_puf.records.c04470 > 0] *
                                                        calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Total Misc. Expenses':  (calc_puf.records.e20400[calc_puf.records.c04470 > 0] *
                                            calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum(),
                  'Net Casualty or Loss': (calc_puf.records.g20500[calc_puf.records.c04470 > 0] *
                                           calc_puf.records.s006[calc_puf.records.c04470 > 0]).sum()}
ded_puf_df = pd.DataFrame.from_dict(deductions_puf, 'index')
ded_puf_df.columns = ['Total']
ded_puf_df['source'] = 'PUF'
soi_deductions['source'] = 'SOI'

ded_full_df = pd.concat([ded_cps_df, ded_puf_df, soi_deductions])
ded_full_df['index'] = ded_full_df.index

In [20]:
ded_bar = Bar(ded_full_df, 'index', 'Total', group='source', title='Itemized Deduction Totals',
              xlabel='Deduction', ylabel='Total', tooltips=[('Deduction', '@index'), ('Total', '@height'),
                                                            ('Data', '@source')])
show(ded_bar)

In [21]:
ded_error_df = pd.DataFrame()
ded_error_df['Difference - CPS'] = ded_cps_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference - CPS'] = 100 * ded_error_df['Difference - CPS'] / soi_deductions['Total']
ded_error_df['Difference - PUF'] = ded_puf_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference - PUF'] = 100 * ded_error_df['Difference - PUF'] / soi_deductions['Total']
print 'Error in Itemized Deductions Relative to SOI Totals'
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference - CPS,% Difference - CPS,Difference - PUF,% Difference - PUF
Charitable Cash Contributions,-15578764104.2,-10.0,-4703427583.1,-3.0
Charitable Non-Cash Contributions,-34652972796.2,-53.0,-31555102263.5,-48.3
Interest Paid,-307962385000.0,-100.0,-7489448194.8,-2.4
Medical Expenses,-22933690785.2,-17.8,-9779156210.6,-7.6
Net Casualty or Loss,-2204349000.0,-100.0,2251393716.6,102.1
Real Estate Taxes,-103310686116.8,-57.1,17247882146.1,9.5
State and Local Taxes,-92296162578.9,-28.1,-16248434290.2,-4.9
Total Misc. Expenses,-25602945716.1,-20.3,3957677605.4,3.1


### Refundable Credits

In [22]:
eitc_cps = (calc_cps.records.eitc * calc_cps.records.s006).sum()
c11070_cps = (calc_cps.records.c11070 * calc_cps.records.s006).sum()
c10960_cps = (calc_cps.records.c10960 * calc_cps.records.s006).sum()
personal_credit_cps = (calc_cps.records.personal_credit * calc_cps.records.s006).sum()
ctc_new_cps = (calc_cps.records.ctc_new * calc_cps.records.s006).sum()

eitc_puf = (calc_puf.records.eitc * calc_puf.records.s006).sum()
c11070_puf = (calc_puf.records.c11070 * calc_puf.records.s006).sum()
c10960_puf = (calc_puf.records.c10960 * calc_puf.records.s006).sum()
personal_credit_puf = (calc_puf.records.personal_credit * calc_puf.records.s006).sum()
ctc_new_puf = (calc_puf.records.ctc_new * calc_puf.records.s006).sum()

In [23]:
print 'Refundable Credits'
pd.DataFrame(OrderedDict({'PUF': [eitc_puf, c11070_puf, c10960_puf, personal_credit_puf, ctc_new_puf],
                          'CPS': [eitc_cps, c11070_cps, c10960_cps, personal_credit_cps, ctc_new_cps],
                          'Diff': [eitc_cps - eitc_puf, c11070_cps - c11070_puf, c10960_cps - c10960_puf,
                                   personal_credit_cps - personal_credit_puf,
                                   ctc_new_cps - ctc_new_puf]}),
                          index=['eitc', 'c11070', 'c10960', 'personal credit', 'ctc new'])

Refundable Credits


Unnamed: 0,Diff,PUF,CPS
eitc,-6867495315.7,65207453506.9,58339958191.2
c11070,-5271114412.6,24274900731.4,19003786318.8
c10960,-8382574969.7,8382574969.7,0.0
personal credit,0.0,0.0,0.0
ctc new,0.0,0.0,0.0


In [24]:
eic0cps = sum(calc_cps.records.s006[calc_cps.records.EIC == 0])
eic1cps = sum(calc_cps.records.s006[calc_cps.records.EIC == 1])
eic2cps = sum(calc_cps.records.s006[calc_cps.records.EIC == 2])
eic3cps = sum(calc_cps.records.s006[calc_cps.records.EIC == 3])
eic0puf = sum(calc_puf.records.s006[calc_puf.records.EIC == 0])
eic1puf = sum(calc_puf.records.s006[calc_puf.records.EIC == 1])
eic2puf = sum(calc_puf.records.s006[calc_puf.records.EIC == 2])
eic3puf = sum(calc_puf.records.s006[calc_puf.records.EIC == 3]) 
eic_tot_cps = calc_cps.records.s006.sum()
eic_tot_puf = calc_puf.records.s006.sum()
print 'Percent and Number of Tax Units with Specified Number of EIC Qualified Children'
pd.DataFrame({'CPS': [eic0cps, eic1cps, eic2cps, eic3cps],
              'PUF': [eic0puf, eic1puf, eic2puf, eic3puf],
              '% - CPS': [eic0cps / eic_tot_cps, eic1cps / eic_tot_cps,
                          eic2cps / eic_tot_cps, eic3cps / eic_tot_cps],
              '% - PUF': [eic0puf / eic_tot_puf, eic1puf / eic_tot_puf,
                          eic2puf / eic_tot_puf, eic3puf / eic_tot_puf]})

Percent and Number of Tax Units with Specified Number of EIC Qualified Children


Unnamed: 0,% - CPS,% - PUF,CPS,PUF
0,0.7,0.9,114745569.5,140952059.1
1,0.1,0.1,22983420.2,10061670.4
2,0.1,0.0,16819448.2,7931481.3
3,0.1,0.0,8647332.5,3279408.9


### Benefit Programs

In [25]:
# Columns used for participation rates
cps['ssi_part'] = np.where(cps.ssi > 0, 1, 0)
cps['snap_part'] = np.where(cps.snap > 0, 1, 0)
cps['mcare_part'] = np.where(cps.mcare > 0, 1, 0)
cps['mcaid_part'] = np.where(cps.mcaid > 0, 1, 0)
cps['ss_part'] = np.where(cps.ss > 0, 1, 0)
cps['vb_part'] = np.where(cps.vb > 0, 1, 0)

In [26]:
benefits = pd.DataFrame({'SSI': [(cps.ssi * cps.s006).sum()],
                         'SNAP': [(cps.snap * cps.s006).sum()],
                         'Medicare': [(cps.mcare * cps.s006).sum()],
                         'Medicaid': [(cps.mcaid * cps.s006).sum()],
                         'Social Security': [(cps.ss * cps.s006).sum()],
                         'VB': [(cps.vb * cps.s006).sum()]}).transpose()
benefits.columns = ['Total']
print 'Benefits Totals'
benefits

Benefits Totals


Unnamed: 0,Total
Medicaid,352985014221.6
Medicare,488236708639.1
SNAP,82336140621.7
SSI,54186915454.5
Social Security,630068059192.4
VB,146836134085.4


#### Benefit Program Participation Rates

In [27]:
benefit_participation_names = [('ssi_part', 'SSI'), ('snap_part', 'SNAP'), ('mcare_part', 'Medicare'),
                               ('mcaid_part', 'Medicaid'), ('ss_part', 'Social Security'),
                               ('vb_part', "Veteran's Benefits")]

In [28]:
# Create list of bokeh figure displaying participation rates in each program
fig_list = list()
for item in benefit_participation_names:
    wcps = percentile(cps, item[0], 100, 'e00200', 's006')
    f = figure(title='{} Participation Rate'.format(item[1]),
               x_axis_label='Wage Percentile')
    f.line(wcps.index, wcps)
    fig_list.append(f)

In [29]:
show(column(fig_list))

#### Average Benefits Received by Participants

In [30]:
benefit_program_names = [('ssi', 'SSI'), ('snap', 'SNAP'), ('mcare', 'Medicare'),
                         ('mcaid', 'Medicaid'), ('ss', 'Social Security'),
                         ('vb', "Veteran's Benefits")]

In [35]:
fig_list = list()
for item in benefit_program_names:
    # Only taking the average benefit of participants
    bcps = cps[cps[item[0]] > 0]
    wcps = percentile(bcps, item[0], 100, 'e00200', 's006')
    f = figure(title='Average {} Benefit - Participants'.format(item[1]),
               x_axis_label='Wage Percentile')
    f.line(wcps.index, wcps)
    fig_list.append(f)

In [36]:
show(column(fig_list))

#### Average Benefits Received by Entire Population

In [33]:
fig_list = list()
for item in benefit_program_names:
    # Only taking the average benefit of participants
    wcps = percentile(cps, item[0], 100, 'e00200', 's006')
    f = figure(title='Average {} Benefit - Entire Population'.format(item[1]),
               x_axis_label='Wage Percentile')
    f.line(wcps.index, wcps)
    fig_list.append(f)

In [34]:
show(column(fig_list))