In [1]:
from taxcalc import *
from taxcalc.utils import *
from bokeh.io import show, output_notebook
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from collections import OrderedDict
import copy
import pandas as pd
from notebookfunctions import distribution, index_list, percentile
output_notebook()

In [8]:
# Data from IRS-SOI Tax Stats. Used for comparison
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)  # Equivalent to tax-calc diagnostic table
soi_income = pd.read_csv('soi_income_stats.csv')  # Distribution of income items
soi_deductions = pd.read_csv('soi_deductions.csv', index_col=0)  # Itemized deductions
soi_deductions['index'] = soi_deductions.index

In [2]:
# Read in new PUF and associated data
gf = pd.read_csv('../taxdata/stage1/growfactors.csv')
wt = pd.read_csv('../taxdata/puf_stage2/puf_weights.csv')
puf = pd.read_csv('../taxdata/puf_data/puf.csv')
adj = pd.read_csv('../taxdata/puf_stage3/puf_ratios.csv', index_col=0)
adj = adj.transpose()

In [3]:
# base calculator
calc = Calculator(records=Records(), policy=Policy())
calc.advance_to_year(2014)
calc.calc_all()

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [4]:
# new calculator
growf = Growfactors('../taxdata/stage1/growfactors.csv')
rec = Records(puf, gfactors=growf,
              weights=wt, adjust_ratios=adj,
              start_year=2012)
pol = Policy(gfactors=growf, start_year=2012, num_years=16)
consump = Consumption(start_year=2012, num_years=16)
behave = Behavior(start_year=2012, num_years=16)
calc_new = Calculator(records=rec, policy=pol, consumption=consump, behavior=behave)
calc_new.advance_to_year(2014)
calc_new.calc_all()

You loaded data for 2012.
Tax-Calculator startup automatically extrapolated your data to 2012.


# New Disribution Table

In [5]:
create_distribution_table(calc_new, groupby='weighted_deciles',
                          result_type='weighted_sum', income_measure='c00100')

Unnamed: 0,num_returns_AMT,num_returns_ItemDed,s006,num_returns_StandardDed,refund,taxbc,c04600,c00100,iitax,aftertax_income,...,expanded_income,standard,payrolltax,c04470,c05800,combined,c04800,c62100,othertaxes,c09600
0,16572,0,16165677,6382551,492259106,287756,84297082200,-60400221264,3635428031,117120728875,...,123125511178,122331212309,2369354272,0,4097272843,6004782303,6898156,-36982854056,35206225,4096985087
1,2309,20105,16166559,16137680,4176141869,76177413,62305668440,72567517707,-3836210770,149502822547,...,153840799704,108388813228,8174187927,93192291,323193676,4337977157,1387317804,73625884727,18121377,247016263
2,19783,262765,16165812,15901036,19874322962,1260947253,94930176360,171306016534,-18302906264,252739794782,...,254615906970,127757570923,20179018452,3405511154,1537642252,1876112188,13222413786,170300411325,68587937,276694999
3,25989,785906,16166460,15377213,31172711731,5363866908,118347533760,273713312896,-25926771049,348431888220,...,356203600896,131315489999,33698483725,10031716652,5584657091,7771712676,54934663005,268067409025,115242994,220790184
4,17539,1531794,16165615,14632879,23041434292,15785584045,121994801360,403935045968,-9061891850,444694424075,...,487052116473,128432511103,51419584249,22013306436,16005370229,42357692399,141920825410,391220326967,200755497,219786184
5,15191,2900672,16166065,13263760,12208927572,34261039716,124642539840,569746285167,17744064774,558932354051,...,650349846212,118919131689,73673427387,43547036856,34508703743,91417492161,285155792715,545803896150,370005427,247664027
6,23992,4812021,16166231,11354149,2648144640,61998145140,129319056040,793275742204,53011483076,722925580561,...,876055773572,109128635968,100118709935,76303299321,62373521096,153130193011,480050315686,751085752375,595482979,375375956
7,49246,7070958,16166672,9095437,193910191,107450458317,143020745560,1115475041351,99519618833,977122185023,...,1211000191088,98633186650,134358387233,129677264229,108095114690,233878006065,746795300424,1044875566626,775338110,644656373
8,153760,10050924,16165379,6112504,33955199,181601438201,164416252400,1630299641347,173617069716,1385141262667,...,1756561593367,74016243070,197803260984,211735681841,182747151093,371420330700,1180870423360,1518693658377,1544022644,1145712892
9,4005677,13958206,16166962,2207685,156442,1028369965839,156917941503,4964077839622,1068938687455,3823503064834,...,5233784724502,27553936211,341342972214,592973739705,1061090753122,1410281659669,4188513306358,4705921314113,30649540332,32720787283


# Current Distribution Table

In [6]:
create_distribution_table(calc, groupby='weighted_deciles',
                          result_type='weighted_sum', income_measure='c00100')

Unnamed: 0,num_returns_AMT,num_returns_ItemDed,s006,num_returns_StandardDed,refund,taxbc,c04600,c00100,iitax,aftertax_income,...,expanded_income,standard,payrolltax,c04470,c05800,combined,c04800,c62100,othertaxes,c09600
0,11493,0,16220752,3708850,906029892,0,86537994034,-149726787864,2361457503,31540778847,...,37122742595,123664286701,3220506246,0,3184071522,5581963748,0,-136818460696,86953480,3184071522
1,910,16978,16223564,16193482,3091156478,29889716,62800517303,49153922700,-2897010865,128770131699,...,131490747997,102703346069,5617627163,54523814,168832395,2720616298,697441193,49487976905,25782677,138942679
2,27438,127058,16222306,16085102,15739892911,617680780,88758571182,143134086177,-14975974655,226812797476,...,227737509498,125770900218,15900686677,1491389937,694218447,924712022,6585562650,142336044721,95003962,76537667
3,56262,778872,16222064,15443192,30601982626,4084319153,114942622376,249213176722,-26733429636,331015512775,...,333707338575,130019629204,29425255435,10086591886,4321455307,2691825799,42100726133,242729143343,117955756,237136154
4,32681,1515580,16223014,14706390,25346402224,13612657115,121756634656,382321791610,-14044597895,431633207537,...,464944700633,127392012174,47356090991,23212520833,13810348792,33311493096,124657921396,367965705687,249777902,197691676
5,22179,2798053,16222499,13424385,14281999026,32428593755,124038481638,553710452123,12629098555,552297913647,...,634658321968,118454408988,69731309767,43480770883,32632446842,82360408321,271695058062,527527807021,448650310,203853087
6,19149,4970371,16221953,11249915,3675361936,61446967759,125384685878,784431401275,49724205028,721252636974,...,867110514253,104753187850,96133672251,81764884123,61802074423,145857877279,475188293251,738301676883,718152651,355106663
7,64292,7234590,16223206,8984526,1386607192,108830829123,139235241390,1112500051832,97332040887,975809232814,...,1205535207793,94791036921,132393934093,131122757497,109417449721,229725974979,748492817636,1042072554885,1047217608,586620598
8,190588,10327914,16222053,5892361,1532104925,189381889569,164570887134,1667643238695,175768417694,1413639418647,...,1793082486885,69933991672,203674650545,222515689982,190120233842,379443068239,1211402555603,1550734759269,1601565184,738344273
9,4404713,14207267,16223209,2015145,873195538,1123569557383,154909947388,5228312503191,1161499625317,3959322111835,...,5475151525549,24192511419,354329788398,591438451680,1156586615144,1515829413714,4460206219834,4960693754490,34765659102,33017057760


# Diagnostic Table Comparison

In [7]:
new_diag = create_diagnostic_table(calc_new)
diag = create_diagnostic_table(calc)  # Current PUF

In [9]:
diag_data = pd.DataFrame()
diag_data['SOI'] = soi_stats['Value']
diag_data['New'] = new_diag[2014]
diag_data['Current'] = diag[2014]
diag_data['% Change'] = ((new_diag[2014] / diag[2014]) - 1) * 100
diag_data['New - SOI'] = map(abs, new_diag[2014] - diag_data['SOI'])
diag_data['Current - SOI'] = map(abs, diag[2014] - diag_data['SOI'])
diag_data

Unnamed: 0,SOI,New,Current,% Change,New - SOI,Current - SOI
Returns (#m),148.6,161.7,162.2,-0.3,13.1,13.6
AGI ($b),9771.0,9934.0,10020.7,-0.9,163.0,249.7
Itemizers (#m),44.0,41.4,42.0,-1.4,2.6,2.0
Itemized Deduction ($b),1206.7,1089.8,1105.2,-1.4,116.9,101.5
Standard Deduction Filers (#m),117.4,110.5,107.7,2.6,6.9,9.7
Standard Deduction ($b),876.2,969.4,925.0,4.8,93.2,48.8
Personal Exemption ($b),1121.6,1144.8,1113.8,2.8,23.2,7.8
Taxable Income ($b),6997.9,7092.9,7341.0,-3.4,95.0,343.1
Regular Tax ($b),,1436.2,1534.0,-6.4,,
AMT Income ($b),,9432.6,9485.0,-0.6,,


# Income Levels

In [10]:
inc_dict = OrderedDict()
inc_dict['New'] = [] 
inc_dict['Current'] = []
inc_dict['SOI'] = []
inc_dict['Pct Diff'] = []
inc_dict['New - SOI'] = []
inc_dict['Current - SOI'] = []
inc_list = ['WAS', 'Taxable Interest', 'Ordinary Dividends', 'Qualified Dividends', 'Business Income']
# Wage and salary
was_new = (calc_new.records.e00200 * calc_new.records.s006).sum()
inc_dict['New'].append(was_new)
was = (calc.records.e00200 * calc.records.s006).sum()
inc_dict['Current'].append(was)
soi_was = soi_income['WAS'].sum()
inc_dict['SOI'].append(soi_was)
new_was_diff = abs(was_new - soi_was)
inc_dict['New - SOI'].append(new_was_diff)
curr_was_diff = abs(was - soi_was)
inc_dict['Current - SOI'].append(curr_was_diff)
inc_dict['Pct Diff'].append((curr_was_diff / was) * 100)
# Interest income
int_new = (calc_new.records.e00300 * calc_new.records.s006).sum()
inc_dict['New'].append(int_new)
int_ = (calc.records.e00300 * calc.records.s006).sum()
inc_dict['Current'].append(int_)
int_soi = soi_income['INT'].sum()
inc_dict['SOI'].append(int_soi)
new_int_diff = abs(int_new - int_soi)
inc_dict['New - SOI'].append(new_int_diff)
curr_int_diff = abs(int_ - int_soi)
inc_dict['Current - SOI'].append(curr_int_diff)
inc_dict['Pct Diff'].append((curr_int_diff / int_) * 100)
# Ordinary dividends
odiv_new = (calc_new.records.e00600 * calc_new.records.s006).sum()
inc_dict['New'].append(odiv_new)
odiv = (calc.records.e00600 * calc.records.s006).sum()
inc_dict['Current'].append(odiv)
odiv_soi = soi_income['ODIV'].sum()
inc_dict['SOI'].append(odiv_soi)
new_odiv_diff = abs(odiv_new - odiv_soi)
inc_dict['New - SOI'].append(new_odiv_diff)
curr_odiv_diff = abs(odiv - odiv_soi)
inc_dict['Current - SOI'].append(curr_odiv_diff)
inc_dict['Pct Diff'].append((curr_odiv_diff / odiv) * 100)
# Qualified dividends
qdiv_new = (calc_new.records.e00650 * calc_new.records.s006).sum()
inc_dict['New'].append(qdiv_new)
qdiv = (calc.records.e00650 * calc.records.s006).sum()
inc_dict['Current'].append(qdiv)
qdiv_soi = soi_income['QDIV'].sum()
inc_dict['SOI'].append(qdiv_soi)
new_qdiv_diff = abs(qdiv_new - qdiv_soi)
inc_dict['New - SOI'].append(new_qdiv_diff)
curr_qdiv_diff = abs(qdiv - qdiv_soi)
inc_dict['Current - SOI'].append(curr_qdiv_diff)
inc_dict['Pct Diff'].append((curr_qdiv_diff / qdiv) * 100)

biz_new = (calc_new.records.e00900 * calc_new.records.s006).sum()
inc_dict['New'].append(biz_new)
biz = (calc.records.e00900 * calc.records.s006).sum()
inc_dict['Current'].append(biz)
biz_soi = soi_income['BIZ'].sum()
inc_dict['SOI'].append(biz_soi)
new_biz_diff = abs(biz_new - biz_soi)
inc_dict['New - SOI'].append(new_biz_diff)
curr_biz_diff = abs(biz - biz_soi)
inc_dict['Current - SOI'].append(curr_biz_diff)
inc_dict['Pct Diff'].append((curr_biz_diff / biz) * 100)

inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = inc_list
print "'Pct Diff' represents the percentage change between the current and new sums"
inc_df

'Pct Diff' represents the percentage change between the current and new sums


Unnamed: 0,New,Current,SOI,Pct Diff,New - SOI,Current - SOI
WAS,6784960449585.9,6820912371695.9,6785880966000.0,0.5,920516414.1,35031405695.9
Taxable Interest,92873859223.5,97246196658.4,93894281000.0,3.4,1020421776.5,3351915658.4
Ordinary Dividends,254702179872.0,270137086776.2,254702232000.0,5.7,52128.0,15434854776.2
Qualified Dividends,196858639935.9,198124994913.5,192447708000.0,2.9,4410931935.9,5677286913.5
Business Income,317256662774.3,309444223964.1,317248783000.0,2.5,7879774.3,7804559035.9


# Distribution of Income

In [11]:
# Generate data for distribution plots
new_dist = pd.DataFrame()
cur_dist = pd.DataFrame()
new_was = distribution(calc_new.records.e00200, calc_new.records.s006, calc_new.records.c00100)
cur_was = distribution(calc.records.e00200, calc.records.s006, calc.records.c00100)
new_int = distribution(calc_new.records.e00300, calc_new.records.s006, calc_new.records.c00100)
cur_int = distribution(calc.records.e00300, calc.records.s006, calc.records.c00100)
new_odiv = distribution(calc_new.records.e00600, calc_new.records.s006, calc_new.records.c00100)
cur_odiv = distribution(calc.records.e00600, calc.records.s006, calc.records.c00100)
new_qdiv = distribution(calc_new.records.e00650, calc_new.records.s006, calc_new.records.c00100)
cur_qdiv = distribution(calc.records.e00650, calc.records.s006, calc.records.c00100)
new_biz = distribution(calc_new.records.e00900, calc_new.records.s006, calc_new.records.c00100)
cur_biz = distribution(calc.records.e00900, calc.records.s006, calc.records.c00100)
new_dist['WAS'] = new_was[1]
cur_dist['WAS'] = cur_was[1]
new_dist['INT'] = new_int[1]
cur_dist['INT'] = cur_int[1]
new_dist['ODIV'] = new_odiv[1]
cur_dist['ODIV'] = cur_odiv[1]
new_dist['QDIV'] = new_qdiv[1]
cur_dist['QDIV'] = cur_qdiv[1]
new_dist['BIZ'] = new_biz[1]
cur_dist['BIZ'] = cur_biz[1]
new_dist['AGI Bin'] = index_list()
cur_dist['AGI Bin'] = index_list()
new_dist['label'] = 'New'
cur_dist['label'] = 'Current'
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
soi_dist = pd.DataFrame()
for item in items_tups:
    soi_dist[item[0]] = (soi_income[item[0]] / soi_income[item[0]].sum())
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'
scatter_data = pd.concat([new_dist, cur_dist, soi_dist])
scatter_data_new = ColumnDataSource(scatter_data[scatter_data['label'] == 'New'])
scatter_data_cur = ColumnDataSource(scatter_data[scatter_data['label'] == 'Current'])
scatter_data_soi = ColumnDataSource(scatter_data[scatter_data['label'] == 'SOI'])

figure_list = []
for item in items_tups:
    title = 'Percent of Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=scatter_data_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=scatter_data_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=scatter_data_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format="0.00%")
    hover = HoverTool(tooltips=[('Pct', '@{}%'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

In [12]:
new_tot = pd.DataFrame()
cur_tot = pd.DataFrame()
new_tot['WAS'] = new_was[0]
cur_tot['WAS'] = cur_was[0]
new_tot['INT'] = new_int[0]
cur_tot['INT'] = cur_int[0]
new_tot['ODIV'] = new_odiv[0]
cur_tot['ODIV'] = cur_odiv[0]
new_tot['QDIV'] = new_qdiv[0]
cur_tot['QDIV'] = cur_qdiv[0]
new_tot['BIZ'] = new_biz[0]
cur_tot['BIZ'] = cur_biz[0]
new_tot['AGI Bin'] = index_list()
cur_tot['AGI Bin'] = index_list()
new_tot['label'] = 'New'
cur_tot['label'] = 'Current'
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'
total_data = pd.concat([new_tot, cur_tot, soi_income])
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]

total_data_new = ColumnDataSource(total_data[total_data['label'] == 'New'])
total_data_cur = ColumnDataSource(total_data[total_data['label'] == 'Current'])
total_data_soi = ColumnDataSource(total_data[total_data['label'] == 'SOI'])

figure_list = []
for item in items_tups:
    title = 'Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=total_data_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=total_data_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=total_data_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
    hover = HoverTool(tooltips=[('Total', '@{}{}'.format(item[0], '{$0.00a}'))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

In [13]:
deductions_new = {'Medical Expenses': (calc_new.records.e17500[calc_new.records.c04470 > 0] *
                                        calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'State and Local Taxes':  (calc_new.records.e18400[calc_new.records.c04470 > 0] *
                                              calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Real Estate Taxes':  (calc_new.records.e18500[calc_new.records.c04470 > 0] *
                                          calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Interest Paid':  (calc_new.records.e19200[calc_new.records.c04470 > 0] *
                                       calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Charitable Cash Contributions': (calc_new.records.e19800[calc_new.records.c04470 > 0] *
                                                     calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Charitable Non-Cash Contributions': (calc_new.records.e20100[calc_new.records.c04470 > 0] *
                                                         calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Total Misc. Expenses':  (calc_new.records.e20400[calc_new.records.c04470 > 0] *
                                             calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Net Casualty or Loss': (calc_new.records.g20500[calc_new.records.c04470 > 0] *
                                            calc_new.records.s006[calc_new.records.c04470 > 0]).sum()}
ded_new_df = pd.DataFrame.from_dict(deductions_new, 'index')
ded_new_df.columns = ['Total']
ded_new_df['source'] = 'New'

deductions = {'Medical Expenses': (calc.records.e17500[calc.records.c04470 > 0] *
                                   calc.records.s006[calc.records.c04470 > 0]).sum(),
              'State and Local Taxes':  (calc.records.e18400[calc.records.c04470 > 0] *
                                         calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Real Estate Taxes':  (calc.records.e18500[calc.records.c04470 > 0] *
                                     calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Interest Paid':  (calc.records.e19200[calc.records.c04470 > 0] *
                                 calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Charitable Cash Contributions': (calc.records.e19800[calc.records.c04470 > 0] *
                                                calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Charitable Non-Cash Contributions': (calc.records.e20100[calc.records.c04470 > 0] *
                                                    calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Total Misc. Expenses':  (calc.records.e20400[calc.records.c04470 > 0] *
                                        calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Net Casualty or Loss': (calc.records.g20500[calc.records.c04470 > 0] *
                                       calc.records.s006[calc.records.c04470 > 0]).sum()}
ded_df = pd.DataFrame.from_dict(deductions, 'index')
ded_df.columns = ['Total']
ded_df['source'] = 'Current'
soi_deductions['source'] = 'SOI'

ded_full_df = pd.concat([ded_new_df, ded_df, soi_deductions])

In [14]:
ded_cds_new = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'New'])
ded_cds_new.add(data=[i - .5 for i in range(1, 9)], name='xaxis')
ded_cds_cur = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'Current'])
ded_cds_cur.add(data=[i - .25 for i in range(1, 9)], name='xaxis')
ded_cds_soi = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'SOI'])
ded_cds_soi.add(data=[i - .75 for i in range(1, 9)], name='xaxis')

'xaxis'

In [15]:
xaxis_list = ['Interest Paid', 'Total Misc. Expenses', 'Medical Expenses',
              'Charitable Non-Cash Contributions', 'Real Estate Taxes',
              'State and Local Taxes', 'Charitable Cash Contributions',
              'Net Casualty or Loss']
f = figure(title='Itemized Deduction Totals', x_range=xaxis_list, width=800)
#f = figure(title='Itemized Deduction Totals')
f.xaxis.major_label_orientation = 45
f.vbar(x='xaxis', bottom=0, top='Total', legend='New',
       color='green', alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_new)
f.vbar(x='xaxis', bottom=0, top='Total', legend='Current', color='red',
       alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_cur)
f.vbar(x='xaxis', bottom=0, top='Total', legend='SOI', color='blue',
       alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_soi)
hover = HoverTool(tooltips=[('Total', '@Total{$0.00a}')])
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.add_tools(hover)

show(f)

In [16]:
ded_error_df = pd.DataFrame()
ded_error_df['Difference: New'] = ded_new_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference: New'] = 100 * ded_error_df['Difference: New'] / soi_deductions['Total']
ded_error_df['Difference: Current'] = ded_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference: Current'] = 100 * ded_error_df['Difference: Current'] / soi_deductions['Total']
print 'Error in Itemized Deductions Relative to SOI Totals'
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference: New,% Difference: New,Difference: Current,% Difference: Current
Charitable Cash Contributions,-4909845726.0,-3.2,-4697400311.8,-3.0
Charitable Non-Cash Contributions,-20258359383.2,-31.0,-31554076082.7,-48.3
Interest Paid,-12479893896.8,-4.1,-7473328319.6,-2.4
Medical Expenses,-17892823670.3,-13.9,-9751487495.3,-7.6
Net Casualty or Loss,5005943747.1,227.1,2251393716.6,102.1
Real Estate Taxes,-6525885181.3,-3.6,17200103389.4,9.5
State and Local Taxes,-9342675154.4,-2.8,-16242417254.8,-4.9
Total Misc. Expenses,7169575023.8,5.7,3967640537.5,3.1


In [42]:
# Create DataFrame subset of calculator records for the new weights
item_df_new = pd.DataFrame({'e00200': calc_new.records.e00200,
                             's006': calc_new.records.s006,
                             'c04470': calc_new.records.c04470})
item_df_new['itemizer'] = np.where(calc_new.records.c04470 > 0, 1, 0)
item_participation_rt_new = percentile(item_df_new, 'itemizer', 100, 'e00200', 's006')
# Create DataFrame subset for current weights
item_df = pd.DataFrame({'e00200': calc.records.e00200,
                        's006': calc.records.s006,
                        'c04470': calc.records.c04470})
item_df['itemizer'] = np.where(calc.records.c04470 > 0, 1, 0)
item_participation_rt = percentile(item_df, 'itemizer', 100, 'e00200', 's006')
item_index = item_participation_rt_new.index

# Create column data sources for plotting
item_cds_new = ColumnDataSource({'rate': item_participation_rt_new,
                                 'index': item_participation_rt_new.index})
item_cds_cur = ColumnDataSource({'rate': item_participation_rt,
                                 'index': item_participation_rt_new.index})

In [44]:
hover_ded = HoverTool(tooltips=[('Percentile', '$index'), ('Itemizing Rate', '@rate{0.00%}')])
f = figure(title='Itemizing Rate by Wage Percentile',
           x_axis_label='Wage Percentile', tools=[hover_ded, 'save', 'reset', 'zoom_in', 'zoom_out'])
f.line('index', 'rate', legend='New', line_width=2, source=item_cds_new)
f.line('index', 'rate', legend='Current', color='red', line_width=2, source=item_cds_cur)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
show(f)

In [19]:
# Average Itemized Deduction by percentile
itemizers_new = copy.deepcopy(item_df_new[item_df_new['c04470'] > 0])
item_mean_new = percentile(itemizers_new, 'c04470', 100, 'e00200', 's006')
itemizers = copy.deepcopy(item_df[item_df['c04470'] > 0])
item_mean = percentile(itemizers, 'c04470', 100, 'e00200', 's006')
# Create column data source for plotting
item_cds_new = ColumnDataSource({'mean': item_mean_new,
                                 'xaxis': item_index})
item_cds_cur = ColumnDataSource({'mean': item_mean,
                                 'xaxis': item_index})

In [40]:
hover = HoverTool(tooltips=[('Percentile', '@xaxis'), ('Mean', '@mean{$0.00a}')])
f = figure(title='Mean Itemized Deduction Total Among Itemizers',
           x_axis_label='Wage Percentile', tools=[hover, 'save', 'reset', 'zoom_in', 'zoom_out'])
f.line('xaxis', 'mean', legend='New', source=item_cds_new, line_width=2)
f.line('xaxis', 'mean', legend='Current', color='red', source=item_cds_cur, line_width=2)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
show(f)

# Tax Liability by Year

In [21]:
liabilities_current = {'Income Tax': [],
                       'Payroll Tax': [],
                       'Combined Tax': [],
                       'source': []}
liabilities_new = {'Income Tax': [],
                   'Payroll Tax': [],
                   'Combined Tax': [],
                   'source': []}
years = []
for year in range(2015, 2028):
    years.append(year)
    calc.advance_to_year(year)
    calc_new.advance_to_year(year)
    calc.calc_all()
    calc_new.calc_all()
    liabilities_current['Income Tax'].append((calc.records.s006 * calc.records.iitax).sum())
    liabilities_current['Payroll Tax'].append((calc.records.s006 * calc.records.payrolltax).sum())
    liabilities_current['Combined Tax'].append((calc.records.s006 * calc.records.combined).sum())
    liabilities_new['Income Tax'].append((calc_new.records.s006 * calc_new.records.iitax).sum())
    liabilities_new['Payroll Tax'].append((calc_new.records.s006 * calc_new.records.payrolltax).sum())
    liabilities_new['Combined Tax'].append((calc_new.records.s006 * calc_new.records.combined).sum())
    liabilities_current['source'].append('Current')
    liabilities_new['source'].append('New')
liabilities_new['years'] = years
liabilities_current['years'] = years

In [22]:
# column data sources for plotting
liabilities_cds_new = ColumnDataSource(liabilities_new)
liabilities_cds_cur = ColumnDataSource(liabilities_current)

In [33]:
f = figure(title='Tax Liabilities 2015-2027')
f.width = 800
f.height = 500
f.line('years', 'Income Tax', color='blue', line_width=2, legend='Income-Current',
       source=liabilities_cds_cur)
f.line('years', 'Income Tax', color='cyan', line_width=2, legend='Income-New',
       source=liabilities_cds_new)
f.line('years', 'Payroll Tax', color='purple', line_width=2, legend='Payroll-Current',
       source=liabilities_cds_cur)
f.line('years', 'Payroll Tax', color='magenta', line_width=2, legend='Payroll-New',
       source=liabilities_cds_new)
f.line('years', 'Combined Tax', color='green', line_width=2, legend='Combined-Current',
       source=liabilities_cds_cur)
f.line('years', 'Combined Tax', color='lime', line_width=2, legend='Combined-New',
       source=liabilities_cds_new)
f.legend.location='top_left'
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
c1 = f.circle('years', 'Income Tax', color='blue', size=5, legend='Income-Current',
              source=liabilities_cds_cur)
c2 = f.circle('years', 'Income Tax', color='cyan', size=5, legend='Income-New',
              source=liabilities_cds_new)
c3 = f.circle('years', 'Payroll Tax', color='purple', size=5, legend='Payroll-Current',
              source=liabilities_cds_cur)
c4 = f.circle('years', 'Payroll Tax', color='magenta', size=5, legend='Payroll-New',
              source=liabilities_cds_new)
c5 = f.circle('years', 'Combined Tax', color='green', size=5, legend='Combined-Current',
              source=liabilities_cds_cur)
c6 = f.circle('years', 'Combined Tax', color='lime', size=5, legend='Combined-New',
              source=liabilities_cds_new)
hover = HoverTool(tooltips=[('Calculator', '@source'), ('Year', '@years'),
                            ('Income Tax', '$y{$0.00a}')],
                  renderers=[c1, c2])
hover2 = HoverTool(tooltips=[('Calculator', '@source'), ('Year', '@years'),
                             ('Payroll Tax', '$y{$0.00a}')],
                  renderers=[c3, c4])
hover3 =HoverTool(tooltips=[('Calculator', '@source'), ('Year', '@years'),
                            ('Combined Tax', '$y{$0.00a}')],
                  renderers=[c5, c6])
f.add_tools(hover, hover2, hover3)

show(f)

In [24]:
ldf_current = pd.DataFrame(liabilities_current, index=years)
ldf_current.drop(['source', 'years'], axis=1, inplace=True)
print 'Current Tax Liabilities by Year'
ldf_current

Current Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,2580952229934.0,1580105276837.2,1000846953096.9
2016,2652676860126.7,1615051720045.1,1037625140081.5
2017,2813546326543.2,1720999668174.9,1092546658368.3
2018,2937654024096.4,1795828233432.2,1141825790664.1
2019,3045538636999.4,1858467869104.0,1187070767895.5
2020,3154058759275.7,1924670545585.3,1229388213690.6
2021,3273926547197.1,1999801411738.6,1274125135458.4
2022,3409445728811.7,2086327698559.6,1323118030252.1
2023,3556676832883.3,2181320492211.6,1375356340671.6
2024,3711096056404.3,2281099251848.1,1429996804556.1


In [25]:
ldf_new = pd.DataFrame(liabilities_new, index=years)
ldf_new.drop(['source', 'years'], axis=1, inplace=True)
print 'New Tax Liabilities by Year'
ldf_new

New Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,2498372690086.6,1493015947278.0,1005356742808.5
2016,2589256374128.5,1535733054777.4,1053523319351.2
2017,2730738944355.0,1628628096082.2,1102110848272.7
2018,2853447823914.0,1702036958664.4,1151410865249.6
2019,2960726425817.8,1763895223993.6,1196831201824.3
2020,3068168620013.5,1828973913136.6,1239194706876.9
2021,3186237976179.1,1901970360621.8,1284267615557.3
2022,3318808407418.8,1984984088551.5,1333824318867.2
2023,3463241439470.8,2076790707540.5,1386450731930.3
2024,3613590295401.5,2172222457276.8,1441367838124.7


In [26]:
print 'Difference in Tax Liabilities by Year'
diff_df = ldf_new - ldf_current
diff_df = diff_df.append(diff_df.sum(), ignore_index=True)
years.append('Total')
diff_df.index = years
diff_df

Difference in Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,-82579539847.4,-87089329559.2,4509789711.7
2016,-63420485998.1,-79318665267.7,15898179269.6
2017,-82807382188.3,-92371572092.7,9564189904.4
2018,-84206200182.3,-93791274767.8,9585074585.5
2019,-84812211181.5,-94572645110.4,9760433928.8
2020,-85890139262.3,-95696632448.7,9806493186.3
2021,-87688571017.9,-97831051116.8,10142480098.9
2022,-90637321393.0,-101343610008.1,10706288615.1
2023,-93435393412.5,-104529784671.0,11094391258.6
2024,-97505761002.8,-108876794571.3,11371033568.5


In [27]:
print 'Pct. Change in Tax Liabilities by Year'
((ldf_new - ldf_current) / ldf_current) * 100

Pct. Change in Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,-3.2,-5.5,0.5
2016,-2.4,-4.9,1.5
2017,-2.9,-5.4,0.9
2018,-2.9,-5.2,0.8
2019,-2.8,-5.1,0.8
2020,-2.7,-5.0,0.8
2021,-2.7,-4.9,0.8
2022,-2.7,-4.9,0.8
2023,-2.6,-4.8,0.8
2024,-2.6,-4.8,0.8


# Income Distribution

In [29]:
calc_dist = Calculator(records=Records(), policy=Policy())
calc_dist.advance_to_year(2014)
calc_dist.calc_all()

# new calculator
growfd = Growfactors('../taxdata/stage1/growfactors.csv')
red = Records(puf, gfactors=growfd,
              weights=wt, adjust_ratios=adj,
              start_year=2012)
pod = Policy(gfactors=growfd, start_year=2012, num_years=16)
consumpd = Consumption(start_year=2012, num_years=16)
behaved = Behavior(start_year=2012, num_years=16)
calc_dist_new = Calculator(records=red, policy=pod, consumption=consumpd, behavior=behaved)
calc_dist_new.advance_to_year(2014)
calc_dist_new.calc_all()

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.
You loaded data for 2012.
Tax-Calculator startup automatically extrapolated your data to 2012.


In [45]:
income_df = pd.DataFrame({'c00100': calc_dist.records.c00100,
                          's006': calc_dist.records.s006})
income_df_new = pd.DataFrame({'c00100': calc_dist_new.records.c00100,
                              's006': calc_dist_new.records.s006})
mean_inc = percentile(income_df, 'c00100', 100, 'c00100', 's006')
mean_inc_new = percentile(income_df_new, 'c00100', 100, 'c00100', 's006')
income_cds = ColumnDataSource({'mean': mean_inc,
                               'percentile': item_index,
                               'source': ['Current'] * len(mean_inc)})
income_cds_new = ColumnDataSource({'mean': mean_inc_new,
                                   'percentile': item_index,
                                   'source': ['New'] * len(mean_inc_new)})

In [46]:
f = figure(title='Average AGI by AGI Percentile - 2014')
f.line(x='percentile', y='mean', source=income_cds, legend='Current', line_width=2)
f.line(x='percentile', y='mean', color='red', source=income_cds_new, legend='New', line_width=2)
f.legend.location='top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
hover = HoverTool(tooltips=[('Percentile', '@percentile'), ('Avg. AGI', '@mean{$0.00a}'),
                            ('Calculator', '@source')])
f.add_tools(hover)
show(f)