In [96]:
from taxcalc import *
from taxcalc.utils import *
from bokeh.io import show, output_notebook
from bkcharts import Bar, Scatter
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import HoverTool
from collections import OrderedDict
import copy
import pandas as pd
from notebookfunctions import distribution, index_list, percentile
# Configure Bokeh to render plots inline in the notebook; the later show() calls rely on this
output_notebook()

In [97]:
# IRS-SOI Tax Stats benchmarks that the calculator results are compared against.
# Counterpart of the tax-calc diagnostic table
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)
# Distribution of income items
soi_income = pd.read_csv('soi_income_stats.csv')
# Itemized deductions; the row labels are additionally exposed as an 'index' column
soi_deductions = (pd.read_csv('soi_deductions.csv', index_col=0)
                  .assign(index=lambda frame: frame.index))

In [98]:
# Inputs for the new calculator: the PUF records plus the weights, grow factors
# and adjustment ratios that accompany them
puf = pd.read_csv('../taxdata/puf_data/puf.csv')
wt = pd.read_csv('../taxdata/puf_stage2/puf_weights.csv')
gf = pd.read_csv('../taxdata/stage1/growfactors.csv')
# The ratios file is read with its first column as the index and then transposed
adj = pd.read_csv('../taxdata/puf_stage3/puf_ratios.csv', index_col=0).T

In [99]:
# Baseline ("current") calculator: default Records and default Policy,
# advanced to 2014 and then fully calculated
base_records = Records()
base_policy = Policy()
calc = Calculator(records=base_records, policy=base_policy)
calc.advance_to_year(2014)
calc.calc_all()

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [100]:
# Calculator for the new data: the PUF frame with its own grow factors,
# weights and adjustment ratios, advanced to 2014 and then fully calculated
growf = Growfactors('../taxdata/stage1/growfactors.csv')
rec = Records(puf,
              gfactors=growf,
              weights=wt,
              adjust_ratios=adj,
              start_year=2011)
pol = Policy(gfactors=growf,
             start_year=2011,
             num_years=14)
calc_new = Calculator(records=rec,
                      policy=pol)
calc_new.advance_to_year(2014)
calc_new.calc_all()

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2011.


### New Distribution Table 

In [101]:
# Weighted-sum distribution table by weighted decile for the new calculator's records
new_dist_table = create_distribution_table(calc_new.records,
                                           groupby='weighted_deciles',
                                           result_type='weighted_sum')
new_dist_table

Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,num_returns_AMT,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined
0,16129231,-49911159980,11305684,97533652415,8861,233087917,67786943015,642748270,41944860,-54479433156,0,0,41944860,6539822,72907214,2698051710,-2589739458,5682897776,3093158318
1,16129740,104765578263,13619941,116100538963,54524,481622406,83052706050,5009063879,472518329,104446429093,12759,5676625,478194954,10376250,70092933,13743168142,-13205256505,14406540420,1201283914
2,16129552,167404651473,13206972,123899834716,322359,3442830246,107854657542,24977036357,2441549270,164997974121,29278,25793676,2467342946,349806271,101892033,26059440944,-23840012236,23411380902,-428631333
3,16129504,260160354647,14217052,126629762321,761920,9922608835,112484797970,71457295454,7531098634,253797238952,14832,10551290,7541649924,1457387204,200176715,23445808367,-17161368932,35465759301,18304390369
4,16128810,375827812248,13932164,127941410420,1713843,22558134655,117256263390,147683483858,17073308780,361557615494,20925,46085492,17119394273,3471442471,292770625,17336150327,-3395427901,48454136218,45058708317
5,16130563,538816769736,12882519,122569448940,3160279,48163224449,119531700471,273025835940,33469641341,509009321773,17994,32561428,33502202768,5621901761,389602586,8655056626,19614846968,66393180387,86008027355
6,16130166,751651584306,10974087,115146346856,5120086,85900560172,131100262358,428657023500,55299867807,698794346772,9749,35965590,55335833396,8256004995,614301995,2957885514,44736244882,88733251421,133469496302
7,16129312,1093410129928,8634853,97801958202,7489395,140071234525,146093975016,711241490530,100854756104,1011109876894,29463,56993861,100911749965,11325987608,954904610,1334598494,89206068473,126243853335,215449921809
8,16129969,1636513956097,5471183,67031176644,10656121,233226674814,168817157199,1168481089654,178241406249,1504809841682,119805,247027311,178488433561,14185363870,1508619748,1452760002,164358929437,200693019381,365051948818
9,16129753,4950981616724,1808472,22272117859,14311735,614359537265,160971180372,4158470560204,1033109235367,4674682041547,3985539,33703919971,1066813155338,26258488080,29452538124,868256058,1069138949324,348881085870,1418020035194


### Current Distribution Table

In [102]:
# Weighted-sum distribution table by weighted decile for the current calculator's records
cur_dist_table = create_distribution_table(calc.records,
                                           groupby='weighted_deciles',
                                           result_type='weighted_sum')
cur_dist_table

Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,num_returns_AMT,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined
0,16222114,-117642730397,11094216,96752559176,6150,288416463,67077228183,707132509,58326738,-123301156818,0,0,58326738,6534866,104114514,2284974248,-2129067863,5733573002,3604505139
1,16220773,97580036362,13627903,114461983569,48948,470314529,79698496354,4249129344,377930779,97159272826,20950,6253106,384183885,5960746,65791448,12562769796,-12118755208,13318269683,1199514475
2,16223499,163827095125,13243287,123426363072,279753,2913092844,105332431878,24036354174,2354677750,161806454526,27189,13887837,2368565587,313771658,97248587,26763187081,-24611144564,23161721038,-1449423526
3,16222202,245272919379,14169419,126314754873,683754,8892476658,110668233920,64748499687,6811152285,239558900407,31485,28990830,6840143115,1328461112,206490156,23677789679,-17959617520,33072495795,15112878275
4,16223284,364054702131,14142144,128509105833,1516658,20886584071,116603561906,140621774305,16213987830,351180554668,18616,37936750,16251924580,3522446457,271705398,17305696557,-4304513035,46731627548,42427114513
5,16222555,528263202071,13206631,124699660349,2863096,43161694001,119056478172,268185132150,32723181029,502672002166,15840,38879146,32762060176,5877778155,424731110,8886777131,18422235999,64330473025,82752709024
6,16222223,753980159239,11339850,114639366799,4849894,81691267946,125467524211,441941009969,57961249369,705123893445,10617,31844100,57993093469,8203321547,682348690,2662975603,47809145009,86561378986,134370523995
7,16222702,1099608620029,9028824,98667637348,7185021,132308581642,139089019394,731605500376,105755595393,1025663100136,46267,76698684,105832294078,11040474218,1001666943,1292077528,94501409274,125044925483,219546334756
8,16222104,1668257638581,5871767,69882644295,10349258,222733967624,164524641207,1212247826461,189826035295,1550047600623,184541,295056578,190121091873,14427205045,1617792894,1544499976,175767179745,202436924101,378204103847
9,16223164,5218313951446,2019136,24281286951,14190613,591800540343,155421760341,4453457287124,1122072416954,4975981120241,4448053,38148425391,1160220842344,29020977897,34686229698,884181608,1165001912538,356209808064,1521211720602


### Diagnostic Table Comparison

In [103]:
# Diagnostic tables for the new data (new_diag) and the current PUF (diag), in that order
new_diag, diag = map(create_diagnostic_table, (calc_new, calc))

In [104]:
# Side-by-side comparison of the 2014 diagnostic totals: SOI vs. new data vs. current data.
# The SOI column is assigned first, so its row labels become the index of diag_data;
# the New and Current columns are aligned to those labels on assignment.
diag_data = pd.DataFrame()
diag_data['SOI'] = soi_stats['Value']
diag_data['New'] = new_diag[2014]
diag_data['Current'] = diag[2014]
# Percent change going from the current data to the new data
diag_data['% Change'] = ((new_diag[2014] / diag[2014]) - 1) * 100
# Absolute gap to SOI, computed from the already-aligned columns. The previous
# map(abs, ...) form produced a positional list (and a lazy iterator on Python 3),
# which silently depends on the row order of the diagnostic table matching soi_stats.
diag_data['New - SOI'] = abs(diag_data['New'] - diag_data['SOI'])
diag_data['Current - SOI'] = abs(diag_data['Current'] - diag_data['SOI'])
diag_data

Unnamed: 0,SOI,New,Current,% Change,New - SOI,Current - SOI
Returns (#m),148.6,161.3,162.2,-0.6,12.7,13.6
AGI ($b),9771.0,9829.6,10021.5,-1.9,58.6,250.5
Itemizers (#m),44.0,43.6,42.0,3.9,0.4,2.0
Itemized Deduction ($b),1206.7,1158.4,1105.2,4.8,48.3,101.5
Standard Deduction Filers (#m),117.4,106.1,107.7,-1.6,11.3,9.7
Standard Deduction ($b),876.2,926.4,925.2,0.1,50.2,49.0
Personal Exemption ($b),1121.6,1149.8,1114.0,3.2,28.2,7.6
Taxable Income ($b),6997.9,6989.6,7341.8,-4.8,8.3,343.9
Regular Tax ($b),,1428.5,1534.2,-6.9,,
AMT Income ($b),,9228.7,9485.9,-2.7,,


### Income Levels 

In [105]:
# Weighted totals of selected income items under the new data, the current data,
# and the SOI benchmark, plus the gaps between them.
#
# Each entry is (row label, Records variable name, soi_income column name).
income_items = [('WAS', 'e00200', 'WAS'),                   # Wage and salary
                ('Taxable Interest', 'e00300', 'INT'),      # Interest income
                ('Ordinary Dividends', 'e00600', 'ODIV'),   # Ordinary dividends
                ('Qualified Dividends', 'e00650', 'QDIV'),  # Qualified dividends
                ('Business Income', 'e00900', 'BIZ')]       # Business income


def weighted_total(records, var_name):
    """Return the sum of Records variable `var_name` weighted by `records.s006`."""
    return (getattr(records, var_name) * records.s006).sum()


# Column order of the resulting table is fixed by the insertion order here
inc_dict = OrderedDict((column_name, []) for column_name in
                       ['New', 'Current', 'SOI', 'Pct Diff', 'New - SOI', 'Current - SOI'])
inc_list = []
for row_label, var_name, soi_column in income_items:
    new_total = weighted_total(calc_new.records, var_name)
    cur_total = weighted_total(calc.records, var_name)
    soi_total = soi_income[soi_column].sum()
    inc_list.append(row_label)
    inc_dict['New'].append(new_total)
    inc_dict['Current'].append(cur_total)
    inc_dict['SOI'].append(soi_total)
    # Percentage change from the current total to the new total. The original cell
    # referenced *_diff names (was_diff, int_diff, ...) that were never defined in
    # the notebook, so it only ran with leftover kernel state; the definition used
    # here, (new - current) / current, reproduces the saved output.
    inc_dict['Pct Diff'].append(((new_total - cur_total) / cur_total) * 100)
    inc_dict['New - SOI'].append(abs(new_total - soi_total))
    inc_dict['Current - SOI'].append(abs(cur_total - soi_total))

inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = inc_list
# Parenthesised single-argument form prints identically on Python 2 and Python 3
print("'Pct Diff' represents the percentage change between the current and new sums")
inc_df

'Pct Diff' represents the percentage change between the current and new sums


Unnamed: 0,New,Current,SOI,Pct Diff,New - SOI,Current - SOI
WAS,6784980284114.8,6820912371695.9,6784942966000.0,-0.5,37318114.8,35969405695.9
Taxable Interest,104562327635.2,97246196658.4,93894281000.0,7.5,10668046635.2,3351915658.4
Ordinary Dividends,254707967594.8,270137086776.2,254702232000.0,-5.7,5735594.8,15434854776.2
Qualified Dividends,192856835660.6,198124994913.5,192447708000.0,-2.7,409127660.6,5677286913.5
Business Income,317256458442.1,309444223964.1,317258765000.0,2.5,2306557.9,7814541035.9


### Distribution of Income Variables

In [106]:
# Build the per-AGI-bin distribution data for each income item and the scatter
# plots comparing the new data, the current data, and SOI.
records_new = calc_new.records
records_cur = calc.records
# distribution() comes from notebookfunctions; element 1 of its result is used
# below as the percent-of-total series (element 0 is used by a later cell).
new_was = distribution(records_new.e00200, records_new.s006, records_new.c00100)
cur_was = distribution(records_cur.e00200, records_cur.s006, records_cur.c00100)
new_int = distribution(records_new.e00300, records_new.s006, records_new.c00100)
cur_int = distribution(records_cur.e00300, records_cur.s006, records_cur.c00100)
new_odiv = distribution(records_new.e00600, records_new.s006, records_new.c00100)
cur_odiv = distribution(records_cur.e00600, records_cur.s006, records_cur.c00100)
new_qdiv = distribution(records_new.e00650, records_new.s006, records_new.c00100)
cur_qdiv = distribution(records_cur.e00650, records_cur.s006, records_cur.c00100)
new_biz = distribution(records_new.e00900, records_new.s006, records_new.c00100)
cur_biz = distribution(records_cur.e00900, records_cur.s006, records_cur.c00100)

# One column per income item, in the same order for both frames
new_dist = pd.DataFrame()
cur_dist = pd.DataFrame()
for col_name, new_result, cur_result in [('WAS', new_was, cur_was),
                                         ('INT', new_int, cur_int),
                                         ('ODIV', new_odiv, cur_odiv),
                                         ('QDIV', new_qdiv, cur_qdiv),
                                         ('BIZ', new_biz, cur_biz)]:
    new_dist[col_name] = new_result[1]
    cur_dist[col_name] = cur_result[1]
for dist_frame, source_label in [(new_dist, 'New'), (cur_dist, 'Current')]:
    dist_frame['AGI Bin'] = index_list()
    dist_frame['label'] = source_label

# (soi_income column, display name) pairs driving both the SOI shares and the plots
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
# SOI share of each item per row of soi_income, expressed in percent
soi_dist = pd.DataFrame()
for col_name, display_name in items_tups:
    soi_values = soi_income[col_name]
    soi_dist[col_name] = (soi_values / soi_values.sum()) * 100
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'

# Stack the three sources; 'label' tells them apart in the plots
scatter_data = pd.concat([new_dist, cur_dist, soi_dist])
scatter_list = []  # one Scatter chart per income item
for col_name, display_name in items_tups:
    scatter_list.append(
        Scatter(scatter_data, x='AGI Bin', y=col_name, color='label', ylabel='Percent',
                title='Percent of Total {} by AGI Bin'.format(display_name),
                tooltips=[('PCT', '@{}'.format(col_name))]))

In [107]:
# Stack the percent-of-total charts vertically and render them
scatter_layout = column(scatter_list)
show(scatter_layout)

In [108]:
# Totals by AGI bin for each income item: element 0 of each distribution()
# result computed earlier is used as the totals series.
new_tot = pd.DataFrame()
cur_tot = pd.DataFrame()
for col_name, new_result, cur_result in [('WAS', new_was, cur_was),
                                         ('INT', new_int, cur_int),
                                         ('ODIV', new_odiv, cur_odiv),
                                         ('QDIV', new_qdiv, cur_qdiv),
                                         ('BIZ', new_biz, cur_biz)]:
    new_tot[col_name] = new_result[0]
    cur_tot[col_name] = cur_result[0]
for tot_frame, source_label in [(new_tot, 'New'), (cur_tot, 'Current')]:
    tot_frame['AGI Bin'] = index_list()
    tot_frame['label'] = source_label
# NOTE: these two columns are added to soi_income itself (in place), not to a copy
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'
total_data = pd.concat([new_tot, cur_tot, soi_income])

# (column, display name) pairs for the plots
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
total_list = []  # one Scatter chart per income item
for col_name, display_name in items_tups:
    total_list.append(
        Scatter(total_data, x='AGI Bin', y=col_name, color='label', ylabel='Total',
                title='Total {} by AGI Bin'.format(display_name),
                tooltips=[('Total', '@{}'.format(col_name))]))

In [109]:
# Stack the totals charts vertically and render them
total_layout = column(total_list)
show(total_layout)

### Itemized Deduction Amounts

In [110]:
def itemized_deduction_totals(records):
    """Return weighted totals of each itemized-deduction input among itemizers.

    Only records with ``c04470 > 0`` are included, and each amount is weighted
    by ``s006`` before summing.

    Parameters
    ----------
    records : object exposing the arrays c04470, s006, e17500, e18400, e18500,
        e19200, e19800, e20100, e20400 and g20500 (here: a calculator's records).

    Returns
    -------
    dict mapping deduction name -> weighted total.
    """
    # Compute the itemizer mask and the matching weights once instead of per item
    is_itemizer = records.c04470 > 0
    itemizer_weights = records.s006[is_itemizer]

    def total(amounts):
        return (amounts[is_itemizer] * itemizer_weights).sum()

    return {'Medical Expenses': total(records.e17500),
            'State and Local Taxes': total(records.e18400),
            'Real Estate Taxes': total(records.e18500),
            'Interest Paid': total(records.e19200),
            'Charitable Cash Contributions': total(records.e19800),
            'Charitable Non-Cash Contributions': total(records.e20100),
            'Total Misc. Expenses': total(records.e20400),
            'Net Casualty or Loss': total(records.g20500)}


# Totals under the new data
deductions_new = itemized_deduction_totals(calc_new.records)
ded_new_df = pd.DataFrame.from_dict(deductions_new, orient='index')
ded_new_df.columns = ['Total']
ded_new_df['source'] = 'New'

# Totals under the current data
deductions = itemized_deduction_totals(calc.records)
ded_df = pd.DataFrame.from_dict(deductions, orient='index')
ded_df.columns = ['Total']
ded_df['source'] = 'Current'
# NOTE: the 'source' column is added to soi_deductions itself (in place)
soi_deductions['source'] = 'SOI'

# Stack the three sources and expose the deduction names as an 'index' column
ded_full_df = pd.concat([ded_new_df, ded_df, soi_deductions])
ded_full_df['index'] = ded_full_df.index

In [111]:
# Grouped bar chart of itemized-deduction totals, one bar group per data source
ded_tooltips = [('Deduction', '@index'),
                ('Total', '@height{0.2f}'),
                ('Data', '@source')]
ded_bar = Bar(ded_full_df, 'index', 'Total',
              group='source',
              title='Itemized Deduction Totals',
              xlabel='Deduction',
              ylabel='Total',
              tooltips=ded_tooltips)
show(ded_bar)

In [117]:
# Level and percentage error of each itemized-deduction total relative to SOI,
# for the new data and for the current data. Rows are matched on deduction name.
ded_error_df = pd.DataFrame()
for source_label, source_df in [('New', ded_new_df), ('Current', ded_df)]:
    diff_col = 'Difference: {}'.format(source_label)
    pct_col = '% Difference: {}'.format(source_label)
    ded_error_df[diff_col] = source_df['Total'] - soi_deductions['Total']
    ded_error_df[pct_col] = 100 * ded_error_df[diff_col] / soi_deductions['Total']
# Parenthesised single-argument form prints identically on Python 2 and Python 3
print('Error in Itemized Deductions Relative to SOI Totals')
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference: New,% Difference: New,Difference: Current,% Difference: Current
Charitable Cash Contributions,-5987233855.7,-3.9,-4703427583.1,-3.0
Charitable Non-Cash Contributions,-30160002722.4,-46.2,-31555102263.5,-48.3
Interest Paid,60086643754.4,19.5,-7489448194.8,-2.4
Medical Expenses,-17629204361.4,-13.7,-9779156210.6,-7.6
Net Casualty or Loss,1757990563.6,79.8,2251393716.6,102.1
Real Estate Taxes,11388573128.2,6.3,17247882146.1,9.5
State and Local Taxes,-18564889867.9,-5.7,-16248434290.2,-4.9
Total Misc. Expenses,1167649238.6,0.9,3957677605.4,3.1


In [113]:
# Create DataFrame subset of calculator records for the new weights
item_df_new = pd.DataFrame({'e00200': calc_new.records.e00200,
                             's006': calc_new.records.s006,
                             'c04470': calc_new.records.c04470})
item_df_new['itemizer'] = np.where(calc_new.records.c04470 > 0, 1, 0)
item_participation_rt_new = percentile(item_df_new, 'itemizer', 100, 'e00200', 's006')
# Create DataFrame subset for current weights
item_df = pd.DataFrame({'e00200': calc.records.e00200,
                        's006': calc.records.s006,
                        'c04470': calc.records.c04470})
item_df['itemizer'] = np.where(calc.records.c04470 > 0, 1, 0)
item_participation_rt = percentile(item_df, 'itemizer', 100, 'e00200', 's006')
item_index = item_participation_rt_new.index

In [114]:
# Itemizing rate by wage percentile: new data vs. current data.
# The rate is plotted on the y axis (x is item_index), so the tooltip must report
# $y; it previously reported $x, i.e. the percentile position, under the
# 'Itemizing Rate' label.
hover_ded = HoverTool(tooltips=[('Percentile', '$index'), ('Itemizing Rate', '$y')])
f = figure(title='Itemizing Rate by Wage Percentile',
           x_axis_label='Wage Percentile', tools=[hover_ded, 'save', 'reset', 'zoom_in', 'zoom_out'])
f.line(item_index, item_participation_rt_new, legend='new')
f.line(item_index, item_participation_rt, legend='Current', color='red')
f.legend.location = 'top_left'
show(f)

In [115]:
# Average Itemized Deduction by percentile
itemizers_new = copy.deepcopy(item_df_new[item_df_new['c04470'] > 0])
item_mean_new = percentile(itemizers_new, 'c04470', 100, 'e00200', 's006')
itemizers = copy.deepcopy(item_df[item_df['c04470'] > 0])
item_mean = percentile(itemizers, 'c04470', 100, 'e00200', 's006')

In [116]:
# Mean itemized deduction among itemizers by wage percentile: new vs. current data
hover = HoverTool(tooltips=[('Percentile', '$index'), ('Mean', '$y')])
mean_tools = [hover, 'save', 'reset', 'zoom_in', 'zoom_out']
f = figure(title='Mean Itemized Deduction Total Among Itemizers',
           x_axis_label='Wage Percentile',
           tools=mean_tools)
# (series, legend text, extra line options) for each data source
for mean_series, legend_text, line_options in [(item_mean_new, 'new', {}),
                                               (item_mean, 'Current', {'color': 'red'})]:
    f.line(item_index, mean_series, legend=legend_text, **line_options)
f.legend.location = 'top_left'
show(f)