[Rendered Notebook](http://nbviewer.jupyter.org/github/andersonfrailey/Notebook-Uploads/blob/master/2011%20PUF%20Comparisons.ipynb)

In [1]:
from taxcalc import *
from taxcalc.utils import *
from bokeh.io import show, output_notebook
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from collections import OrderedDict
import copy
import pandas as pd
from notebookfunctions import distribution, index_list, percentile
output_notebook()

In [2]:
# Data from IRS-SOI Tax Stats. Used for comparison
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)  # Equivalent to tax-calc diagnostic table
soi_income = pd.read_csv('soi_income_stats.csv')  # Distribution of income items
soi_deductions = pd.read_csv('soi_deductions.csv', index_col=0)  # Itemized deductions
soi_deductions['index'] = soi_deductions.index

In [3]:
# Read in new PUF and associated data
gf = pd.read_csv('../taxdata/stage1/growfactors.csv')
wt = pd.read_csv('../taxdata/puf_stage2/puf_weights.csv')
puf = pd.read_csv('../taxdata/puf_data/puf.csv')
adj = pd.read_csv('../taxdata/puf_stage3/puf_ratios.csv', index_col=0)
adj = adj.transpose()

In [4]:
# base calculator
calc = Calculator(records=Records(), policy = Policy())
calc.advance_to_year(2014)
calc.calc_all()

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [5]:
# new calculator
growf = Growfactors('../taxdata/stage1/growfactors.csv')
rec = Records(puf, gfactors=growf,
              weights=wt, adjust_ratios=adj,
              start_year=2011)
pol = Policy(gfactors=growf, start_year=2011, num_years=17)
consump = Consumption(start_year=2011, num_years=17)
behave = Behavior(start_year=2011, num_years=17)
calc_new = Calculator(records=rec, policy=pol, consumption=consump, behavior=behave)
calc_new.advance_to_year(2014)
calc_new.calc_all()

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2011.


### New Distribution Table 

In [6]:
create_distribution_table(calc_new.records, groupby='weighted_deciles',
                          result_type='weighted_sum', income_measure='c00100')

Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16206395,-78640573696,5762025,134654847558,0,0,85843986138,0,0,-58517301643,...,3835825677,3835825677,5632648,78904008,381238870,3527858166,2464037908,5991896074,105359863343,99367967269
1,16204819,63704900478,16184778,110537135757,7429,44335364,63476364501,1095738389,50996952,64297777050,...,144908055,195905007,2028821,55974570,3094983317,-2845132561,6956079225,4110946665,145567421853,141456475188
2,16208384,162310710977,16017494,130245405423,189328,2237933212,94470885590,10603976689,991141080,162319359799,...,385863040,1377004120,20044708,74014919,18400302169,-16969327838,18661558791,1692230953,245081398498,243389167545
3,16206132,264859264278,15477886,134228399807,725915,8981904672,121202978022,47727388417,4609600845,259737330732,...,312716646,4922317491,363423227,132988656,32336000629,-27644117709,31968219818,4324102109,348026791689,343702689580
4,16206589,399128538462,14746244,129954265048,1459022,20718230181,124520113634,136265177337,15008210678,387006251093,...,337619915,15345830593,2097498025,251710781,24200492442,-10700449092,50292502365,39592053273,481472013035,441879959762
5,16206774,569048532808,13549493,121512231426,2657056,40492221103,127506448161,282143353077,33357260698,546370099341,...,410295105,33767555803,4931364180,385242568,12869730495,16351703696,72332736462,88684440158,650650708947,561966268790
6,16205573,798285142604,11505025,110061823309,4698186,74638454665,129727285870,485933399820,62173231715,757071310182,...,442907417,62616139132,7341414067,607012613,2585112946,53296624733,98909179970,152205804703,879873401591,727667596888
7,16207577,1125506953168,9011031,97732167310,7196466,129569865956,143532643748,755665190044,108298900500,1056151381051,...,993470896,109292371395,9022247286,937451583,170426009,101037149683,132619724389,233656874072,1219973109344,986316235272
8,16206507,1653409616896,6048471,72927653188,10156454,219351575334,167613149866,1197182539709,184111307240,1538017364547,...,1168857948,185280165188,10724277646,1396883871,28396580,175924374833,198491213210,374415588043,1780534258480,1406118670437
9,16206820,4974371374346,2099057,26073021803,14106665,583915580226,159944988778,4206397290372,1036713782019,4729967105459,...,33925843424,1070639625443,22474998774,29797543141,221326,1077961948484,346058371874,1424020320358,5250304943261,3826284622903


### Current Distribution Table

In [7]:
create_distribution_table(calc.records, groupby='weighted_deciles',
                          result_type='weighted_sum', income_measure='c00100')

Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16220752,-149726787864,3708850,123664286701,0,0,86537994034,0,0,-136818460696,...,3184071522,3184071522,3537606,86953480,906029892,2361457503,3220506246,5581963748,37122742595,31540778847
1,16223564,49153922700,16193482,102703346069,16978,54523814,62800517303,697441193,29889716,49487976905,...,138942679,168832395,469458,25782677,3091156478,-2897010865,5617627163,2720616298,131490747997,128770131699
2,16222306,143134086177,16085102,125770900218,127058,1491389937,88758571182,6585562650,617680780,142336044721,...,76537667,694218447,25304153,95003962,15739892911,-14975974655,15900686677,924712022,227737509498,226812797476
3,16222064,249213176722,15443192,130019629204,778872,10086591886,114942622376,42100726133,4084319153,242729143343,...,237136154,4321455307,570858074,117955756,30601982626,-26733429636,29425255435,2691825799,333707338575,331015512775
4,16223014,382321791610,14706390,127392012174,1515580,23212520833,121756634656,124657921396,13612657115,367965705687,...,197691676,13810348792,2758322364,249777902,25346402224,-14044597895,47356090991,33311493096,464944700633,431633207537
5,16222499,553710452123,13424385,118454408988,2798053,43480770883,124038481638,271695058062,32428593755,527527807021,...,203853087,32632446842,6169999571,448650310,14281999026,12629098555,69731309767,82360408321,634658321968,552297913647
6,16221953,784431401275,11249915,104753187850,4970371,81764884123,125384685878,475188293251,61446967759,738301676883,...,355106663,61802074423,9120660110,718152651,3675361936,49724205028,96133672251,145857877279,867110514253,721252636974
7,16223206,1112500051832,8984526,94791036921,7234590,131122757497,139235241390,748492817636,108830829123,1042072554885,...,586620598,109417449721,11746019250,1047217608,1386607192,97332040887,132393934093,229725974979,1205535207793,975809232814
8,16222053,1667643238695,5892361,69933991672,10327914,222515689982,164570887134,1211402555603,189381889569,1550734759269,...,738344273,190120233842,14421276407,1601565184,1532104925,175768417694,203674650545,379443068239,1793082486885,1413639418647
9,16223209,5228312503191,2015145,24192511419,14207267,591438451680,154909947388,4460206219834,1123569557383,4960693754490,...,33017057760,1156586615144,28979453391,34765659102,873195538,1161499625317,354329788398,1515829413714,5475151525549,3959322111835


### Diagnostic Table Comparison

In [8]:
new_diag = create_diagnostic_table(calc_new)
diag = create_diagnostic_table(calc)  # Current PUF

In [9]:
diag_data = pd.DataFrame()
diag_data['SOI'] = soi_stats['Value']
diag_data['New'] = new_diag[2014]
diag_data['Current'] = diag[2014]
diag_data['% Change'] = ((new_diag[2014] / diag[2014]) - 1) * 100
diag_data['New - SOI'] = map(abs, new_diag[2014] - diag_data['SOI'])
diag_data['Current - SOI'] = map(abs, diag[2014] - diag_data['SOI'])
diag_data

Unnamed: 0,SOI,New,Current,% Change,New - SOI,Current - SOI
Returns (#m),148.6,162.1,162.2,-0.1,13.5,13.6
AGI ($b),9771.0,9932.0,10020.7,-0.9,161.0,249.7
Itemizers (#m),44.0,41.2,42.0,-1.9,2.8,2.0
Itemized Deduction ($b),1206.7,1080.0,1105.2,-2.3,126.7,101.5
Standard Deduction Filers (#m),117.4,110.4,107.7,2.5,7.0,9.7
Standard Deduction ($b),876.2,979.3,925.0,5.9,103.1,48.8
Personal Exemption ($b),1121.6,1159.1,1113.8,4.1,37.5,7.8
Taxable Income ($b),6997.9,7123.0,7341.0,-3.0,125.1,343.1
Regular Tax ($b),,1445.3,1534.0,-5.8,,
AMT Income ($b),,9442.4,9485.0,-0.4,,


### Income Levels 

In [10]:
inc_dict = OrderedDict()
inc_dict['New'] = [] 
inc_dict['Current'] = []
inc_dict['SOI'] = []
inc_dict['Pct Diff'] = []
inc_dict['New - SOI'] = []
inc_dict['Current - SOI'] = []
inc_list = ['WAS', 'Taxable Interest', 'Ordinary Dividends', 'Qualified Dividends', 'Business Income']
# Wage and salary
was_new = (calc_new.records.e00200 * calc_new.records.s006).sum()
inc_dict['New'].append(was_new)
was = (calc.records.e00200 * calc.records.s006).sum()
inc_dict['Current'].append(was)
soi_was = soi_income['WAS'].sum()
inc_dict['SOI'].append(soi_was)
new_was_diff = abs(was_new - soi_was)
inc_dict['New - SOI'].append(new_was_diff)
curr_was_diff = abs(was - soi_was)
inc_dict['Current - SOI'].append(curr_was_diff)
inc_dict['Pct Diff'].append((curr_was_diff / was) * 100)
# Interest income
int_new = (calc_new.records.e00300 * calc_new.records.s006).sum()
inc_dict['New'].append(int_new)
int_ = (calc.records.e00300 * calc.records.s006).sum()
inc_dict['Current'].append(int_)
int_soi = soi_income['INT'].sum()
inc_dict['SOI'].append(int_soi)
new_int_diff = abs(int_new - int_soi)
inc_dict['New - SOI'].append(new_int_diff)
curr_int_diff = abs(int_ - int_soi)
inc_dict['Current - SOI'].append(curr_int_diff)
inc_dict['Pct Diff'].append((curr_int_diff / int_) * 100)
# Ordinary dividends
odiv_new = (calc_new.records.e00600 * calc_new.records.s006).sum()
inc_dict['New'].append(odiv_new)
odiv = (calc.records.e00600 * calc.records.s006).sum()
inc_dict['Current'].append(odiv)
odiv_soi = soi_income['ODIV'].sum()
inc_dict['SOI'].append(odiv_soi)
new_odiv_diff = abs(odiv_new - odiv_soi)
inc_dict['New - SOI'].append(new_odiv_diff)
curr_odiv_diff = abs(odiv - odiv_soi)
inc_dict['Current - SOI'].append(curr_odiv_diff)
inc_dict['Pct Diff'].append((curr_odiv_diff / odiv) * 100)
# Qualified dividends
qdiv_new = (calc_new.records.e00650 * calc_new.records.s006).sum()
inc_dict['New'].append(qdiv_new)
qdiv = (calc.records.e00650 * calc.records.s006).sum()
inc_dict['Current'].append(qdiv)
qdiv_soi = soi_income['QDIV'].sum()
inc_dict['SOI'].append(qdiv_soi)
new_qdiv_diff = abs(qdiv_new - qdiv_soi)
inc_dict['New - SOI'].append(new_qdiv_diff)
curr_qdiv_diff = abs(qdiv - qdiv_soi)
inc_dict['Current - SOI'].append(curr_qdiv_diff)
inc_dict['Pct Diff'].append((curr_qdiv_diff / qdiv) * 100)

biz_new = (calc_new.records.e00900 * calc_new.records.s006).sum()
inc_dict['New'].append(biz_new)
biz = (calc.records.e00900 * calc.records.s006).sum()
inc_dict['Current'].append(biz)
biz_soi = soi_income['BIZ'].sum()
inc_dict['SOI'].append(biz_soi)
new_biz_diff = abs(biz_new - biz_soi)
inc_dict['New - SOI'].append(new_biz_diff)
curr_biz_diff = abs(biz - biz_soi)
inc_dict['Current - SOI'].append(curr_biz_diff)
inc_dict['Pct Diff'].append((curr_biz_diff / biz) * 100)

inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = inc_list
print "'Pct Diff' represents the percentage change between the current and new sums"
inc_df

'Pct Diff' represents the percentage change between the current and new sums


Unnamed: 0,New,Current,SOI,Pct Diff,New - SOI,Current - SOI
WAS,6784950833499.0,6820912371695.9,6785880966000.0,0.5,930132501.0,35031405695.9
Taxable Interest,90425786010.6,97246196658.4,93894281000.0,3.4,3468494989.4,3351915658.4
Ordinary Dividends,254696960387.0,270137086776.2,254702232000.0,5.7,5271613.0,15434854776.2
Qualified Dividends,180830300363.9,198124994913.5,192447708000.0,2.9,11617407636.1,5677286913.5
Business Income,317260010213.3,309444223964.1,317248783000.0,2.5,11227213.3,7804559035.9


### Distribution of Income Variables

In [11]:
# Generate data for distribution plots
new_dist = pd.DataFrame()
cur_dist = pd.DataFrame()
new_was = distribution(calc_new.records.e00200, calc_new.records.s006, calc_new.records.c00100)
cur_was = distribution(calc.records.e00200, calc.records.s006, calc.records.c00100)
new_int = distribution(calc_new.records.e00300, calc_new.records.s006, calc_new.records.c00100)
cur_int = distribution(calc.records.e00300, calc.records.s006, calc.records.c00100)
new_odiv = distribution(calc_new.records.e00600, calc_new.records.s006, calc_new.records.c00100)
cur_odiv = distribution(calc.records.e00600, calc.records.s006, calc.records.c00100)
new_qdiv = distribution(calc_new.records.e00650, calc_new.records.s006, calc_new.records.c00100)
cur_qdiv = distribution(calc.records.e00650, calc.records.s006, calc.records.c00100)
new_biz = distribution(calc_new.records.e00900, calc_new.records.s006, calc_new.records.c00100)
cur_biz = distribution(calc.records.e00900, calc.records.s006, calc.records.c00100)
new_dist['WAS'] = new_was[1]
cur_dist['WAS'] = cur_was[1]
new_dist['INT'] = new_int[1]
cur_dist['INT'] = cur_int[1]
new_dist['ODIV'] = new_odiv[1]
cur_dist['ODIV'] = cur_odiv[1]
new_dist['QDIV'] = new_qdiv[1]
cur_dist['QDIV'] = cur_qdiv[1]
new_dist['BIZ'] = new_biz[1]
cur_dist['BIZ'] = cur_biz[1]
new_dist['AGI Bin'] = index_list()
cur_dist['AGI Bin'] = index_list()
new_dist['label'] = 'New'
cur_dist['label'] = 'Current'
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
soi_dist = pd.DataFrame()
for item in items_tups:
    soi_dist[item[0]] = (soi_income[item[0]] / soi_income[item[0]].sum())
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'
scatter_data = pd.concat([new_dist, cur_dist, soi_dist])
scatter_data_new = ColumnDataSource(scatter_data[scatter_data['label'] == 'New'])
scatter_data_cur = ColumnDataSource(scatter_data[scatter_data['label'] == 'Current'])
scatter_data_soi = ColumnDataSource(scatter_data[scatter_data['label'] == 'SOI'])

figure_list = []
for item in items_tups:
    title = 'Percent of Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=scatter_data_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=scatter_data_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=scatter_data_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format="0.00%")
    hover = HoverTool(tooltips=[('Pct', '@{}%'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)

In [12]:
show(column(figure_list))

In [13]:
new_tot = pd.DataFrame()
cur_tot = pd.DataFrame()
new_tot['WAS'] = new_was[0]
cur_tot['WAS'] = cur_was[0]
new_tot['INT'] = new_int[0]
cur_tot['INT'] = cur_int[0]
new_tot['ODIV'] = new_odiv[0]
cur_tot['ODIV'] = cur_odiv[0]
new_tot['QDIV'] = new_qdiv[0]
cur_tot['QDIV'] = cur_qdiv[0]
new_tot['BIZ'] = new_biz[0]
cur_tot['BIZ'] = cur_biz[0]
new_tot['AGI Bin'] = index_list()
cur_tot['AGI Bin'] = index_list()
new_tot['label'] = 'New'
cur_tot['label'] = 'Current'
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'
total_data = pd.concat([new_tot, cur_tot, soi_income])
# Create scatter plot objects
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]

total_data_new = ColumnDataSource(total_data[total_data['label'] == 'New'])
total_data_cur = ColumnDataSource(total_data[total_data['label'] == 'Current'])
total_data_soi = ColumnDataSource(total_data[total_data['label'] == 'SOI'])

figure_list = []
for item in items_tups:
    title = 'Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=total_data_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=total_data_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=total_data_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
    hover = HoverTool(tooltips=[('Total', '@{}{}'.format(item[0], '{$0.00a}'))])
    f.add_tools(hover)
    figure_list.append(f)

In [14]:
show(column(figure_list))

### Itemized Deduction Amounts

In [15]:
deductions_new = {'Medical Expenses': (calc_new.records.e17500[calc_new.records.c04470 > 0] *
                                        calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'State and Local Taxes':  (calc_new.records.e18400[calc_new.records.c04470 > 0] *
                                              calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Real Estate Taxes':  (calc_new.records.e18500[calc_new.records.c04470 > 0] *
                                          calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Interest Paid':  (calc_new.records.e19200[calc_new.records.c04470 > 0] *
                                       calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Charitable Cash Contributions': (calc_new.records.e19800[calc_new.records.c04470 > 0] *
                                                     calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Charitable Non-Cash Contributions': (calc_new.records.e20100[calc_new.records.c04470 > 0] *
                                                         calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Total Misc. Expenses':  (calc_new.records.e20400[calc_new.records.c04470 > 0] *
                                             calc_new.records.s006[calc_new.records.c04470 > 0]).sum(),
                   'Net Casualty or Loss': (calc_new.records.g20500[calc_new.records.c04470 > 0] *
                                            calc_new.records.s006[calc_new.records.c04470 > 0]).sum()}
ded_new_df = pd.DataFrame.from_dict(deductions_new, 'index')
ded_new_df.columns = ['Total']
ded_new_df['source'] = 'New'

deductions = {'Medical Expenses': (calc.records.e17500[calc.records.c04470 > 0] *
                                   calc.records.s006[calc.records.c04470 > 0]).sum(),
              'State and Local Taxes':  (calc.records.e18400[calc.records.c04470 > 0] *
                                         calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Real Estate Taxes':  (calc.records.e18500[calc.records.c04470 > 0] *
                                     calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Interest Paid':  (calc.records.e19200[calc.records.c04470 > 0] *
                                 calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Charitable Cash Contributions': (calc.records.e19800[calc.records.c04470 > 0] *
                                                calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Charitable Non-Cash Contributions': (calc.records.e20100[calc.records.c04470 > 0] *
                                                    calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Total Misc. Expenses':  (calc.records.e20400[calc.records.c04470 > 0] *
                                        calc.records.s006[calc.records.c04470 > 0]).sum(),
              'Net Casualty or Loss': (calc.records.g20500[calc.records.c04470 > 0] *
                                       calc.records.s006[calc.records.c04470 > 0]).sum()}
ded_df = pd.DataFrame.from_dict(deductions, 'index')
ded_df.columns = ['Total']
ded_df['source'] = 'Current'
soi_deductions['source'] = 'SOI'

ded_full_df = pd.concat([ded_new_df, ded_df, soi_deductions])

In [16]:
ded_cds_new = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'New'])
ded_cds_new.add(data=[i - .5 for i in range(1, 9)], name='xaxis')
ded_cds_cur = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'Current'])
ded_cds_cur.add(data=[i - .25 for i in range(1, 9)], name='xaxis')
ded_cds_soi = ColumnDataSource(ded_full_df[ded_full_df['source'] == 'SOI'])
ded_cds_soi.add(data=[i - .75 for i in range(1, 9)], name='xaxis')

'xaxis'

In [17]:
xaxis_list = ['Interest Paid', 'Total Misc. Expenses', 'Medical Expenses',
              'Charitable Non-Cash Contributions', 'Real Estate Taxes',
              'State and Local Taxes', 'Charitable Cash Contributions',
              'Net Casualty or Loss']
f = figure(title='Itemized Deduction Totals', x_range=xaxis_list, width=800)
#f = figure(title='Itemized Deduction Totals')
f.xaxis.major_label_orientation = 45
f.vbar(x='xaxis', bottom=0, top='Total', legend='New',
       color='green', alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_new)
f.vbar(x='xaxis', bottom=0, top='Total', legend='Current', color='red',
       alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_cur)
f.vbar(x='xaxis', bottom=0, top='Total', legend='SOI', color='blue',
       alpha=0.5, line_alpha=1,
       width=0.25, source=ded_cds_soi)
hover = HoverTool(tooltips=[('Total', '@Total{$0.00a}')])
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.add_tools(hover)

show(f)

In [18]:
ded_error_df = pd.DataFrame()
ded_error_df['Difference: New'] = ded_new_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference: New'] = 100 * ded_error_df['Difference: New'] / soi_deductions['Total']
ded_error_df['Difference: Current'] = ded_df['Total'] - soi_deductions['Total']
ded_error_df['% Difference: Current'] = 100 * ded_error_df['Difference: Current'] / soi_deductions['Total']
print 'Error in Itemized Deductions Relative to SOI Totals'
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference: New,% Difference: New,Difference: Current,% Difference: Current
Charitable Cash Contributions,-6384396376.2,-4.1,-4697400311.8,-3.0
Charitable Non-Cash Contributions,-27175410720.7,-41.6,-31554076082.7,-48.3
Interest Paid,-16328258245.9,-5.3,-7473328319.6,-2.4
Medical Expenses,-12054421348.8,-9.3,-9751487495.3,-7.6
Net Casualty or Loss,2863801247.1,129.9,2251393716.6,102.1
Real Estate Taxes,2384469754.8,1.3,17200103389.4,9.5
State and Local Taxes,-18636182248.3,-5.7,-16242417254.8,-4.9
Total Misc. Expenses,3753466747.4,3.0,3967640537.5,3.1


In [19]:
# Create DataFrame subset of calculator records for the new weights
item_df_new = pd.DataFrame({'e00200': calc_new.records.e00200,
                             's006': calc_new.records.s006,
                             'c04470': calc_new.records.c04470})
item_df_new['itemizer'] = np.where(calc_new.records.c04470 > 0, 1, 0)
item_participation_rt_new = percentile(item_df_new, 'itemizer', 100, 'e00200', 's006')
# Create DataFrame subset for current weights
item_df = pd.DataFrame({'e00200': calc.records.e00200,
                        's006': calc.records.s006,
                        'c04470': calc.records.c04470})
item_df['itemizer'] = np.where(calc.records.c04470 > 0, 1, 0)
item_participation_rt = percentile(item_df, 'itemizer', 100, 'e00200', 's006')
item_index = item_participation_rt_new.index

# Create column data sources for plotting
item_cds_new = ColumnDataSource({'rate': item_participation_rt_new,
                                 'index': item_participation_rt_new.index})
item_cds_cur = ColumnDataSource({'rate': item_participation_rt,
                                 'index': item_participation_rt_new.index})

In [20]:
hover_ded = HoverTool(tooltips=[('Percentile', '$index'), ('Itemizing Rate', '@rate{0.00%}')])
f = figure(title='Itemizing Rate by Wage Percentile',
           x_axis_label='Wage Percentile', tools=[hover_ded, 'save', 'reset', 'zoom_in', 'zoom_out'])
f.line('index', 'rate', legend='New', source=item_cds_new)
f.line('index', 'rate', legend='Current', color='red', source=item_cds_cur)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
show(f)

In [21]:
# Average Itemized Deduction by percentile
itemizers_new = copy.deepcopy(item_df_new[item_df_new['c04470'] > 0])
item_mean_new = percentile(itemizers_new, 'c04470', 100, 'e00200', 's006')
itemizers = copy.deepcopy(item_df[item_df['c04470'] > 0])
item_mean = percentile(itemizers, 'c04470', 100, 'e00200', 's006')
# Create column data source for plotting
item_cds_new = ColumnDataSource({'mean': item_mean_new,
                                 'xaxis': item_index})
item_cds_cur = ColumnDataSource({'mean': item_mean,
                                 'xaxis': item_index})

In [22]:
hover = HoverTool(tooltips=[('Percentile', '@xaxis'), ('Mean', '@mean{$0.00a}')])
f = figure(title='Mean Itemized Deduction Total Among Itemizers',
           x_axis_label='Wage Percentile', tools=[hover, 'save', 'reset', 'zoom_in', 'zoom_out'])
f.line('xaxis', 'mean', legend='New', source=item_cds_new)
f.line('xaxis', 'mean', legend='Current', color='red', source=item_cds_cur)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
show(f)

### Tax Liability by Year

In [23]:
liabilities_current = {'Income Tax': [],
                       'Payroll Tax': [],
                       'Combined Tax': [],
                       'source': []}
liabilities_new = {'Income Tax': [],
                   'Payroll Tax': [],
                   'Combined Tax': [],
                   'source': []}
years = []
for year in range(2015, 2028):
    years.append(year)
    calc.advance_to_year(year)
    calc_new.advance_to_year(year)
    calc.calc_all()
    calc_new.calc_all()
    liabilities_current['Income Tax'].append((calc.records.s006 * calc.records.iitax).sum())
    liabilities_current['Payroll Tax'].append((calc.records.s006 * calc.records.payrolltax).sum())
    liabilities_current['Combined Tax'].append((calc.records.s006 * calc.records.combined).sum())
    liabilities_new['Income Tax'].append((calc_new.records.s006 * calc_new.records.iitax).sum())
    liabilities_new['Payroll Tax'].append((calc_new.records.s006 * calc_new.records.payrolltax).sum())
    liabilities_new['Combined Tax'].append((calc_new.records.s006 * calc_new.records.combined).sum())
    liabilities_current['source'].append('Current')
    liabilities_new['source'].append('New')
liabilities_new['years'] = years
liabilities_current['years'] = years

In [24]:
# column data sources for plotting
liabilities_cds_new = ColumnDataSource(liabilities_new)
liabilities_cds_cur = ColumnDataSource(liabilities_current)

In [35]:
f = figure(title='Tax Liabilities 2015-2027')
f.width = 800
f.height = 500
f.line('years', 'Income Tax', color='blue', line_width=2, legend='Income-Current',
       source=liabilities_cds_cur)
f.line('years', 'Income Tax', color='cyan', line_width=2, legend='Income-New',
       source=liabilities_cds_new)
f.line('years', 'Payroll Tax', color='purple', line_width=2, legend='Payroll-Current',
       source=liabilities_cds_cur)
f.line('years', 'Payroll Tax', color='magenta', line_width=2, legend='Payroll-New',
       source=liabilities_cds_new)
f.line('years', 'Combined Tax', color='green', line_width=2, legend='Combined-Current',
       source=liabilities_cds_cur)
f.line('years', 'Combined Tax', color='lime', line_width=2, legend='Combined-New',
       source=liabilities_cds_new)

c1 = f.circle('years', 'Income Tax', color='blue', size=5, legend='Income-Current',
              source=liabilities_cds_cur)
c2 = f.circle('years', 'Income Tax', color='cyan', size=5, legend='Income-New',
              source=liabilities_cds_new)
c3 = f.circle('years', 'Payroll Tax', color='purple', size=5, legend='Payroll-Current',
              source=liabilities_cds_cur)
c4 = f.circle('years', 'Payroll Tax', color='magenta', size=5, legend='Payroll-New',
              source=liabilities_cds_new)
c5 = f.circle('years', 'Combined', color='green', size=5, legend='Combined-Current',
              source=liabilities_cds_cur)
c6 = f.circle('years', 'Combined', color='lime', size=5, legend='Combined-New',
              source=liabilities_cds_new)
f.legend.location = 'top_left'
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')

hover = HoverTool(tooltips=[('', '@source'), ('Year', '@years'),
                            ('Income Tax', '@Income Tax{$0.00a}')],
                  renderers=[c1, c2])
hover2 = HoverTool(tooltips=[('', '@source'), ('Year', '@years'),
                             ('Payroll Tax', '@$Payroll Tax{$0.00a}')],
                  renderers=[c3, c4])
hover3 =HoverTool(tooltips=[('', '@source'), ('Year', '@years'),
                            ('Combined Tax', '@Combined{$0.00a}')],
                  renderers=[c5, c6])
f.add_tools(hover, hover2, hover3)
show(f)

In [28]:
ldf_current = pd.DataFrame(liabilities_current, index=years)
ldf_current.drop(['source', 'years'], axis=1, inplace=True)
print 'Current Tax Liabilities by Year'
ldf_current

Current Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,2580952229934.0,1580105276837.2,1000846953096.9
2016,2652676860126.7,1615051720045.1,1037625140081.5
2017,2813546326543.2,1720999668174.9,1092546658368.3
2018,2937654024096.4,1795828233432.2,1141825790664.1
2019,3045538636999.4,1858467869104.0,1187070767895.5
2020,3154058759275.7,1924670545585.3,1229388213690.6
2021,3273926547197.1,1999801411738.6,1274125135458.4
2022,3409445728811.7,2086327698559.6,1323118030252.1
2023,3556676832883.3,2181320492211.6,1375356340671.6
2024,3711096056404.3,2281099251848.1,1429996804556.1


In [29]:
ldf_new = pd.DataFrame(liabilities_new, index=years)
ldf_new.drop(['source', 'years'], axis=1, inplace=True)
print 'New Tax Liabilities by Year'
ldf_new

New Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,2516872869380.7,1505975871976.7,1010896997404.0
2016,2607651099630.4,1552656739226.2,1054994360404.1
2017,2751912554432.9,1648253390030.4,1103659164402.6
2018,2876318459793.0,1723343912727.0,1152974547066.1
2019,2985167955505.3,1787029282615.6,1198138672889.7
2020,3093160807641.5,1852664170135.1,1240496637506.4
2021,3211974850777.4,1926314926443.0,1285659924334.4
2022,3345328585244.3,2010254029148.8,1335074556095.6
2023,3490460673186.3,2102750305952.5,1387710367233.9
2024,3642158855012.3,2199460108389.4,1442698746622.9


In [30]:
print 'Difference in Tax Liabilities by Year'
diff_df = ldf_new - ldf_current
diff_df = diff_df.append(diff_df.sum(), ignore_index=True)
years.append('Total')
diff_df.index = years
diff_df

Difference in Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,-64079360553.3,-74129404860.4,10050044307.1
2016,-45025760496.3,-62394980818.9,17369220322.6
2017,-61633772110.3,-72746278144.5,11112506034.3
2018,-61335564303.3,-72484320705.2,11148756401.9
2019,-60370681494.1,-71438586488.3,11067904994.2
2020,-60897951634.2,-72006375450.2,11108423815.9
2021,-61951696419.6,-73486485295.6,11534788876.0
2022,-64117143567.4,-76073669410.8,11956525843.5
2023,-66216159697.0,-78570186259.1,12354026562.3
2024,-68937201391.9,-81639143458.7,12701942066.7


In [31]:
print 'Pct. Change in Tax Liabilities by Year'
((ldf_new - ldf_current) / ldf_current) * 100

Pct. Change in Tax Liabilities by Year


Unnamed: 0,Combined Tax,Income Tax,Payroll Tax
2015,-2.5,-4.7,1.0
2016,-1.7,-3.9,1.7
2017,-2.2,-4.2,1.0
2018,-2.1,-4.0,1.0
2019,-2.0,-3.8,0.9
2020,-1.9,-3.7,0.9
2021,-1.9,-3.7,0.9
2022,-1.9,-3.6,0.9
2023,-1.9,-3.6,0.9
2024,-1.9,-3.6,0.9
