In [1]:
from taxcalc import *
from bokeh.io import show, output_notebook
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from collections import OrderedDict
import copy
import pandas as pd
from notebookfunctions import distribution, index_list, percentile
# Configure Bokeh so that later show() calls render inside the notebook.
output_notebook()

In [2]:
# Published IRS-SOI Tax Stats figures; every comparison below is measured against these.
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)  # Same rows as the tax-calc diagnostic table
soi_income = pd.read_csv('soi_income_stats.csv')  # Income items; later cells label its rows by AGI bin
soi_deductions = pd.read_csv('soi_deductions.csv', index_col=0)  # Itemized deductions, indexed by name
# Keep the row labels available as an ordinary column as well as the index.
soi_deductions['index'] = soi_deductions.index

In [3]:
# Inputs for the new PUF, all taken from a sibling taxdata checkout
gf = pd.read_csv('../taxdata/stage1/growfactors.csv')
wt = pd.read_csv('../taxdata/puf_stage2/puf_weights.csv')
puf = pd.read_csv('../taxdata/puf_data/puf.csv')
# The ratios file is read with its first column as the index and then transposed
adj = pd.read_csv('../taxdata/puf_stage3/puf_ratios.csv', index_col=0).transpose()

In [4]:
# Baseline ("current") calculator.
# Growth factors come from a temporary file; Records() is given no data argument,
# so the input data is whatever taxcalc loads by default (the run log below
# reports 2009 data extrapolated to 2013).
grow_temp = Growfactors('taxcalc/growfactors_temp.csv')
recs = Records(gfactors=grow_temp)
pols = Policy()
calc = Calculator(records=recs, policy=pols)
# Advance to the comparison year first, then compute; later cells read 2014 results.
calc.advance_to_year(2014)
calc.calc_all()

You loaded data for 2009.
Your data include the following unused variables that will be ignored:
  n1821
Tax-Calculator startup automatically extrapolated your data to 2013.


In [5]:
# Calculator built from the new taxdata inputs loaded above
growf = Growfactors('../taxdata/stage1/growfactors.csv')
rec = Records(
    puf,
    gfactors=growf,
    weights=wt,
    adjust_ratios=adj,
    start_year=2011,
)
pol = Policy(gfactors=growf)
# Consumption and Behavior are given the same 2011-based, 17-year window
param_window = {'start_year': 2011, 'num_years': 17}
consump = Consumption(**param_window)
behave = Behavior(**param_window)
calc_new = Calculator(records=rec, policy=pol,
                      consumption=consump, behavior=behave)
# Bring the calculator to the comparison year, echo the year, then compute
calc_new.advance_to_year(2014)
print(calc_new.policy_current_year())
calc_new.calc_all()

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2013.
2014


In [6]:
# Variables extracted from both calculators for the comparisons that follow
var_list = [
    # income items, then c00100 and the s006 weight
    'e00200', 'e00300', 'e00600', 'e00650', 'e00900', 'c00100', 's006',
    # itemized-deduction components (c04470 is the total tested for itemizers later)
    'e17500', 'c04470', 'e18400', 'e18500', 'e19200', 'e19800', 'e20100',
    'e20400', 'g20500',
    # liabilities
    'iitax', 'payrolltax', 'combined',
]
# One frame per calculator with identical columns
new_df = calc_new.dataframe(var_list)
calc_df = calc.dataframe(var_list)

# Distribution Tables

In [7]:
# distribution_tables returns a pair; only the first table is displayed here
curr_table, _ = calc.distribution_tables(None)
print('Current PUF')
curr_table

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'


Current PUF


Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16221793,-87307405375,11146637,96407359841,6087,264420507,66734350903,837996365,80929000,-93697347981,...,0,80929000,6971601,81400811,2526565011,-2371206801,5813976106,3442769306,-83056124135,-86498893441
1,16222840,97696000447,13632755,114464028623,54252,427192044,79940587312,4098599114,366554922,97335286959,...,7933783,374488705,7163854,67319871,12374838446,-11940193724,13166598731,1226405007,136744925045,135518520038
2,16221490,163468877532,13230158,123397911082,291268,3127562614,105324141284,23615280936,2317284610,161287246966,...,28785086,2346069695,313479968,95718940,26344313834,-24216005166,22653526956,-1562478210,243042824393,244605302603
3,16223700,244271226308,14193383,126510866481,665097,8675779270,110406827660,64167133999,6728920955,238716968823,...,29752265,6758673220,1317901363,190009074,23461455686,-17830674754,32516776026,14686101272,355123480068,340437378795
4,16222475,362531321371,14165525,128715334378,1497871,20700893573,116829492149,139146548563,16078090151,349707249795,...,44135661,16122225812,3488450770,262896068,17584563210,-4687892100,46562008652,41874116551,491474693643,449600577091
5,16221683,524486317326,13178967,124460625287,2882627,43092458811,118837408328,265330113463,32309372364,498816566335,...,40562634,32349934997,5828747564,426620413,8995351586,17952456261,63669887689,81622343950,660912932518,579290588568
6,16222444,748546418528,11388577,115287352674,4803785,80625803708,125281678668,437438543361,57283794507,700594959360,...,38919584,57322714090,8154522127,680661675,2753987998,47094865639,85917017696,133011883336,882702922566,749691039230
7,16222827,1092349731112,9043469,98828370778,7172046,132250968817,139442917664,723997183246,104415929974,1018381692957,...,60125018,104476054992,11072525677,1012981743,1298281698,93118229361,124635761651,217753991012,1204302332176,986548341164
8,16221896,1655218919934,5830283,69495223098,10390784,223921661490,164449691635,1198406293993,187012834871,1535575163967,...,293968886,187306803758,14524350223,1615376719,1548159725,172849670528,201791432199,374641102727,1780169573807,1405528471079
9,16223470,5010404019291,1996496,24063641503,14214627,596219211775,156981070943,4239681834066,1061876168463,4769377025799,...,38841090217,1100717258680,29120331277,30368805793,923418383,1101042314813,357073176859,1458115491672,5288418667894,3830303176222


In [8]:
# distribution_tables returns a pair; only the first table is displayed here
new_table, _ = calc_new.distribution_tables(None)
print('New PUF')
new_table

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'


New PUF


Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16206016,-43955548157,11504053,98088334344,1536,204936571,64220092639,957908230,70501127,-44072448419,...,0,70501127,3036696,94705288,1780987150,-1618817431,5795414443,4176597012,-34255616751,-38432213763
1,16205646,110475248450,13947869,118430150466,75133,783505875,82105322994,6481522962,584233154,109915759517,...,45547935,629781090,7860622,61020742,14240643167,-13557701957,15184895783,1627193826,151056018367,149428824540
2,16207848,177938900865,13961991,126677851196,304623,3508811987,105651296378,29588592647,2873041480,175694229341,...,109405262,2982446742,171877362,103523914,26935974560,-24021881267,24819818644,797937377,253844779188,253046841811
3,16206368,260979946631,14478763,131996868587,617871,7836595201,112539923210,73509150193,7855855388,256464494957,...,138732701,7994588089,1065931612,190203222,23177106699,-16058247001,35396393499,19338146498,370136285209,350798138711
4,16206466,383411145296,14311291,130347117183,1515505,20714303000,116822779914,154986593621,18026266009,371100962564,...,164605136,18190871145,2962911887,275321807,16021423625,-518142560,49667788877,49149646316,507531873614,458382227298
5,16206726,543216446556,13201114,126587818026,2894250,41830188740,120531350879,278561088867,33456313515,519174968180,...,212795449,33669108964,4874538753,386082573,7421200937,21759451847,66207518579,87966970426,678569616339,590602645914
6,16206360,769481535126,11507686,117236831940,4672148,75243179607,125863642008,458964673984,59937635743,725880107489,...,205536546,60143172289,6643497223,535854229,1564416792,52471112503,89529425057,142000537560,899033672667,757033135107
7,16206802,1113299775875,9043900,99788803825,7157976,130517764665,140406594535,744808414336,107292201357,1040579924455,...,265126208,107557327565,8539132004,947011218,148697161,99816509618,126599797935,226416307554,1224236532027,997820224474
8,16206184,1652835233581,5906677,70037638314,10296516,219753483111,163404215869,1202833971215,186962072271,1535596279875,...,390356906,187352429177,10662017410,1376516076,25210223,178041717619,198130419337,376172136956,1779605018337,1403432881381
9,16207154,4964347167184,2042789,25010545616,14153796,585088760983,155468866391,4205097802306,1041518623845,4748915325142,...,40880623983,1082399247828,22514477283,29747560238,1797805,1089630532979,345673607519,1435304140498,5276256849959,3840952709461


# Diagnostic Table Comparison

In [9]:
# One-year diagnostic table for the current calculator (a single 2014 column in the output).
curr_diag = calc.diagnostic_table(1)
curr_diag

Unnamed: 0,2014
Returns (#m),162.2
AGI ($b),9811.7
Itemizers (#m),42.0
Itemized Deduction ($b),1109.3
Standard Deduction Filers (#m),107.8
Standard Deduction ($b),925.9
Personal Exemption ($b),1115.8
Taxable Income ($b),7096.7
Regular Tax ($b),1468.5
AMT Income ($b),9276.1


In [10]:
# One-year diagnostic table for the new calculator (a single 2014 column in the output).
new_diag = calc_new.diagnostic_table(1)
new_diag

Unnamed: 0,2014
Returns (#m),162.1
AGI ($b),9932.0
Itemizers (#m),41.7
Itemized Deduction ($b),1085.5
Standard Deduction Filers (#m),109.9
Standard Deduction ($b),957.0
Personal Exemption ($b),1129.7
Taxable Income ($b),7155.8
Regular Tax ($b),1458.6
AMT Income ($b),9439.2


In [11]:
# 2014 diagnostics side by side: SOI benchmark, new calculator, current calculator
new_2014 = new_diag[2014]
curr_2014 = curr_diag[2014]

diag_data = pd.DataFrame()
# The first column fixes the row index; later columns are aligned to it
diag_data['SOI'] = soi_stats['Value']
diag_data['New'] = new_2014
diag_data['Current'] = curr_2014
diag_data['% Change'] = ((new_2014 / curr_2014) - 1) * 100
soi_2014 = diag_data['SOI']
diag_data['New - SOI'] = new_2014 - soi_2014
diag_data['Current - SOI'] = curr_2014 - soi_2014
diag_data

Unnamed: 0,SOI,New,Current,% Change,New - SOI,Current - SOI
Returns (#m),148.6,162.1,162.2,-0.1,13.5,13.6
AGI ($b),9771.0,9932.0,9811.7,1.2,161.0,40.7
Itemizers (#m),44.0,41.7,42.0,-0.7,-2.3,-2.0
Itemized Deduction ($b),1206.7,1085.5,1109.3,-2.1,-121.2,-97.4
Standard Deduction Filers (#m),117.4,109.9,107.8,1.9,-7.5,-9.6
Standard Deduction ($b),876.2,957.0,925.9,3.4,80.8,49.7
Personal Exemption ($b),1121.6,1129.7,1115.8,1.3,8.1,-5.8
Taxable Income ($b),6997.9,7155.8,7096.7,0.8,157.9,98.8
Regular Tax ($b),,1458.6,1468.5,-0.7,,
AMT Income ($b),,9439.2,9276.1,1.8,,


# Extended Diagnostic Table

In [12]:
# Fourteen-year diagnostic table for the current calculator
print('Current Calculator')
calc.diagnostic_table(14)

Current Calculator


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
Returns (#m),162.2,164.4,167.1,170.1,172.9,175.4,177.7,179.9,182.2,184.4,186.7,189.0,191.2,192.5
AGI ($b),9811.7,10477.8,10751.9,11266.8,11809.6,12224.2,12663.8,13144.4,13675.0,14239.5,14831.2,15449.0,16033.1,16719.2
Itemizers (#m),42.0,43.0,44.0,45.1,20.0,20.1,20.7,21.4,22.0,22.6,23.3,24.0,50.3,51.5
Itemized Deduction ($b),1109.3,1172.3,1224.8,1294.1,631.8,648.6,685.1,726.3,767.5,809.8,855.8,903.4,1927.3,2023.8
Standard Deduction Filers (#m),107.8,108.8,110.2,111.5,139.1,140.8,142.1,143.3,144.6,145.8,147.1,148.3,124.4,124.8
Standard Deduction ($b),925.9,949.9,962.7,981.0,2360.7,2437.0,2511.1,2586.2,2666.2,2746.6,2829.0,2913.8,1328.9,1365.3
Personal Exemption ($b),1115.8,1138.2,1164.0,1175.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1530.4,1576.6
Taxable Income ($b),7096.7,7662.0,7844.7,8269.4,9101.5,9432.5,9772.9,10148.8,10569.3,11020.8,11493.7,11989.4,11842.9,12336.5
Regular Tax ($b),1468.5,1588.0,1628.1,1721.1,1712.3,1774.5,1837.9,1909.5,1990.6,2079.1,2172.1,2269.4,2478.4,2573.1
AMT Income ($b),9276.1,9909.7,10157.3,10634.4,11353.2,11755.2,12167.9,12618.1,13118.6,13651.8,14209.7,14792.7,15087.0,15713.7


In [13]:
# Fourteen-year diagnostic table for the new calculator
print('New Calculator')
calc_new.diagnostic_table(14)

New Calculator


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
Returns (#m),162.1,164.3,167.0,170.0,172.8,175.4,177.7,180.0,182.2,184.5,186.8,189.0,191.3,193.6
AGI ($b),9932.0,10600.7,10875.3,11397.7,11954.1,12384.2,12834.6,13326.3,13866.8,14442.5,15045.1,15674.0,16257.3,16944.8
Itemizers (#m),41.7,42.8,43.9,45.1,19.4,19.5,20.0,20.7,21.3,22.0,22.7,23.4,50.1,50.6
Itemized Deduction ($b),1085.5,1146.7,1199.2,1267.3,612.7,627.6,662.9,701.9,742.9,786.7,831.3,879.4,1886.7,1971.4
Standard Deduction Filers (#m),109.9,110.8,112.2,113.6,141.8,143.7,145.1,146.4,147.7,149.0,150.3,151.6,127.2,128.7
Standard Deduction ($b),957.0,980.5,992.5,1010.7,2422.9,2505.5,2582.9,2662.7,2745.5,2828.1,2915.5,3003.0,1378.9,1426.1
Personal Exemption ($b),1129.7,1152.6,1178.4,1190.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1551.8,1597.2
Taxable Income ($b),7155.8,7727.4,7914.1,8345.2,9172.7,9514.9,9862.1,10244.8,10670.8,11128.9,11609.0,12111.1,11984.9,12511.4
Regular Tax ($b),1458.6,1576.8,1619.9,1713.3,1704.8,1768.7,1832.6,1904.2,1985.5,2074.6,2167.6,2265.5,2476.4,2590.0
AMT Income ($b),9439.2,10078.2,10328.2,10815.3,11553.3,11974.6,12401.3,12866.3,13379.5,13925.4,14497.0,15092.0,15387.7,16034.0


# Income Levels

In [14]:
def pct_diff(current, new):
    """Return the percentage change going from ``current`` to ``new``.

    Computed as ``(new / current - 1) * 100``; the division is not guarded,
    so a zero ``current`` behaves however ``/`` does for the operand types.
    """
    ratio = new / current
    return (ratio - 1) * 100

In [15]:
# Weighted totals of each income item: new vs. current calculator vs. SOI
inc_columns = ['New', 'Current', 'SOI', 'Pct Diff', 'New - SOI', 'Current - SOI']
inc_dict = OrderedDict((column, []) for column in inc_columns)
inc_vars = ['e00200', 'e00300', 'e00600', 'e00650', 'e00900']
soi_inc_vars = ['WAS', 'INT', 'ODIV', 'QDIV', 'BIZ']
for tc, soi in zip(inc_vars, soi_inc_vars):
    new_val = calc_new.weighted_total(tc)
    curr_val = calc.weighted_total(tc)
    soi_val = soi_income[soi].sum()
    # One value per column, in the same order as inc_columns
    row = (new_val, curr_val, soi_val,
           pct_diff(curr_val, new_val),
           new_val - soi_val,
           curr_val - soi_val)
    for column, value in zip(inc_columns, row):
        inc_dict[column].append(value)
# A frame with readable row labels displays the totals more clearly
inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = ['WAS', 'Taxable Interest', 'Ordinary Dividends', 'Qualified Dividends', 'Business Income']
print("'Pct Diff' represents the percentage change between the current and new sums")
inc_df

'Pct Diff' represents the percentage change between the current and new sums


Unnamed: 0,New,Current,SOI,Pct Diff,New - SOI,Current - SOI
WAS,6784950833499.0,6784952125431.1,6785880966000.0,-0.0,-930132501.0,-928840568.9
Taxable Interest,90425786010.6,93894174624.3,93894281000.0,-3.7,-3468494989.4,-106375.7
Ordinary Dividends,254696960387.0,254702137258.3,254702232000.0,-0.0,-5271613.0,-94741.7
Qualified Dividends,180830300363.9,186804634087.7,192447708000.0,-3.2,-11617407636.1,-5643073912.3
Business Income,317260010213.3,317258657428.4,317248783000.0,0.0,11227213.3,9874428.4


# Distribution of Income Variables

In [16]:
# Distribution of each income item across AGI bins for both calculators.
# NOTE(review): distribution() comes from notebookfunctions; element [1] of its
# result is plotted here as the per-bin share of the total - confirm there.
new_dist_dict = {}
cur_dist_dict = {}
for item in inc_vars:
    new_dist_dict[item] = distribution(calc_new.array(item), calc_new.array('s006'),
                                       calc_new.array('c00100'))
    cur_dist_dict[item] = distribution(calc.array(item), calc.array('s006'),
                                       calc.array('c00100'))
new_dist = pd.DataFrame()
cur_dist = pd.DataFrame()
# Columns are keyed by the SOI short names so all three sources share field names
for tc, x in zip(inc_vars, soi_inc_vars):
    new_dist[x] = new_dist_dict[tc][1]
    cur_dist[x] = cur_dist_dict[tc][1]
new_dist['AGI Bin'] = index_list()
new_dist['label'] = 'New'
cur_dist['AGI Bin'] = index_list()
cur_dist['label'] = 'Current'
# SOI distribution: each bin's amount as a fraction of the column total
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
soi_dist = pd.DataFrame()
for item in items_tups:
    soi_dist[item[0]] = (soi_income[item[0]] / soi_income[item[0]].sum())
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'

scatter_new = ColumnDataSource(new_dist)
scatter_cur = ColumnDataSource(cur_dist)
scatter_soi = ColumnDataSource(soi_dist)

# (legend label, colour, data source) for the three series, in draw order
share_series = [('New', 'green', scatter_new),
                ('Current', 'red', scatter_cur),
                ('SOI', 'blue', scatter_soi)]

figure_list = []
for item in items_tups:
    title = 'Percent of Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    for series_label, series_color, series_source in share_series:
        f.circle(x='AGI Bin', y=item[0], color=series_color, size=10,
                 legend=series_label, alpha=0.5, source=series_source)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
    # The plotted values are fractions, so format the tooltip the same way as the
    # axis; a bare '%' suffix would show e.g. 0.05 as "0.05%" instead of "5.00%".
    hover = HoverTool(tooltips=[('Pct', '@{}{{0.00%}}'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

In [17]:
# Dollar totals of each income item by AGI bin for both calculators.
# NOTE(review): element [0] of distribution()'s result is plotted here as the
# per-bin total - confirm against notebookfunctions.
new_tot = pd.DataFrame()
cur_tot = pd.DataFrame()
# Columns are keyed by the SOI short names so all three sources share field names
for tc, x in zip(inc_vars, soi_inc_vars):
    new_tot[x] = new_dist_dict[tc][0]
    cur_tot[x] = cur_dist_dict[tc][0]
new_tot['AGI Bin'] = index_list()
new_tot['label'] = 'New'
cur_tot['AGI Bin'] = index_list()
cur_tot['label'] = 'Current'

# The SOI frame already holds totals; it only needs the plotting columns added
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'

total_new = ColumnDataSource(new_tot)
total_cur = ColumnDataSource(cur_tot)
total_soi = ColumnDataSource(soi_income)

# (legend label, colour, data source) for the three series, in draw order
total_series = [('New', 'green', total_new),
                ('Current', 'red', total_cur),
                ('SOI', 'blue', total_soi)]

figure_list = []
for item in items_tups:
    title = 'Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    for series_label, series_color, series_source in total_series:
        f.circle(x='AGI Bin', y=item[0], color=series_color, size=10,
                 legend=series_label, alpha=0.5, source=series_source)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
    # These are dollar totals, not percentages: label and format the tooltip
    # to match the y axis instead of appending '%'.
    hover = HoverTool(tooltips=[('Total', '@{}{{$0.00a}}'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

# Itemized Deduction Amounts

In [18]:
# Display name of each itemized-deduction component and its calculator variable
deduction_list = ['Medical Expenses', 'State and Local Taxes', 'Real Estate Taxes',
                  'Interest Paid', 'Charitable Cash Contributions',
                  'Charitable Non-Cash Contributions', 'Total Misc. Expenses',
                  'Net Casualty or Loss']
deduction_vars = ['e17500', 'e18400', 'e18500', 'e19200', 'e19800', 'e20100',
                  'e20400', 'g20500']

# Only records with a positive c04470 contribute to the totals
new_itemizers = new_df['c04470'] > 0
cur_itemizers = calc_df['c04470'] > 0

deductions_new = {}
deductions_cur = {}
for ded, var in zip(deduction_list, deduction_vars):
    # s006-weighted sum of the component over the selected records
    deductions_new[ded] = (new_df.loc[new_itemizers, var] *
                           new_df.loc[new_itemizers, 's006']).sum()
    deductions_cur[ded] = (calc_df.loc[cur_itemizers, var] *
                           calc_df.loc[cur_itemizers, 's006']).sum()

# One-column frames indexed by deduction name, tagged with their origin
ded_new_df = pd.DataFrame.from_dict(deductions_new, orient='index')
ded_new_df.columns = ['Total']
ded_new_df['source'] = 'New'
ded_cur_df = pd.DataFrame.from_dict(deductions_cur, orient='index')
ded_cur_df.columns = ['Total']
ded_cur_df['source'] = 'Current'

soi_deductions['source'] = 'SOI'

# Each source gets its own x offset so the three bars of a group sit side by side
ded_cds_new = ColumnDataSource(ded_new_df)
ded_cds_new.add(data=[slot - .5 for slot in range(1, 9)], name='xaxis')
ded_cds_cur = ColumnDataSource(ded_cur_df)
ded_cds_cur.add(data=[slot - .25 for slot in range(1, 9)], name='xaxis')
ded_cds_soi = ColumnDataSource(soi_deductions)
ded_cds_soi.add(data=[slot - .75 for slot in range(1, 9)], name='xaxis')

'xaxis'

In [19]:
# Order in which the deduction groups appear along the x axis
xaxis_list = ['Interest Paid', 'Total Misc. Expenses', 'Medical Expenses',
              'Charitable Non-Cash Contributions', 'Real Estate Taxes',
              'State and Local Taxes', 'Charitable Cash Contributions',
              'Net Casualty or Loss']


def _bar_source(frame, offset):
    """Build a bar data source whose x position follows each row's label.

    ``frame`` must be indexed by deduction name and have a 'Total' column.
    The position of a row is the 0-based slot of its label in ``xaxis_list``
    plus ``offset``, so a bar always lands under its own tick label no matter
    how the rows of ``frame`` are ordered. Raises ValueError if a row label is
    missing from ``xaxis_list``.
    """
    slots = [xaxis_list.index(label) + offset for label in frame.index]
    return ColumnDataSource({'xaxis': slots, 'Total': list(frame['Total'])})


f = figure(title='Itemized Deduction Totals', x_range=xaxis_list, width=800)
f.xaxis.major_label_orientation = 45
# Offsets keep the original numeric layout within each slot (SOI at +0.25,
# New at +0.5, Current at +0.75); only the slot is now looked up by label
# instead of being taken from row order.
bar_series = [('New', 'green', _bar_source(ded_new_df, 0.5)),
              ('Current', 'red', _bar_source(ded_cur_df, 0.75)),
              ('SOI', 'blue', _bar_source(soi_deductions, 0.25))]
for bar_label, bar_color, bar_source in bar_series:
    f.vbar(x='xaxis', bottom=0, top='Total', legend=bar_label,
           color=bar_color, alpha=0.5, line_alpha=1,
           width=0.25, source=bar_source)
hover = HoverTool(tooltips=[('Total', '@Total{$0.00a}')])
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.add_tools(hover)

show(f)

In [20]:
# Absolute and percentage gap between each calculator's deduction totals and SOI
soi_total = soi_deductions['Total']
gap_new = ded_new_df['Total'] - soi_total
gap_cur = ded_cur_df['Total'] - soi_total

ded_error_df = pd.DataFrame()
ded_error_df['Difference: New'] = gap_new
ded_error_df['% Difference: New'] = 100 * ded_error_df['Difference: New'] / soi_total
ded_error_df['Difference: Current'] = gap_cur
ded_error_df['% Difference: Current'] = 100 * ded_error_df['Difference: Current'] / soi_total
print('Error in Itemized Deductions Relative to SOI Totals')
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference: New,% Difference: New,Difference: Current,% Difference: Current
Medical Expenses,-10881857435.6,-8.4,-10263322756.7,-8.0
State and Local Taxes,-17335117915.7,-5.3,-15964512279.0,-4.9
Real Estate Taxes,3655391628.2,2.0,17294916696.2,9.6
Interest Paid,-14394297495.7,-4.7,-7293196129.2,-2.4
Charitable Cash Contributions,-5758509001.9,-3.7,-4690078031.9,-3.0
Charitable Non-Cash Contributions,-27044511451.0,-41.4,-31545208332.2,-48.3
Total Misc. Expenses,4062221013.8,3.2,4144485540.8,3.3
Net Casualty or Loss,2863801247.1,129.9,2271378406.5,103.0


### Itemizing Rate

In [21]:
# Flag itemizers (positive c04470) with a 0/1 column.
# The comparison is cast to int directly so this cell does not depend on `np`,
# which is not imported explicitly in the visible import cell.
calc_df['itemizer'] = (calc_df['c04470'] > 0).astype(int)
new_df['itemizer'] = (new_df['c04470'] > 0).astype(int)
# NOTE(review): percentile() comes from notebookfunctions; it is called here with
# 100 groups, 'e00200' as the ranking variable and 's006' as the weight - confirm.
pr_new = percentile(new_df, 'itemizer', 100, 'e00200', 's006')
pr_cur = percentile(calc_df, 'itemizer', 100, 'e00200', 's006')
# Both series are plotted against the index of the new-calculator result
item_index = pr_new.index
pr_cds_new = ColumnDataSource({'rate': pr_new,
                               'index': item_index})
pr_cds_cur = ColumnDataSource({'rate': pr_cur,
                               'index': item_index})

hover_ded = HoverTool(tooltips=[('Percentile', '@index'), ('Itemizing Rate', '@rate{0.00%}')])
f = figure(title='Itemizing Rate by Wage Percentile',
           x_axis_label='Wage Percentile')
f.add_tools(hover_ded)
f.line('index', 'rate', legend='New', source=pr_cds_new)
f.line('index', 'rate', legend='Current', color='red', source=pr_cds_cur)
f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
f.legend.location = 'top_left'
show(f)

In [22]:
# Restrict each frame to itemizers; copies keep the originals untouched
itemizers_new = new_df[new_df['c04470'] > 0].copy()
itemizers = calc_df[calc_df['c04470'] > 0].copy()
mean_new = percentile(itemizers_new, 'c04470', 100, 'e00200', 's006')
mean_cur = percentile(itemizers, 'c04470', 100, 'e00200', 's006')

# Both series reuse the x positions defined for the itemizing-rate plot
mean_cds_new = ColumnDataSource({'mean': mean_new, 'xaxis': item_index})
mean_cds_cur = ColumnDataSource({'mean': mean_cur, 'xaxis': item_index})

hover = HoverTool(tooltips=[('Percentile', '@xaxis'), ('Mean', '@mean{$0.00a}')])
f = figure(
    title='Mean Itemized Deduction Total Among Itemizers',
    x_axis_label='Wage Percentile',
)
f.add_tools(hover)
f.line('xaxis', 'mean', legend='New', source=mean_cds_new)
f.line('xaxis', 'mean', legend='Current', color='red', source=mean_cds_cur)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
show(f)

# Tax Liability by Year

In [23]:
# new liabilities
li_new = {'Income': [calc_new.weighted_total('iitax')],
          'Payroll': [calc_new.weighted_total('payrolltax')],
          'Combined': [calc_new.weighted_total('combined')],
          'source': ['New'],
          'Units': [calc_new.array('s006').sum()]}
# current liabilities
li_cur = {'Income': [calc.weighted_total('iitax')],
          'Payroll': [calc.weighted_total('payrolltax')],
          'Combined': [calc.weighted_total('combined')],
          'source': ['Current'],
          'Units': [calc.array('s006').sum()]}
years = [2014]
for year in range(2015, 2028):
    years.append(year)
    calc.advance_to_year(year)
    calc.calc_all()
    calc_new.advance_to_year(year)
    print('new:', calc_new.policy_current_year(), 'current:', calc.policy_current_year())
    print(calc_new.param('_II_em'))
    print(calc.param('_II_em'))
    calc_new.calc_all()
    li_new['Income'].append(calc_new.weighted_total('iitax'))
    li_new['Payroll'].append(calc_new.weighted_total('payrolltax'))
    li_new['Combined'].append(calc_new.weighted_total('combined'))
    li_new['source'].append('New')
    li_new['Units'].append(calc_new.array('s006').sum())
    li_cur['Income'].append(calc.weighted_total('iitax'))
    li_cur['Payroll'].append(calc.weighted_total('payrolltax'))
    li_cur['Combined'].append(calc.weighted_total('combined'))
    li_cur['source'].append('Current')
    li_cur['Units'].append(calc.array('s006').sum())
li_new['years'] = years
li_cur['years'] = years

new: 2015 current: 2015
[ 3900.   3950.   4000.   4050.   4050.      0.      0.      0.      0.
     0.      0.      0.      0.   4883.   4987.5]
[ 3900.    3950.    4000.    4050.    4050.       0.       0.       0.
     0.       0.       0.       0.       0.    4883.    4987.98]
new: 2016 current: 2016
[ 3900.   3950.   4000.   4050.   4050.      0.      0.      0.      0.
     0.      0.      0.      0.   4883.   4987.5]
[ 3900.    3950.    4000.    4050.    4050.       0.       0.       0.
     0.       0.       0.       0.       0.    4883.    4987.98]
new: 2017 current: 2017
[ 3900.   3950.   4000.   4050.   4050.      0.      0.      0.      0.
     0.      0.      0.      0.   4883.   4987.5]
[ 3900.    3950.    4000.    4050.    4050.       0.       0.       0.
     0.       0.       0.       0.       0.    4883.    4987.98]
new: 2018 current: 2018
[ 3900.   3950.   4000.   4050.   4050.      0.      0.      0.      0.
     0.      0.      0.      0.   4883.   4987.5]
[ 3900. 

In [24]:
# Wrap both liability dictionaries as Bokeh data sources for the figure
li_cds_new, li_cds_cur = [ColumnDataSource(liabilities)
                          for liabilities in (li_new, li_cur)]

In [25]:
f = figure(title='Tax Liabilities 2014-2027')
f.width = 800
f.height = 500

# (column, colour, legend label, source) for every series, in draw order
liability_series = [
    ('Income', 'blue', 'Income-Current', li_cds_cur),
    ('Income', 'cyan', 'Income-New', li_cds_new),
    ('Payroll', 'purple', 'Payroll-Current', li_cds_cur),
    ('Payroll', 'magenta', 'Payroll-New', li_cds_new),
    ('Combined', 'green', 'Combined-Current', li_cds_cur),
    ('Combined', 'lime', 'Combined-New', li_cds_new),
]

# All lines are added first, then the markers, as in the original layout
for tax_kind, shade, legend_label, source in liability_series:
    f.line('years', tax_kind, color=shade, line_width=2, legend=legend_label,
           source=source)

# Markers are grouped by column so each hover tool targets only its own pair
markers_by_kind = OrderedDict()
for tax_kind, shade, legend_label, source in liability_series:
    marker = f.circle('years', tax_kind, color=shade, size=5, legend=legend_label,
                      source=source)
    markers_by_kind.setdefault(tax_kind, []).append(marker)

f.legend.location = 'top_left'
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')

# One hover tool per tax type, attached to that type's two marker renderers
hover_tools = [
    HoverTool(tooltips=[('PUF', '@source'), ('Year', '@years'),
                        (tax_kind, '@{}{{$0.00a}}'.format(tax_kind))],
              renderers=markers)
    for tax_kind, markers in markers_by_kind.items()
]
f.add_tools(*hover_tools)
show(f)

In [26]:
# Year-indexed liabilities for the current calculator, numeric columns only
ldf_current = (pd.DataFrame(li_cur, index=years)
               .drop(['source', 'years'], axis=1))
print('Current Tax Liabilities by Year')
ldf_current

Current Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll,Units
2014,2324811726624.2,1371011564057.3,953800162566.9,162224619.6
2015,2496405141206.3,1499687317297.4,996717823908.9,164426467.9
2016,2572374238835.4,1539019632733.9,1033354606101.5,167110179.0
2017,2725712039147.9,1637704761092.9,1088007278055.0,170054950.4
2018,2667916509164.4,1530852268398.2,1137064240766.3,172859803.9
2019,2773772497820.2,1591742617388.6,1182029880431.7,175361959.7
2020,2878645055893.6,1654554132252.4,1224090923641.2,177650713.2
2021,2994094216895.6,1725484401397.6,1268609815498.0,179925517.3
2022,3122359171982.6,1805000695989.7,1317358475993.0,182192965.7
2023,3262271239700.0,1892909990596.5,1369361249103.5,184426496.8


In [27]:
# Year-indexed liabilities for the new calculator, numeric columns only
ldf_new = (pd.DataFrame(li_new, index=years)
           .drop(['source', 'years'], axis=1))
print('New Tax Liabilities by Year')
ldf_new

New Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll,Units
2014,2342949614024.1,1385944534350.6,957005079673.5,162065569.3
2015,2515439715956.5,1514641928946.7,1000797787009.8,164306179.3
2016,2596092562666.2,1557911726267.3,1038180836398.9,167034390.5
2017,2750057828074.0,1657844416173.6,1092213411900.4,170025489.8
2018,2694753742864.4,1553716193714.9,1141037549149.5,172840481.4
2019,2803341846513.7,1617521705791.1,1185820140722.6,175383045.8
2020,2909754274835.8,1681957562159.6,1227796712676.3,177669696.8
2021,3026704779226.1,1754246287884.2,1272458491341.9,179962345.5
2022,3156242823655.6,1834923534723.0,1321319288932.6,182213231.7
2023,3298051083167.8,1924655796812.9,1373395286355.0,184481246.9


In [28]:
print('Difference in Tax Liabilities by Year')
# Year-by-year gap between the new and current calculators
diff_by_year = ldf_new - ldf_current
# Column sums as a single row labelled 'Total'. pd.concat is used because
# DataFrame.append no longer exists in current pandas, and the label is set
# here rather than by appending to `years`, so `years` stays a list of years
# and this cell (and the ones that use `years`) can be re-run safely.
total_row = diff_by_year.sum().to_frame('Total').T
diff_df = pd.concat([diff_by_year, total_row])
diff_df

Difference in Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll,Units
2014,18137887399.9,14932970293.2,3204917106.7,-159050.3
2015,19034574750.2,14954611649.3,4079963100.9,-120288.6
2016,23718323830.8,18892093533.4,4826230297.4,-75788.5
2017,24345788926.0,20139655080.7,4206133845.3,-29460.6
2018,26837233700.0,22863925316.7,3973308383.2,-19322.5
2019,29569348693.4,25779088402.5,3790260290.9,21086.1
2020,31109218942.2,27403429907.2,3705789035.1,18983.6
2021,32610562330.5,28761886486.6,3848675843.9,36828.2
2022,33883651673.0,29922838733.4,3960812939.6,20265.9
2023,35779843467.8,31745806216.4,4034037251.4,54750.1


In [29]:
print('Pct. Change in Tax Liabilities by Year')
# Gap between the two calculators, relative to the current calculator
liability_gap = ldf_new - ldf_current
(liability_gap / ldf_current) * 100

Pct. Change in Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll,Units
2014,0.8,1.1,0.3,-0.1
2015,0.8,1.0,0.4,-0.1
2016,0.9,1.2,0.5,-0.0
2017,0.9,1.2,0.4,-0.0
2018,1.0,1.5,0.3,-0.0
2019,1.1,1.6,0.3,0.0
2020,1.1,1.7,0.3,0.0
2021,1.1,1.7,0.3,0.0
2022,1.1,1.7,0.3,0.0
2023,1.1,1.7,0.3,0.0


In [30]:
def _scaled(values, unit):
    """Return ``values`` divided by ``unit`` and rounded to one decimal place."""
    return [round(value / unit, 1) for value in values]


# Built from a list of pairs: passing a dict literal to OrderedDict would only
# keep this column order on interpreters where plain dicts are already ordered.
show_df = pd.DataFrame(OrderedDict([
    ('Year', list(range(2014, 2028))),
    ('Units - New', _scaled(li_new['Units'], 1e6)),
    ('Units - Old', _scaled(li_cur['Units'], 1e6)),
    ('PTax - New', _scaled(li_new['Payroll'], 1e9)),
    ('PTax - Old', _scaled(li_cur['Payroll'], 1e9)),
    ('ITax - New', _scaled(li_new['Income'], 1e9)),
    ('ITax - Old', _scaled(li_cur['Income'], 1e9)),
]))
print('Units in millions, PTax and ITax in billions')
show_df

Units in millions, PTax and ITax in billions


Unnamed: 0,Year,Units - New,Units - Old,PTax - New,PTax - Old,ITax - New,ITax - Old
0,2014,162.1,162.2,957.0,953.8,1385.9,1371.0
1,2015,164.3,164.4,1000.8,996.7,1514.6,1499.7
2,2016,167.0,167.1,1038.2,1033.4,1557.9,1539.0
3,2017,170.0,170.1,1092.2,1088.0,1657.8,1637.7
4,2018,172.8,172.9,1141.0,1137.1,1553.7,1530.9
5,2019,175.4,175.4,1185.8,1182.0,1617.5,1591.7
6,2020,177.7,177.7,1227.8,1224.1,1682.0,1654.6
7,2021,180.0,179.9,1272.5,1268.6,1754.2,1725.5
8,2022,182.2,182.2,1321.3,1317.4,1834.9,1805.0
9,2023,184.5,184.4,1373.4,1369.4,1924.7,1892.9
