In [1]:
from taxcalc import *
from bokeh.io import show, output_notebook
from bokeh.layouts import column
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from collections import OrderedDict
import copy
import pandas as pd
from notebookfunctions import distribution, index_list, percentile
output_notebook()

In [2]:
# IRS-SOI Tax Stats benchmarks that the calculator output is compared against
soi_stats = pd.read_csv('soi_stats.csv', index_col=0)  # Equivalent to tax-calc diagnostic table
soi_income = pd.read_csv('soi_income_stats.csv')  # Distribution of income items
# Itemized deductions; the row labels are also duplicated into an 'index' column
soi_deductions = (
    pd.read_csv('soi_deductions.csv', index_col=0)
    .assign(index=lambda frame: frame.index)
)

In [3]:
# New PUF plus the growth factors, weights and adjustment ratios that go with it
gf = pd.read_csv('../taxdata/stage1/growfactors.csv')
wt = pd.read_csv('../taxdata/puf_stage2/puf_weights.csv')
puf = pd.read_csv('../taxdata/puf_data/puf.csv')
# The ratios file is read with its first column as the index and then transposed
adj = pd.read_csv('../taxdata/puf_stage3/puf_ratios.csv', index_col=0).transpose()

In [4]:
# base calculator
# temporary growth rates
grow_temp = Growfactors('taxcalc/growfactors_temp.csv')
# No data or weights are passed here, so Records falls back on its own defaults
# NOTE(review): confirm which input file that default resolves to
recs = Records(gfactors=grow_temp)
# NOTE: `pol` is rebound by the new-calculator cell further down
pol = Policy()
calc = Calculator(records=recs, policy=pol)
# Move the calculator to the 2014 comparison year, then compute all tax variables
calc.advance_to_year(2014)
calc.calc_all()

You loaded data for 2009.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [5]:
# new calculator
# Built from the freshly generated taxdata inputs (PUF, weights, ratios, growth factors)
growf = Growfactors('../taxdata/stage1/growfactors.csv')
rec = Records(puf, gfactors=growf,
              weights=wt, adjust_ratios=adj,
              start_year=2011)
# Policy, consumption and behavior objects all start in 2011 and span 17 years
# NOTE: this rebinds `pol`, which the base-calculator cell also assigned
pol = Policy(gfactors=growf, start_year=2011, num_years=17)
consump = Consumption(start_year=2011, num_years=17)
behave = Behavior(start_year=2011, num_years=17)
calc_new = Calculator(records=rec, policy=pol, consumption=consump, behavior=behave)
# Move to the same 2014 comparison year as the base calculator and compute
calc_new.advance_to_year(2014)
calc_new.calc_all()

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2011.


In [6]:
# Variables extracted from each calculator and reused throughout the notebook
var_list = [
    'e00200', 'e00300', 'e00600', 'e00650', 'e00900',
    'c00100', 's006',
    'e17500', 'c04470', 'e18400', 'e18500', 'e19200', 'e19800', 'e20100',
    'e20400', 'g20500',
    'iitax', 'payrolltax', 'combined',
]
# One data frame per calculator: current PUF first, then new PUF
calc_df, new_df = (calculator.dataframe(var_list) for calculator in (calc, calc_new))

# Distribution Tables

In [7]:
# Distribution table from the current-PUF calculator.
# distribution_tables() returns a pair; only the first element is kept here.
curr_table, _ = calc.distribution_tables(None)
print ('Current PUF')
curr_table

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'


Current PUF


Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16221793,-87307405375,11146637,96407359841,6087,264420507,66734350903,837996365,80929000,-93697347981,...,0,80929000,6971601,81400811,2526565011,-2371206801,5813976106,3442769306,-83056124135,-86498893441
1,16222840,97696000447,13632755,114464028623,54252,427192044,79940587312,4098599114,366554922,97335286959,...,7933783,374488705,7163854,67319871,12374838446,-11940193724,13166598731,1226405007,136744925045,135518520038
2,16221490,163468877532,13230158,123397911082,291268,3127562614,105324141284,23615280936,2317284610,161287246966,...,28785086,2346069695,313479968,95718940,26344313834,-24216005166,22653526956,-1562478210,243042824393,244605302603
3,16223700,244271226308,14193383,126510866481,665097,8675779270,110406827660,64167133999,6728920955,238716968823,...,29752265,6758673220,1317901363,190009074,23461455686,-17830674754,32516776026,14686101272,355123480068,340437378795
4,16222475,362531321371,14165525,128715334378,1497871,20700893573,116829492149,139146548563,16078090151,349707249795,...,44135661,16122225812,3488450770,262896068,17584563210,-4687892100,46562008652,41874116551,491474693643,449600577091
5,16221683,524486317326,13178967,124460625287,2882627,43092458811,118837408328,265330113463,32309372364,498816566335,...,40562634,32349934997,5828747564,426620413,8995351586,17952456261,63669887689,81622343950,660912932518,579290588568
6,16222444,748546418528,11388577,115287352674,4803785,80625803708,125281678668,437438543361,57283794507,700594959360,...,38919584,57322714090,8154522127,680661675,2753987998,47094865639,85917017696,133011883336,882702922566,749691039230
7,16222827,1092349731112,9043469,98828370778,7172046,132250968817,139442917664,723997183246,104415929974,1018381692957,...,60125018,104476054992,11072525677,1012981743,1298281698,93118229361,124635761651,217753991012,1204302332176,986548341164
8,16221896,1655218919934,5830283,69495223098,10390784,223921661490,164449691635,1198406293993,187012834871,1535575163967,...,293968886,187306803758,14524350223,1615376719,1548159725,172849670528,201791432199,374641102727,1780169573807,1405528471079
9,16223470,5010404019291,1996496,24063641503,14214627,596219211775,156981070943,4239681834066,1061876168463,4769377025799,...,38841090217,1100717258680,29120331277,30368805793,923418383,1101042314813,357073176859,1458115491672,5288418667894,3830303176222


In [8]:
# Distribution table from the new-PUF calculator.
# distribution_tables() returns a pair; only the first element is kept here.
new_table, _ = calc_new.distribution_tables(None)
print ('New PUF')
new_table

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'


New PUF


Unnamed: 0,s006,c00100,num_returns_StandardDed,standard,num_returns_ItemDed,c04470,c04600,c04800,taxbc,c62100,...,c09600,c05800,c07100,othertaxes,refund,iitax,payrolltax,combined,expanded_income,aftertax_income
0,16206016,-43956302370,11504053,99558547058,1536,204957779,65845928547,940693088,69544365,-44073202632,...,0,69544365,3011340,94705288,1786025251,-1624786938,5800890667,4176103728,-34253632852,-38429736580
1,16206082,110475115754,13953329,120466804131,69673,726505698,84185707320,6057146850,542655929,109964100731,...,42476287,585132216,7242826,61020742,14329353888,-13690443755,15185214085,1494770330,151061539045,149566768715
2,16207411,177938895038,13965760,128903218920,300854,3480580134,108324244188,28421497581,2754114052,175712028474,...,107248365,2861362417,161857139,103523914,27281381294,-24478352102,24819963713,341611611,253839351673,253497740062
3,16205983,260974020573,14495043,134414959135,602716,7625541332,115347776301,71306656969,7561261806,256620179262,...,139514724,7700776530,986197407,190203222,23793063438,-16888281094,35395225223,18506944129,370126046701,351619102572
4,16206852,383416891999,14339169,132920501183,1488012,20397983616,119821577535,151358793694,17528653994,371278355176,...,164342548,17692996543,2814517004,275321807,16814732323,-1660930978,49669886779,48008955801,507542397581,459533441780
5,16206726,543216405423,13255894,129385689681,2839470,41289171018,123582779468,274360249638,32804989204,519510104941,...,212018428,33017007631,4729112869,386082573,8087490248,20586487088,66207845676,86794332764,678569738754,591775405991
6,16206520,769490943531,11599036,120363140054,4580958,74109722143,129052082290,454057157498,58706488575,726596668946,...,207864821,58914353396,6564212510,535854229,1782348528,51103646586,89531305607,140634952193,899044021347,758409069154
7,16206641,1113289780303,9157945,102893206813,7043770,129140943884,143959240454,739557143067,105702181000,1041321468771,...,262207274,105964388274,8527803026,947011218,162965033,98220631433,126602197007,224822828441,1224227735991,999404907550
8,16206668,1652891333712,6035383,72889030624,10168296,218059961963,167560279555,1197584387220,184404201509,1536444773488,...,366544527,184770746036,10678995150,1380263145,27646350,175444367682,198244262772,373688630454,1779723054240,1406034423786
9,16206669,4964247376357,2094666,26124739832,14101383,584920024535,160159228649,4199372150059,1035239493695,4749046200455,...,40456938747,1075696432443,22509980113,29743740573,1898431,1082928294471,347296832484,1430225126956,5276963657561,3846738530605


# Diagnostic Table Comparison

In [9]:
# Diagnostic table from the current-PUF calculator; its 2014 column is reused
# in the SOI comparison below.
curr_diag = calc.diagnostic_table(1)
curr_diag

Unnamed: 0,2014
Returns (#m),162.2
AGI ($b),9811.7
Itemizers (#m),42.0
Itemized Deduction ($b),1109.3
Standard Deduction Filers (#m),107.8
Standard Deduction ($b),925.9
Personal Exemption ($b),1115.8
Taxable Income ($b),7096.7
Regular Tax ($b),1468.5
AMT Income ($b),9276.1


In [10]:
# Diagnostic table from the new-PUF calculator; its 2014 column is reused
# in the SOI comparison below.
new_diag = calc_new.diagnostic_table(1)
new_diag

Unnamed: 0,2014
Returns (#m),162.1
AGI ($b),9932.0
Itemizers (#m),41.2
Itemized Deduction ($b),1080.0
Standard Deduction Filers (#m),110.4
Standard Deduction ($b),979.3
Personal Exemption ($b),1159.1
Taxable Income ($b),7123.0
Regular Tax ($b),1445.3
AMT Income ($b),9442.4


In [11]:
# Side-by-side comparison of the SOI benchmark with both calculators' 2014 diagnostics
new_2014 = new_diag[2014]
cur_2014 = curr_diag[2014]
# Seed the frame with the SOI values so every later column aligns to the SOI row labels
diag_data = pd.DataFrame({'SOI': soi_stats['Value']})
diag_data['New'] = new_2014
diag_data['Current'] = cur_2014
diag_data['% Change'] = ((new_2014 / cur_2014) - 1) * 100
diag_data['New - SOI'] = new_2014 - diag_data['SOI']
diag_data['Current - SOI'] = cur_2014 - diag_data['SOI']
diag_data

Unnamed: 0,SOI,New,Current,% Change,New - SOI,Current - SOI
Returns (#m),148.6,162.1,162.2,-0.1,13.5,13.6
AGI ($b),9771.0,9932.0,9811.7,1.2,161.0,40.7
Itemizers (#m),44.0,41.2,42.0,-1.9,-2.8,-2.0
Itemized Deduction ($b),1206.7,1080.0,1109.3,-2.6,-126.7,-97.4
Standard Deduction Filers (#m),117.4,110.4,107.8,2.4,-7.0,-9.6
Standard Deduction ($b),876.2,979.3,925.9,5.8,103.1,49.7
Personal Exemption ($b),1121.6,1159.1,1115.8,3.9,37.5,-5.8
Taxable Income ($b),6997.9,7123.0,7096.7,0.4,125.1,98.8
Regular Tax ($b),,1445.3,1468.5,-1.6,,
AMT Income ($b),,9442.4,9276.1,1.8,,


# Income Levels

In [12]:
def pct_diff(current, new):
    """Return the percentage change from `current` to `new`.

    Computed as ``(new / current - 1) * 100``, so a result of 25 means `new`
    is 25% larger than `current`. No guard is applied for ``current == 0``.
    """
    ratio = new / current
    return (ratio - 1) * 100

In [13]:
# Weighted totals of the main income items: new PUF vs. current PUF vs. SOI
inc_vars = ['e00200', 'e00300', 'e00600', 'e00650', 'e00900']
soi_inc_vars = ['WAS', 'INT', 'ODIV', 'QDIV', 'BIZ']
inc_columns = ['New', 'Current', 'SOI', 'Pct Diff', 'New - SOI', 'Current - SOI']
inc_dict = OrderedDict((column_name, []) for column_name in inc_columns)
for tc, soi in zip(inc_vars, soi_inc_vars):
    new_val = calc_new.weighted_total(tc)
    curr_val = calc.weighted_total(tc)
    soi_val = soi_income[soi].sum()
    # One value per output column, in the same order as `inc_columns`
    row_values = (new_val, curr_val, soi_val, pct_diff(curr_val, new_val),
                  new_val - soi_val, curr_val - soi_val)
    for column_name, value in zip(inc_columns, row_values):
        inc_dict[column_name].append(value)
# create data frame to display totals better
inc_df = pd.DataFrame.from_dict(inc_dict)
inc_df.index = ['WAS', 'Taxable Interest', 'Ordinary Dividends', 'Qualified Dividends', 'Business Income']
print("'Pct Diff' represents the percentage change between the current and new sums")
inc_df

'Pct Diff' represents the percentage change between the current and new sums


Unnamed: 0,New,Current,SOI,Pct Diff,New - SOI,Current - SOI
WAS,6784950833499.0,6784952125431.1,6785880966000.0,-0.0,-930132501.0,-928840568.9
Taxable Interest,90425786010.6,93894174624.3,93894281000.0,-3.7,-3468494989.4,-106375.7
Ordinary Dividends,254696960387.0,254702137258.3,254702232000.0,-0.0,-5271613.0,-94741.7
Qualified Dividends,180830300363.9,186804634087.7,192447708000.0,-3.2,-11617407636.1,-5643073912.3
Business Income,317260010213.3,317258657428.4,317248783000.0,0.0,11227213.3,9874428.4


# Distribution of Income Variables

In [14]:
# Distribution of every income item across AGI bins, for both calculators.
# distribution() comes from notebookfunctions; its result is indexed below
# ([1] here, [0] in the totals cell).
new_dist_dict = {}
cur_dist_dict = {}
for item in inc_vars:
    new_dist_dict[item] = distribution(calc_new.array(item), calc_new.array('s006'), calc_new.array('c00100'))
    cur_dist_dict[item] = distribution(calc.array(item), calc.array('s006'), calc.array('c00100'))
new_dist = pd.DataFrame()
cur_dist = pd.DataFrame()
# Add all of the variables to a data frame
# Element [1] is plotted as each bin's share of the total
# NOTE(review): presumably a 0-1 fraction like the SOI series below - confirm
for tc, x in zip(inc_vars, soi_inc_vars):
    new_dist[x] = new_dist_dict[tc][1]
    cur_dist[x] = cur_dist_dict[tc][1]
new_dist['AGI Bin'] = index_list()
new_dist['label'] = 'New'
cur_dist['AGI Bin'] = index_list()
cur_dist['label'] = 'Current'
# SOI distribution: each bin's value divided by the column total (a 0-1 fraction)
items_tups = [('WAS', 'WAS'), ('INT', 'Interest Income'), ('ODIV', 'Ordinary Dividends'),
              ('QDIV', 'Qualified Dividends'), ('BIZ', 'Business Income')]
soi_dist = pd.DataFrame()
for item in items_tups:
    soi_dist[item[0]] = (soi_income[item[0]] / soi_income[item[0]].sum())
soi_dist['AGI Bin'] = index_list()
soi_dist['label'] = 'SOI'

scatter_new = ColumnDataSource(new_dist)
scatter_cur = ColumnDataSource(cur_dist)
scatter_soi = ColumnDataSource(soi_dist)

# One scatter figure per income item, with the three sources overlaid
figure_list = []
for item in items_tups:
    title = 'Percent of Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=scatter_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=scatter_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=scatter_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
    # Format the hover value with the same percent format as the y axis.
    # A bare '@col%' would print the raw fraction followed by a percent sign
    # (e.g. 0.12% for a 12% share).
    hover = HoverTool(tooltips=[('Pct', '@{}{{0.00%}}'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

In [15]:
# Dollar totals of each income item by AGI bin (element [0] of the
# distribution() results computed in the previous cell)
new_tot = pd.DataFrame()
cur_tot = pd.DataFrame()
# Add all of the variables to a data frame
for tc, x in zip(inc_vars, soi_inc_vars):
    new_tot[x] = new_dist_dict[tc][0]
    cur_tot[x] = cur_dist_dict[tc][0]
new_tot['AGI Bin'] = index_list()
new_tot['label'] = 'New'
cur_tot['AGI Bin'] = index_list()
cur_tot['label'] = 'Current'

# The SOI frame already holds per-bin totals; tag it with the same helper columns
soi_income['AGI Bin'] = index_list()
soi_income['label'] = 'SOI'

total_new = ColumnDataSource(new_tot)
total_cur = ColumnDataSource(cur_tot)
total_soi = ColumnDataSource(soi_income)

# One scatter figure per income item, with the three sources overlaid
figure_list = []
for item in items_tups:
    title = 'Total {} by AGI Bin'.format(item[1])
    f = figure(title=title, x_range=index_list())
    f.xaxis.major_label_orientation = 45
    f.circle(x='AGI Bin', y=item[0], color='green', size=10, legend='New', alpha=0.5,
             source=total_new)
    f.circle(x='AGI Bin', y=item[0], color='red', size=10, legend='Current', alpha=0.5,
             source=total_cur)
    f.circle(x='AGI Bin', y=item[0], color='blue', size=10, legend='SOI', alpha=0.5,
             source=total_soi)
    f.legend.location = 'top_left'
    f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
    # These are dollar totals, so label and format the hover like the y axis
    # rather than as a percentage.
    hover = HoverTool(tooltips=[('Total', '@{}{{$0.00a}}'.format(item[0]))])
    f.add_tools(hover)
    figure_list.append(f)
show(column(figure_list))

# Itemized Deduction Amounts

In [16]:
# Weighted totals of each itemized-deduction component among itemizers
# (records with c04470 > 0), for the new and current PUF.
deduction_list = ['Medical Expenses', 'State and Local Taxes', 'Real Estate Taxes',
                  'Interest Paid', 'Charitable Cash Contributions',
                  'Charitable Non-Cash Contributions', 'Total Misc. Expenses',
                  'Net Casualty or Loss']
deduction_vars = ['e17500', 'e18400', 'e18500', 'e19200', 'e19800', 'e20100',
                  'e20400', 'g20500']
# Select the itemizer rows once instead of re-filtering inside the loop
new_itemizers = new_df[new_df['c04470'] > 0]
cur_itemizers = calc_df[calc_df['c04470'] > 0]
# Ordered dicts pin the row order to `deduction_list` on every Python version;
# the bar positions assigned below are purely positional.
deductions_new = OrderedDict()
deductions_cur = OrderedDict()
for ded, var in zip(deduction_list, deduction_vars):
    deductions_new[ded] = (new_itemizers[var] * new_itemizers['s006']).sum()
    deductions_cur[ded] = (cur_itemizers[var] * cur_itemizers['s006']).sum()
ded_new_df = pd.DataFrame.from_dict(deductions_new, 'index')
ded_new_df.columns = ['Total']
ded_new_df['source'] = 'New'
ded_cur_df = pd.DataFrame.from_dict(deductions_cur, 'index')
ded_cur_df.columns = ['Total']
ded_cur_df['source'] = 'Current'

soi_deductions['source'] = 'SOI'

ded_cds_new = ColumnDataSource(ded_new_df)
ded_cds_cur = ColumnDataSource(ded_cur_df)
# Put the SOI rows in the same label order as the calculator frames so that
# bar k of every source refers to the same deduction.
# NOTE(review): assumes the soi_deductions row labels match `deduction_list`.
ded_cds_soi = ColumnDataSource(soi_deductions.reindex(deduction_list))

# Numeric x positions: each deduction gets a unit-wide slot, with the SOI,
# New and Current bars offset within it. The loop also avoids echoing the
# return value of .add() as cell output.
bar_slots = range(1, len(deduction_list) + 1)
for cds, offset in ((ded_cds_new, .5), (ded_cds_cur, .25), (ded_cds_soi, .75)):
    cds.add(data=[i - offset for i in bar_slots], name='xaxis')

'xaxis'

In [17]:
# The bars are placed by numeric position (the 'xaxis' column), so the category
# labels must be listed in the same order as the rows of the plotted sources.
# Take them from the data instead of hard-coding a list that can drift out of sync.
# NOTE(review): assumes the Current and SOI sources share this row order.
xaxis_list = list(ded_new_df.index)
f = figure(title='Itemized Deduction Totals', x_range=xaxis_list, width=800)
f.xaxis.major_label_orientation = 45
# One bar series per source; only the legend label, color and source differ
for bar_label, bar_color, bar_source in (('New', 'green', ded_cds_new),
                                         ('Current', 'red', ded_cds_cur),
                                         ('SOI', 'blue', ded_cds_soi)):
    f.vbar(x='xaxis', bottom=0, top='Total', legend=bar_label,
           color=bar_color, alpha=0.5, line_alpha=1,
           width=0.25, source=bar_source)
hover = HoverTool(tooltips=[('Total', '@Total{$0.00a}')])
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.add_tools(hover)

show(f)

In [18]:
# Absolute and relative gap between each PUF's deduction totals and the SOI totals
soi_total = soi_deductions['Total']
ded_error_df = pd.DataFrame()
for diff_col, pct_col, totals in (
        ('Difference: New', '% Difference: New', ded_new_df),
        ('Difference: Current', '% Difference: Current', ded_cur_df)):
    ded_error_df[diff_col] = totals['Total'] - soi_total
    # Relative error is taken from the stored difference column, as a percentage of SOI
    ded_error_df[pct_col] = 100 * ded_error_df[diff_col] / soi_total
print('Error in Itemized Deductions Relative to SOI Totals')
ded_error_df

Error in Itemized Deductions Relative to SOI Totals


Unnamed: 0,Difference: New,% Difference: New,Difference: Current,% Difference: Current
Medical Expenses,-12054421348.8,-9.3,-10263322756.7,-8.0
State and Local Taxes,-18630489216.9,-5.7,-15964512279.0,-4.9
Real Estate Taxes,2384469754.7,1.3,17294916696.2,9.6
Interest Paid,-16328258245.9,-5.3,-7293196129.2,-2.4
Charitable Cash Contributions,-6384396376.2,-4.1,-4690078031.9,-3.0
Charitable Non-Cash Contributions,-27175410720.7,-41.6,-31545208332.2,-48.3
Total Misc. Expenses,3753519968.3,3.0,4144485540.8,3.3
Net Casualty or Loss,2863801247.1,129.9,2271378406.5,103.0


### Itemizing Rate

In [19]:
# Flag itemizers (c04470 > 0) as 1/0. This uses pandas directly so the cell does
# not depend on `np` leaking into the namespace through `from taxcalc import *`.
calc_df['itemizer'] = (calc_df['c04470'] > 0).astype(int)
new_df['itemizer'] = (new_df['c04470'] > 0).astype(int)
# percentile() comes from notebookfunctions; it is called with the itemizer flag,
# 100 bins, and the wage (e00200) and weight (s006) columns.
pr_new = percentile(new_df, 'itemizer', 100, 'e00200', 's006')
pr_cur = percentile(calc_df, 'itemizer', 100, 'e00200', 's006')
# `item_index` is kept because the following cell reads it
item_index = pr_new.index
pr_cds_new = ColumnDataSource({'rate': pr_new,
                               'index': item_index})
# Plot the current series against its own index rather than the new one's
pr_cds_cur = ColumnDataSource({'rate': pr_cur,
                               'index': pr_cur.index})

hover_ded = HoverTool(tooltips=[('Percentile', '@index'), ('Itemizing Rate', '@rate{0.00%}')])
f = figure(title='Itemizing Rate by Wage Percentile',
           x_axis_label='Wage Percentile')
f.add_tools(hover_ded)
f.line('index', 'rate', legend='New', source=pr_cds_new)
f.line('index', 'rate', legend='Current', color='red', source=pr_cds_cur)
f.yaxis[0].formatter = NumeralTickFormatter(format='0.00%')
f.legend.location = 'top_left'
show(f)

In [20]:
# Restrict both frames to itemizers (c04470 > 0). Boolean-mask selection already
# yields a new frame; .copy() makes that explicit without copy.deepcopy.
itemizers_new = new_df[new_df['c04470'] > 0].copy()
mean_new = percentile(itemizers_new, 'c04470', 100, 'e00200', 's006')
itemizers = calc_df[calc_df['c04470'] > 0].copy()
mean_cur = percentile(itemizers, 'c04470', 100, 'e00200', 's006')
# Each series is plotted against its own index. The previous cell's `item_index`
# was computed over all filers, so it need not match these itemizer-only
# results, and using it made this cell depend on that cell's state.
mean_cds_new = ColumnDataSource({'mean': mean_new,
                                 'xaxis': mean_new.index})
mean_cds_cur = ColumnDataSource({'mean': mean_cur,
                                 'xaxis': mean_cur.index})

hover = HoverTool(tooltips=[('Percentile', '@xaxis'), ('Mean', '@mean{$0.00a}')])
f = figure(title='Mean Itemized Deduction Total Among Itemizers',
           x_axis_label='Wage Percentile')
f.add_tools(hover)
f.line('xaxis', 'mean', legend='New', source=mean_cds_new)
f.line('xaxis', 'mean', legend='Current', color='red', source=mean_cds_cur)
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
show(f)

# Tax Liability by Year

In [21]:
# Output column name -> calculator variable holding that liability
liability_vars = (('Income', 'iitax'), ('Payroll', 'payrolltax'), ('Combined', 'combined'))

# new liabilities, seeded with the year both calculators are currently at (2014)
li_new = {column_name: [calc_new.weighted_total(var)] for column_name, var in liability_vars}
li_new['source'] = ['New']
# current liabilities
li_cur = {column_name: [calc.weighted_total(var)] for column_name, var in liability_vars}
li_cur['source'] = ['Current']

years = [2014]
# NOTE: this loop advances `calc` and `calc_new` year by year through 2027
for year in range(2015, 2028):
    years.append(year)
    calc.advance_to_year(year)
    calc.calc_all()
    calc_new.advance_to_year(year)
    calc_new.calc_all()
    for column_name, var in liability_vars:
        li_new[column_name].append(calc_new.weighted_total(var))
    li_new['source'].append('New')
    for column_name, var in liability_vars:
        li_cur[column_name].append(calc.weighted_total(var))
    li_cur['source'].append('Current')
# Both dicts share the same `years` list object
li_new['years'] = years
li_cur['years'] = years

In [22]:
# Wrap each liabilities dict in a ColumnDataSource for plotting
li_cds_new, li_cds_cur = (ColumnDataSource(liabilities) for liabilities in (li_new, li_cur))

In [29]:
# Take the year span for the title from the data. The series starts at the
# first entry of the years list (2014), not 2015 as the old fixed title said.
first_year, last_year = li_cur['years'][0], li_cur['years'][-1]
f = figure(title='Tax Liabilities {}-{}'.format(first_year, last_year))
f.width = 800
f.height = 500

# (liability column, PUF label, data source, color) for each plotted series
series_specs = [
    ('Income', 'Current', li_cds_cur, 'blue'),
    ('Income', 'New', li_cds_new, 'cyan'),
    ('Payroll', 'Current', li_cds_cur, 'purple'),
    ('Payroll', 'New', li_cds_new, 'magenta'),
    ('Combined', 'Current', li_cds_cur, 'green'),
    ('Combined', 'New', li_cds_new, 'lime'),
]
# Lines first, then markers, so the markers are drawn on top of every line
for tax, puf, source, color in series_specs:
    f.line('years', tax, color=color, line_width=2,
           legend='{}-{}'.format(tax, puf), source=source)

# Circle renderers grouped by liability so each hover tool only fires on its own series
markers = OrderedDict((tax, []) for tax in ('Income', 'Payroll', 'Combined'))
for tax, puf, source, color in series_specs:
    markers[tax].append(f.circle('years', tax, color=color, size=5,
                                 legend='{}-{}'.format(tax, puf), source=source))
f.legend.location = 'top_left'
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')

# One hover tool per liability type, showing that liability's dollar value
for tax, renderers in markers.items():
    f.add_tools(HoverTool(tooltips=[('PUF', '@source'), ('Year', '@years'),
                                    (tax, '@{}{{$0.00a}}'.format(tax))],
                          renderers=renderers))
show(f)

In [24]:
# Current-PUF liabilities as a year-indexed table (helper columns dropped)
ldf_current = pd.DataFrame(li_cur, index=years).drop(['source', 'years'], axis=1)
print('Current Tax Liabilities by Year')
ldf_current

Current Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll
2014,2324811726624.2,1371011564057.3,953800162566.9
2015,2496405141206.3,1499687317297.4,996717823908.9
2016,2572374238835.4,1539019632733.9,1033354606101.5
2017,2725712039147.9,1637704761092.9,1088007278055.0
2018,2667916509164.4,1530852268398.2,1137064240766.3
2019,2773772497820.2,1591742617388.6,1182029880431.7
2020,2878645055893.6,1654554132252.4,1224090923641.2
2021,2994094216895.6,1725484401397.6,1268609815498.0
2022,3122359171982.6,1805000695989.7,1317358475993.0
2023,3262271239700.0,1892909990596.5,1369361249103.5


In [25]:
# New-PUF liabilities as a year-indexed table (helper columns dropped)
ldf_new = pd.DataFrame(li_new, index=years).drop(['source', 'years'], axis=1)
print('New Tax Liabilities by Year')
ldf_new

New Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll
2014,2328694256406.3,1369940632394.1,958753624012.2
2015,2514619881589.3,1503722884185.4,1010896997404.0
2016,2424699952421.1,1369705592017.0,1054994360404.1
2017,2558779503957.4,1455120339554.9,1103659164402.5
2018,2678866195380.7,1525891648314.6,1152974547066.1
2019,2783880995734.4,1585742322844.7,1198138672889.7
2020,2887506598147.4,1647009960641.0,1240496637506.4
2021,3003659592504.4,1717999668169.9,1285659924334.4
2022,3133575600428.0,1798501044332.5,1335074556095.6
2023,3273406485430.8,1885696118196.9,1387710367233.9


In [26]:
print('Difference in Tax Liabilities by Year')
# Year-by-year difference (new minus current), with a column-sum row labelled 'Total'.
diff_df = ldf_new - ldf_current
# Add the total row by label. This leaves `years` untouched: that list is shared
# with li_new['years'] and li_cur['years'], so appending 'Total' to it broke
# re-running this cell and the plotting/table cells above. It also avoids
# DataFrame.append, which newer pandas no longer provides.
diff_df.loc['Total'] = diff_df.sum()
diff_df

Difference in Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll
2014,3882529782.1,-1070931663.2,4953461445.3
2015,18214740383.0,4035566888.0,14179173495.1
2016,-147674286414.3,-169314040717.0,21639754302.7
2017,-166932535190.5,-182584421538.0,15651886347.5
2018,10949686216.3,-4960620083.5,15910306299.8
2019,10108497914.2,-6000294543.8,16108792458.0
2020,8861542253.8,-7544171611.4,16405713865.2
2021,9565375608.8,-7484733227.7,17050108836.4
2022,11216428445.4,-6499651657.2,17716080102.6
2023,11135245730.8,-7213872399.7,18349118130.4


In [27]:
print('Pct. Change in Tax Liabilities by Year')
# (new - current) / current, expressed in percent
(ldf_new - ldf_current).div(ldf_current).mul(100)

Pct. Change in Tax Liabilities by Year


Unnamed: 0,Combined,Income,Payroll
2014,0.2,-0.1,0.5
2015,0.7,0.3,1.4
2016,-5.7,-11.0,2.1
2017,-6.1,-11.1,1.4
2018,0.4,-0.3,1.4
2019,0.4,-0.4,1.4
2020,0.3,-0.5,1.3
2021,0.3,-0.4,1.3
2022,0.4,-0.4,1.3
2023,0.3,-0.4,1.3
