[Rendered Notebook](http://nbviewer.jupyter.org/github/andersonfrailey/Notebook-Uploads/blob/master/newcpswts.ipynb)

In [38]:
import pandas as pd
import numpy as np
import taxcalc as tc
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from bokeh.layouts import layout, row, gridplot, column
from notebookfunctions import distribution, index_list, percentile
# Configure Bokeh so the show() calls below render their plots inline in the notebook
output_notebook()

In [39]:
# Every year covered by the comparison: 2014 through 2027 inclusive
years = list(range(2014, 2028))

In [40]:
# CPS input records.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of cps.csv.gz.
cps = pd.read_csv(
    '/Users/andersonfrailey/Tax-Calculator/taxcalc/cps.csv.gz',
    compression='gzip',
)
# Candidate weights file (one WT<year> column per year), relative to the working directory
new_wts = pd.read_csv(
    'cps_stage2/cps_weights.csv.gz',
    compression='gzip',
)

In [41]:
# Preview the first rows of the new weights (one WT<year> column per year)
new_wts.head()

Unnamed: 0,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,23002,23519,23890,24301,24681,25014,25309,25608,25895,26185,26477,26774,27075,27377
1,19460,19898,20211,20559,20881,21163,21412,21665,21908,22153,22400,22651,22906,23161
2,24528,28361,29303,30272,31287,32350,33466,34564,35707,36838,37939,39085,40169,41182
3,17133,26277,17794,18101,18383,18632,18851,19074,19288,19504,19721,19942,20167,20391
4,26012,30078,31077,32104,33181,34308,35491,36657,37869,39068,40236,41451,42600,43675


In [42]:
# Baseline calculator: records come from Tax-Calculator's own CPS constructor,
# paired with a default-constructed Policy.
base = tc.Calculator(records=tc.Records.cps_constructor(),
                     policy=tc.Policy())
# Comparison calculator: the CPS file loaded above, paired with the new weights.
# NOTE(review): every Records argument other than data/weights/start_year is left
# at its default here, while cps_constructor() chooses its own values. Confirm that
# the two record sets really differ only in their weights (e.g. check which adjust
# ratios each one ends up using) before attributing differences to the weights.
new = tc.Calculator(records=tc.Records(data=cps, weights=new_wts, start_year=2014),
                    policy=tc.Policy())

You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.
You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.


In [43]:
# Run both calculators for every year and keep the variables needed below.
# NOTE: this advances `base` and `new` in place, so the calculator cell above
# must be re-run before this cell is executed a second time.
base_data = {}
new_data = {}
var_list = ['s006', 'iitax', 'combined', 'payrolltax', 'c00100',
            'expanded_income']
# each calculator is paired with the dict that receives its per-year results
calc_outputs = ((base, base_data), (new, new_data))
for year in years:
    for calc, results in calc_outputs:
        calc.advance_to_year(year)
        calc.calc_all()
        results[year] = calc.dataframe(var_list)

In [44]:
# Weighted tax liability for each year under the base and the new weights.
# Each total is the sum over records of (tax variable * record weight s006);
# "change" is new minus base and "pct_change" is that change relative to base.
data = {'year': [], 'combined_base': [], 'combined_new': [], 'combined_change': [],
        'combined_pct_change': [], 'iitax_pct_change': [], 'payroll_pct_change': [],
        'iitax_base': [], 'iitax_new': [], 'iitax_change': [],
        'payroll_base': [], 'payroll_new': [], 'payroll_change': []}
# (column in the calculator output, prefix of the matching keys in `data`)
tax_measures = (('combined', 'combined'),
                ('iitax', 'iitax'),
                ('payrolltax', 'payroll'))
for year in years:
    data['year'].append(year)
    base_year_df = base_data[year]
    new_year_df = new_data[year]
    for tax_var, prefix in tax_measures:
        total_new = (new_year_df[tax_var] * new_year_df['s006']).sum()
        total_base = (base_year_df[tax_var] * base_year_df['s006']).sum()
        change = total_new - total_base
        pct_change = (change / total_base) * 100

        data[f'{prefix}_base'].append(total_base)
        data[f'{prefix}_new'].append(total_new)
        data[f'{prefix}_change'].append(change)
        data[f'{prefix}_pct_change'].append(pct_change)
# the same columns in two forms: a Bokeh source for plotting, a DataFrame for tables
cds = ColumnDataSource(data)
data_df = pd.DataFrame(data, index=data['year'])

In [45]:
# One line per (tax measure, weight set), all drawn from the shared `cds` source
f = figure(title='Tax Liability by Year')
# (column in cds, line color, legend entry)
line_specs = [
    ('combined_base', 'blue', 'Combined-Base'),
    ('combined_new', 'lightblue', 'Combined-new'),
    ('iitax_base', 'green', 'IITax-Base'),
    ('iitax_new', 'lime', 'IITax-New'),
    ('payroll_base', 'red', 'Payroll-Base'),
    ('payroll_new', 'pink', 'Payroll-New'),
]
for column_name, line_color, legend_text in line_specs:
    f.line(x='year', y=column_name, line_width=2, color=line_color,
           source=cds, legend=legend_text)
# the legend is positioned after the lines that populate it have been added
f.legend.location = 'top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
show(f)

In [46]:
# From here on, display floats with thousands separators and two decimals
pd.set_option('display.float_format', '{:,.2f}'.format)
print('Combined Tax Liability')
combined_columns = ['combined_base', 'combined_new', 'combined_change', 'combined_pct_change']
# transposed so that years run across the columns
data_df[combined_columns].T

Combined Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
combined_base,2049761794852.92,2110841250755.81,2192101336640.05,2328589406483.97,2326852347902.63,2455208000725.77,2578843260672.17,2711805209706.87,2853027976383.55,3001258288406.5,3155871943780.05,3317717564356.19,3682346347868.92,3862491374617.44
combined_new,2058390497110.17,2271690936870.95,2347085334260.75,2479418762478.26,2457484512690.08,2581454789579.83,2700165928804.79,2827084110131.07,2962020856424.67,3103050844271.33,3247337684766.05,3390302243030.46,3756037098960.04,3918888283719.68
combined_change,8628702257.25,160849686115.13,154983997620.7,150829355994.29,130632164787.45,126246788854.06,121322668132.62,115278900424.2,108992880041.12,101792555864.83,91465740986.0,72584678674.27,73690751091.12,56396909102.24
combined_pct_change,0.42,7.62,7.07,6.48,5.61,5.14,4.7,4.25,3.82,3.39,2.9,2.19,2.0,1.46


In [47]:
print('Income Tax Liability')
iitax_columns = ['iitax_base', 'iitax_new', 'iitax_change', 'iitax_pct_change']
# transposed so that years run across the columns
data_df[iitax_columns].T

Income Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
iitax_base,1052768062368.58,1079321792276.94,1118073598335.02,1194682943874.67,1134225986055.48,1207623350219.63,1278600912769.22,1355456344576.62,1435537108564.83,1518783771055.15,1605135131486.43,1695298234281.98,1984937041617.15,2086678633847.68
iitax_new,1059013729033.85,1202226585586.57,1239149637816.91,1313705269307.49,1236986326416.77,1311141901323.8,1383626797718.44,1461563882221.55,1543359589580.43,1628136753597.27,1713727151569.36,1795697763985.66,2098164719867.73,2195369456455.77
iitax_change,6245666665.27,122904793309.63,121076039481.9,119022325432.82,102760340361.29,103518551104.17,105025884949.22,106107537644.93,107822481015.6,109352982542.12,108592020082.93,100399529703.68,113227678250.58,108690822608.1
iitax_pct_change,0.59,11.39,10.83,9.96,9.06,8.57,8.21,7.83,7.51,7.2,6.77,5.92,5.7,5.21


In [48]:
print('Payroll Tax Liability')
payroll_columns = ['payroll_base', 'payroll_new', 'payroll_change', 'payroll_pct_change']
# transposed so that years run across the columns
data_df[payroll_columns].T

Payroll Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
payroll_base,996993732484.34,1031519458478.87,1074027738305.03,1133906462609.3,1192626361847.16,1247584650506.14,1300242347902.95,1356348865130.26,1417490867818.72,1482474517351.36,1550736812293.63,1622419330074.21,1697409306251.77,1775812740769.76
payroll_new,999376768076.32,1069464351284.38,1107935696443.84,1165713493170.77,1220498186273.32,1270312888256.03,1316539131086.35,1365520227909.53,1418661266844.24,1474914090674.07,1533610533196.69,1594604479044.8,1657872379092.31,1723518827263.91
payroll_change,2383035591.98,37944892805.5,33907958138.8,31807030561.48,27871824426.16,22728237749.89,16296783183.4,9171362779.27,1170399025.52,-7560426677.29,-17126279096.94,-27814851029.41,-39536927159.46,-52293913505.86
payroll_pct_change,0.24,3.68,3.16,2.81,2.34,1.82,1.25,0.68,0.08,-0.51,-1.1,-1.71,-2.33,-2.94


In [49]:
def _weight_histogram(values, title, x_label):
    """Draw a 50-bin density histogram of ``values`` on a 350x350 Bokeh figure.

    Non-finite entries (NaN, +/-inf) are dropped before binning: np.histogram
    cannot derive a bin range from them and raises ValueError.
    """
    values = np.asarray(values, dtype=float)
    values = values[np.isfinite(values)]
    hist, edges = np.histogram(values, density=True, bins=50)

    fig = figure(title=title, width=350, height=350)
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color='cyan', line_color='black')
    # density=True normalizes the bars to integrate to 1, so the heights are
    # densities rather than record counts
    fig.yaxis.axis_label = 'Density'
    fig.xaxis.axis_label = x_label
    return fig


def hist_plots(base_data, new_data, year):
    """Plot how each record's weight differs between two sets of results.

    Parameters
    ----------
    base_data : object indexable by column name (e.g. a DataFrame)
        Baseline results; only the 's006' column is read.
    new_data : same kind of object as ``base_data``
        Results under the new weights; only the 's006' column is read.
    year : any
        Interpolated into the two plot titles.

    Returns
    -------
    tuple
        ``(f1, f2)``: f1 is the histogram of ``new - base`` and f2 is the
        histogram of that difference as a percentage of the base weight.
    """
    change_wts = new_data['s006'] - base_data['s006']
    # a base weight of 0 yields inf (or NaN when the change is 0 as well);
    # the helper drops those entries instead of letting np.histogram fail
    pct_change_wts = (change_wts / base_data['s006']) * 100

    f1 = _weight_histogram(change_wts, f'Change in Weights for {year}', 'Change')
    f2 = _weight_histogram(pct_change_wts, f'Pct Change in Weights for {year}',
                           'Pct Change')

    return f1, f2

In [50]:
# Per-year histograms of how each record's weight changed
plot_list_diff = []  # one figure per year: difference in weight
plot_list_pct = []  # one figure per year: percent change in weight
for year in years:
    diff_fig, pct_fig = hist_plots(base_data[year], new_data[year], year)
    plot_list_diff.append(diff_fig)
    plot_list_pct.append(pct_fig)

In [51]:
# Stack the per-year weight-difference histograms vertically and display them
show(column(plot_list_diff))

In [52]:
# Stack the per-year percent-change histograms vertically and display them
show(column(plot_list_pct))