[Rendered Notebook](http://nbviewer.jupyter.org/github/andersonfrailey/Notebook-Uploads/blob/master/newcpswts.ipynb)

In [20]:
import pandas as pd
import numpy as np
import taxcalc as tc
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from bokeh.layouts import layout, row, gridplot, column
from notebookfunctions import distribution, index_list, percentile
output_notebook()

In [21]:
years = [i for i in range(2014, 2028)]

In [22]:
cps = pd.read_csv('cps_data/cps.csv.gz', compression='gzip')
new_wts = pd.read_csv('cps_stage2/cps_weights.csv.gz', compression='gzip')

In [23]:
new_wts.head()

Unnamed: 0,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,23002,23519,23890,24301,24681,25014,25309,25608,25895,26185,26477,26774,27075,27377
1,19460,19898,20211,20559,20881,21163,21412,21665,21908,22153,22400,22651,22906,23161
2,24528,28361,29303,30272,31287,32350,33466,34564,35707,36838,37939,39085,40169,41182
3,17133,26277,17794,18101,18383,18632,18851,19074,19288,19504,19721,19942,20167,20391
4,26012,30078,31077,32104,33181,34308,35491,36657,37869,39068,40236,41451,42600,43675


In [24]:
base = tc.Calculator(records=tc.Records.cps_constructor(),
                     policy=tc.Policy())
new = tc.Calculator(records=tc.Records(data=cps, weights=new_wts, start_year=2014),
                    policy=tc.Policy())

You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.
You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.


In [25]:
# collect data for each of the years
base_data = {}
new_data = {}
var_list = ['s006', 'iitax', 'combined', 'payrolltax', 'c00100',
            'expanded_income']
for year in years:
    base.advance_to_year(year)
    base.calc_all()
    new.advance_to_year(year)
    new.calc_all()
    base_data[year] = base.dataframe(var_list)
    new_data[year] = new.dataframe(var_list)

In [26]:
# tax liability by year in 2014
data = {'year': [], 'combined_base': [], 'combined_new': [], 'combined_change': [],
        'combined_pct_change': [], 'iitax_pct_change': [], 'payroll_pct_change': [],
        'iitax_base': [], 'iitax_new': [], 'iitax_change': [],
        'payroll_base': [], 'payroll_new': [], 'payroll_change': []}
for year in years:
    data['year'].append(year)
    combined_new = (new_data[year]['combined'] * new_data[year]['s006']).sum()
    combined_base = (base_data[year]['combined'] * base_data[year]['s006']).sum()
    combined_change = combined_new - combined_base
    combined_pct_change = (combined_change / combined_base) * 100
    
    iitax_new = (new_data[year]['iitax'] * new_data[year]['s006']).sum()
    iitax_base = (base_data[year]['iitax'] * base_data[year]['s006']).sum()
    iitax_change = iitax_new - iitax_base
    iitax_pct_change = (iitax_change / iitax_base) * 100

    payroll_new = (new_data[year]['payrolltax'] * new_data[year]['s006']).sum()
    payroll_base = (base_data[year]['payrolltax'] * base_data[year]['s006']).sum()
    payroll_change = payroll_new - payroll_base
    payroll_pct_change = (payroll_change / payroll_base) * 100

    data['combined_base'].append(combined_base)
    data['combined_new'].append(combined_new)
    data['combined_change'].append(combined_change)
    data['combined_pct_change'].append(combined_pct_change)
    data['iitax_base'].append(iitax_base)
    data['iitax_new'].append(iitax_new)
    data['iitax_change'].append(iitax_change)
    data['iitax_pct_change'].append(iitax_pct_change)
    data['payroll_base'].append(payroll_base)
    data['payroll_new'].append(payroll_new)
    data['payroll_change'].append(payroll_change)
    data['payroll_pct_change'].append(payroll_pct_change)
cds = ColumnDataSource(data)
data_df = pd.DataFrame(data, index=data['year'])

In [27]:
f = figure(title='Tax Liability by Year')
f.line(x='year', y='combined_base', line_width=2, color='blue', source=cds, legend='Combined-Base')
f.line(x='year', y='combined_new', line_width=2, color='lightblue', source=cds, legend='Combined-new')
f.line(x='year', y='iitax_base', line_width=2, color='green', source=cds, legend='IITax-Base')
f.line(x='year', y='iitax_new', line_width=2, color='lime', source=cds, legend='IITax-New')
f.line(x='year', y='payroll_base', line_width=2, color='red', source=cds, legend='Payroll-Base')
f.line(x='year', y='payroll_new', line_width=2, color='pink', source=cds, legend='Payroll-New')
f.legend.location='top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
show(f)

In [28]:
pd.options.display.float_format = '{:,.2f}'.format
print('Combined Tax Liability')
data_df[['combined_base', 'combined_new', 'combined_change', 'combined_pct_change']].transpose()

Combined Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
combined_base,2049761794852.92,2110841250755.81,2192101336640.05,2328589406483.97,2326852347902.63,2455208000725.77,2578843260672.17,2711805209706.87,2853027976383.55,3001258288406.5,3155871943780.05,3317717564356.19,3682346347868.92,3862491374617.44
combined_new,2084831791776.91,2295110576180.62,2370532771117.32,2503099387988.52,2489320156856.33,2614023314335.22,2733363461926.12,2861052957906.73,2996785636297.09,3138967785113.46,3284019034842.64,3427805535299.55,3785096780502.17,3948583315675.52
combined_change,35069996924.0,184269325424.81,178431434477.27,174509981504.55,162467808953.7,158815313609.45,154520201253.95,149247748199.86,143757659913.55,137709496706.96,128147091062.58,110087970943.36,102750432633.25,86091941058.08
combined_pct_change,1.71,8.73,8.14,7.49,6.98,6.47,5.99,5.5,5.04,4.59,4.06,3.32,2.79,2.23


In [29]:
print('Income Tax Liability')
data_df[['iitax_base', 'iitax_new', 'iitax_change', 'iitax_pct_change']].transpose()

Income Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
iitax_base,1052768062368.58,1079321792276.94,1118073598335.02,1194682943874.67,1134225986055.48,1207623350219.63,1278600912769.22,1355456344576.62,1435537108564.83,1518783771055.15,1605135131486.43,1695298234281.98,1984937041617.15,2086678633847.68
iitax_new,1085451650467.22,1225642282849.22,1262593055473.18,1337381627474.41,1268817324182.53,1343705638699.74,1416819497983.29,1495527798011.77,1578119381298.53,1664048618178.26,1750403286831.14,1833195689987.18,2127218857253.27,2225058758581.44
iitax_change,32683588098.65,146320490572.28,144519457138.17,142698683599.74,134591338127.05,136082288480.11,138218585214.08,140071453435.15,142582272733.7,145264847123.11,145268155344.72,137897455705.2,142281815636.12,138380124733.76
iitax_pct_change,3.1,13.56,12.93,11.94,11.87,11.27,10.81,10.33,9.93,9.56,9.05,8.13,7.17,6.63


In [30]:
print('Payroll Tax Liability')
data_df[['payroll_base', 'payroll_new', 'payroll_change', 'payroll_pct_change']].transpose()

Payroll Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
payroll_base,996993732484.34,1031519458478.87,1074027738305.03,1133906462609.3,1192626361847.16,1247584650506.14,1300242347902.95,1356348865130.26,1417490867818.72,1482474517351.36,1550736812293.63,1622419330074.21,1697409306251.77,1775812740769.76
payroll_new,999380141309.69,1069468293331.4,1107939715644.13,1165717760514.11,1220502832673.8,1270317675635.47,1316543963942.83,1365525159894.96,1418666254998.56,1474919166935.2,1533615748011.49,1594609845312.38,1657877923248.91,1723524557094.08
payroll_change,2386408825.35,37948834852.52,33911977339.1,31811297904.82,27876470826.64,22733025129.33,16301616039.88,9176294764.7,1175387179.85,-7555350416.16,-17121064282.13,-27809484761.83,-39531383002.87,-52288183675.68
payroll_pct_change,0.24,3.68,3.16,2.81,2.34,1.82,1.25,0.68,0.08,-0.51,-1.1,-1.71,-2.33,-2.94


In [31]:
def hist_plots(base_data, new_data, year):
    change_wts = new_data['s006'] - base_data['s006']
    pct_change_wts = (change_wts / base_data['s006']) * 100
    hist_diff, edges_diff = np.histogram(change_wts, density=True, bins=50)
    hist_pct, edges_pct = np.histogram(pct_change_wts, density=True, bins=50)
    
    # create plots
    f1 = figure(title=f'Change in Weights for {year}', width=350, height=350)  # difference
    f1.quad(top=hist_diff, bottom=0, left=edges_diff[:-1], right=edges_diff[1:],
            fill_color='cyan', line_color='black')
    f1.yaxis.axis_label = 'Frequency'
    f1.xaxis.axis_label = 'Change'

    f2 = figure(title=f'Pct Change in Weights for {year}', width=350, height=350)  # pct change
    f2.quad(top=hist_pct, bottom=0, left=edges_pct[:-1], right=edges_pct[1:],
            fill_color='cyan', line_color='black')
    f2.yaxis.axis_label = 'Frequency'
    f2.xaxis.axis_label = 'Pct Change'
    
    return f1, f2

In [32]:
# analyze change in each record's weight
plot_list_diff = []  # holds graphs showing difference in wt
plot_list_pct = []  # holds graphs showing histogram of pct change in wt
for year in years:
    base_data1 = base_data[year]
    new_data1 = new_data[year]

    hist_diff1, hist_pct1 = hist_plots(base_data1, new_data1, year)
    plot_list_diff.append(hist_diff1)
    plot_list_pct.append(hist_pct1)

In [33]:
show(column(plot_list_diff))

In [34]:
show(column(plot_list_pct))