In [38]:
import pandas as pd
import numpy as np
import taxcalc as tc
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from bokeh.layouts import layout, row, gridplot, column
from notebookfunctions import distribution, index_list, percentile
output_notebook()

In [2]:
years = [i for i in range(2014, 2028)]

In [3]:
cps = pd.read_csv('cps_data/cps.csv.gz', compression='gzip')
new_wts = pd.read_csv('cps_stage2/cps_weights.csv.gz', compression='gzip')

In [4]:
new_wts.head()

Unnamed: 0,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,23002,23292,23659,24067,24443,24773,25065,25362,25646,25933,26223,26517,26813,27114
1,19460,19706,20016,20361,20679,20959,21206,21457,21697,21940,22185,22434,22685,22939
2,24528,25369,26213,27080,27988,28937,29937,30918,31942,32952,33940,34964,35934,36840
3,17133,17349,17623,17926,18206,18452,18670,18890,19102,19316,19532,19751,19972,20195
4,26012,26905,27800,28719,29682,30689,31749,32790,33875,34947,35994,37080,38109,39070


In [5]:
base = tc.Calculator(records=tc.Records.cps_constructor(),
                     policy=tc.Policy())
new = tc.Calculator(records=tc.Records(data=cps, weights=new_wts, start_year=2014),
                    policy=tc.Policy())

You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.
You loaded data for 2014.
Tax-Calculator startup automatically extrapolated your data to 2014.


In [6]:
# collect data for each of the years
base_data = {}
new_data = {}
var_list = ['s006', 'iitax', 'combined', 'payrolltax', 'c00100',
            'expanded_income']
for year in years:
    base.advance_to_year(year)
    base.calc_all()
    new.advance_to_year(year)
    new.calc_all()
    base_data[year] = base.dataframe(var_list)
    new_data[year] = new.dataframe(var_list)

In [7]:
# tax liability by year in 2014
data = {'year': [], 'combined_base': [], 'combined_new': [], 'combined_change': [],
        'combined_pct_change': [], 'iitax_pct_change': [], 'payroll_pct_change': [],
        'iitax_base': [], 'iitax_new': [], 'iitax_change': [],
        'payroll_base': [], 'payroll_new': [], 'payroll_change': []}
for year in years:
    data['year'].append(year)
    combined_change = ((new_data[year]['combined'] - base_data[year]['combined']) *
                       base_data[year]['s006']).sum()
    iitax_change = ((new_data[year]['iitax'] - base_data[year]['iitax']) *
                    base_data[year]['s006']).sum()
    payroll_change = ((new_data[year]['payrolltax'] - base_data[year]['payrolltax']) *
                       base_data[year]['s006']).sum()
    data['combined_base'].append((base_data[year]['combined'] * base_data[year]['s006']).sum())
    data['combined_new'].append((new_data[year]['combined'] * new_data[year]['s006']).sum())
    data['combined_change'].append(combined_change)
    data['combined_pct_change'].append((combined_change / (base_data[year]['combined'] * base_data[year]['s006']).sum()) * 100)
    data['iitax_base'].append((base_data[year]['iitax'] * base_data[year]['s006']).sum())
    data['iitax_new'].append((new_data[year]['iitax'] * new_data[year]['s006']).sum())
    data['iitax_change'].append(iitax_change)
    data['iitax_pct_change'].append((iitax_change / (base_data[year]['iitax'] * base_data[year]['s006']).sum()) * 100)
    data['payroll_base'].append((base_data[year]['payrolltax'] * base_data[year]['s006']).sum())
    data['payroll_new'].append((new_data[year]['payrolltax'] * new_data[year]['s006']).sum())
    data['payroll_change'].append(payroll_change)
    data['payroll_pct_change'].append((payroll_change / (base_data[year]['payrolltax'] * base_data[year]['s006']).sum()) * 100)
cds = ColumnDataSource(data)
data_df = pd.DataFrame(data, index=data['year'])

In [8]:
f = figure(title='Tax Liability by Year')
f.line(x='year', y='combined_base', line_width=2, color='blue', source=cds, legend='Combined-Base')
f.line(x='year', y='combined_new', line_width=2, color='lightblue', source=cds, legend='Combined-new')
f.line(x='year', y='iitax_base', line_width=2, color='green', source=cds, legend='IITax-Base')
f.line(x='year', y='iitax_new', line_width=2, color='lime', source=cds, legend='IITax-New')
f.line(x='year', y='payroll_base', line_width=2, color='red', source=cds, legend='Payroll-Base')
f.line(x='year', y='payroll_new', line_width=2, color='pink', source=cds, legend='Payroll-New')
f.legend.location='top_left'
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
show(f)

In [10]:
pd.options.display.float_format = '{:,.2f}'.format
print('Combined Tax Liability')
data_df[['combined_base', 'combined_new', 'combined_change', 'combined_pct_change']].transpose()

Combined Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
combined_base,2049761794852.92,2110841250755.81,2192101336640.05,2328589406483.97,2326852347902.63,2455208000725.77,2578843260672.17,2711805209706.87,2853027976383.55,3001258288406.5,3155871943780.05,3317717564356.19,3682346347868.92,3862491374617.44
combined_new,2084831791776.91,2227866048934.6,2301483090379.68,2431091153696.04,2418858387996.62,2539256822608.46,2654739806451.6,2778319777588.02,2909246629920.52,3045034190334.13,3186057488138.09,3332429799601.7,3675426636598.99,3825669018978.03
combined_change,26413499785.21,27033682686.64,27542615012.17,28602340502.34,37107392064.59,38716512290.4,40529626433.19,42146550093.93,44034258849.26,45890196410.0,47777781030.12,49699952881.09,42416313359.43,43866277687.06
combined_pct_change,1.29,1.28,1.26,1.23,1.59,1.58,1.57,1.55,1.54,1.53,1.51,1.5,1.15,1.14


In [11]:
print('Income Tax Liability')
data_df[['iitax_base', 'iitax_new', 'iitax_change', 'iitax_pct_change']].transpose()

Income Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
iitax_base,1052768062368.58,1079321792276.94,1118073598335.02,1194682943874.67,1134225986055.48,1207623350219.63,1278600912769.22,1355456344576.62,1435537108564.83,1518783771055.15,1605135131486.43,1695298234281.98,1984937041617.15,2086678633847.68
iitax_new,1085451650467.22,1176561628015.85,1212301679282.04,1285708589620.07,1220045104581.47,1291661457148.97,1361629058117.49,1436952417366.96,1515623101256.72,1596070637663.63,1679430994854.57,1765775846303.1,2046470478838.13,2132198093192.09
iitax_change,26410123218.17,27030239999.87,27539203463.57,28598543123.03,37103410956.3,38712343400.52,40525274342.73,42142028103.61,44029535868.34,45885244644.23,47772593830.65,49694513100.16,42410604520.34,43860315443.03
iitax_pct_change,2.51,2.5,2.46,2.39,3.27,3.21,3.17,3.11,3.07,3.02,2.98,2.93,2.14,2.1


In [12]:
print('Payroll Tax Liability')
data_df[['payroll_base', 'payroll_new', 'payroll_change', 'payroll_pct_change']].transpose()

Payroll Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
payroll_base,996993732484.34,1031519458478.87,1074027738305.03,1133906462609.3,1192626361847.16,1247584650506.14,1300242347902.95,1356348865130.26,1417490867818.72,1482474517351.36,1550736812293.63,1622419330074.21,1697409306251.77,1775812740769.76
payroll_new,999380141309.69,1051304420918.74,1089181411097.63,1145382564075.98,1198813283415.14,1247595365459.49,1293110748334.11,1341367360221.05,1393623528663.8,1448963552670.49,1506626493283.52,1566653953298.61,1628956157760.86,1693470925785.94
payroll_change,3376567.03,3442686.77,3411548.6,3797379.31,3981108.28,4168889.89,4352090.46,4521990.32,4722980.93,4951765.78,5187199.47,5439780.93,5708839.09,5962244.04
payroll_pct_change,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
def hist_plots(base_data, new_data, year):
    change_wts = new_data['s006'] - base_data['s006']
    pct_change_wts = (change_wts / base_data['s006'])
    hist_diff, edges_diff = np.histogram(change_wts, density=True, bins=50)
    hist_pct, edges_pct = np.histogram(pct_change_wts, density=True, bins=50)
    
    # create plots
    f1 = figure(title=f'Change in Weights for {year}', width=350, height=350)  # difference
    f1.quad(top=hist_diff, bottom=0, left=edges_diff[:-1], right=edges_diff[1:],
            fill_color='cyan', line_color='black')

    f2 = figure(title=f'Pct Change in Weights for {year}', width=350, height=350)  # pct change
    f2.quad(top=hist_pct, bottom=0, left=edges_pct[:-1], right=edges_pct[1:],
            fill_color='cyan', line_color='black')
    
    return f1, f2

In [47]:
# analyze change in each record's weight
plot_list_diff = []  # holds graphs showing difference in wt
plot_list_pct = []  # holds graphs showing histogram of pct change in wt
for year in years:
    base_data1 = base_data[year]
    new_data1 = new_data[year]

    hist_diff1, hist_pct1 = hist_plots(base_data1, new_data1, year)
    plot_list_diff.append(hist_diff1)
    plot_list_pct.append(hist_pct1)

In [48]:
show(column(plot_list_diff))

In [50]:
show(column(plot_list_pct))