In [23]:
import os

import numpy as np
import pandas as pd
import taxcalc as tc
from bokeh.io import show, output_notebook
from bokeh.layouts import layout, row, gridplot, column
from bokeh.models import HoverTool, ColumnDataSource, NumeralTickFormatter
from bokeh.plotting import figure

from notebookfunctions import distribution, index_list, percentile
# Render Bokeh figures inline in the notebook rather than in a separate file.
output_notebook()

In [24]:
# Years covered by the comparison: 2014 through 2027 inclusive.
years = list(range(2014, 2028))

In [25]:
puf_path = 'puf_data/puf.csv'
new_wts = pd.read_csv('puf_stage2/puf_weights.csv.gz')
# I'm reading in weights because the taxcalc package hasn't been updated since the new PUF was
# adopted and I want to compare weights with the same PUF.
# The current weights live in a local Tax-Calculator checkout. Its location can be
# overridden with the PUF_CUR_WEIGHTS_PATH environment variable; by default the
# checkout is expected in the user's home directory, so the notebook is no longer
# tied to one particular user account.
cur_wts_path = os.environ.get(
    'PUF_CUR_WEIGHTS_PATH',
    os.path.expanduser('~/Tax-Calculator/taxcalc/puf_weights.csv.gz'))
cur_wts = pd.read_csv(cur_wts_path)

In [26]:
# Preview the first rows of the new weights (one WT<year> column per year).
new_wts.head()

Unnamed: 0,WT2011,WT2012,WT2013,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,126255,184446,188363,190540,194165,196088,213479,222251,229795,239445,247308,253648,265472,271456,283675,291543,301016
1,17482,18243,18900,19543,20215,27152,21576,22300,23057,23853,24636,25451,26256,27041,27858,28631,29353
2,139730,139310,141630,91705,95725,101586,93003,92957,94213,93784,94892,97531,95441,98114,95961,97041,96458
3,27872,27788,28251,28142,28499,20263,29447,29907,30311,30668,31030,31378,31729,32083,32443,32808,33173
4,149520,89443,93963,98131,102432,108704,99520,99470,100814,100355,101541,104364,102128,104989,102684,103840,103216


In [27]:
def _build_calculator(weights, **records_kwargs):
    """Return a Calculator over the PUF at `puf_path` using `weights`.

    Extra keyword arguments are forwarded to `tc.Records`; the policy is
    always a freshly constructed `tc.Policy()`.
    """
    return tc.Calculator(records=tc.Records(data=puf_path,
                                            weights=weights,
                                            **records_kwargs),
                         policy=tc.Policy())


# Baseline: current weights with the Records default adjustment ratios.
base = _build_calculator(cur_wts)
# Comparison: new weights together with the stage-3 adjustment ratios.
new = _build_calculator(new_wts, adjust_ratios='puf_stage3/puf_ratios.csv')

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2013.
You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [28]:
# For every year, advance both calculators, run the full calculation and keep
# a dataframe of the variables needed by the comparisons below.
base_data = {}
new_data = {}
var_list = ['s006', 'iitax', 'combined', 'payrolltax', 'c00100',
            'expanded_income']
for year in years:
    # The two calculators are independent, so each is advanced, calculated
    # and snapshotted in turn.
    for calc, store in ((base, base_data), (new, new_data)):
        calc.advance_to_year(year)
        calc.calc_all()
        store[year] = calc.dataframe(var_list)

In [29]:
# Weighted aggregate tax liability for each year under the base and new
# weights, with the level change and percent change between the two.
data = {'year': [], 'combined_base': [], 'combined_new': [], 'combined_change': [],
        'combined_pct_change': [], 'iitax_pct_change': [], 'payroll_pct_change': [],
        'iitax_base': [], 'iitax_new': [], 'iitax_change': [],
        'payroll_base': [], 'payroll_new': [], 'payroll_change': []}
# (key prefix used in `data`, column name in the calculator dataframes)
tax_measures = (('combined', 'combined'),
                ('iitax', 'iitax'),
                ('payroll', 'payrolltax'))
for year in years:
    data['year'].append(year)
    base_df = base_data[year]
    new_df = new_data[year]
    for prefix, tax_col in tax_measures:
        # s006 is the record weight, so these sums are weighted totals
        total_new = (new_df[tax_col] * new_df['s006']).sum()
        total_base = (base_df[tax_col] * base_df['s006']).sum()
        total_change = total_new - total_base
        total_pct_change = (total_change / total_base) * 100

        data[prefix + '_base'].append(total_base)
        data[prefix + '_new'].append(total_new)
        data[prefix + '_change'].append(total_change)
        data[prefix + '_pct_change'].append(total_pct_change)
cds = ColumnDataSource(data)
data_df = pd.DataFrame(data, index=data['year'])

In [30]:
# One line per (tax measure, weight set) pair, all drawn from the shared `cds` source.
f = figure(title='Tax Liability by Year')
line_specs = [
    # (column in cds, line colour, legend entry)
    ('combined_base', 'blue', 'Combined-Base'),
    ('combined_new', 'lightblue', 'Combined-new'),
    ('iitax_base', 'green', 'IITax-Base'),
    ('iitax_new', 'lime', 'IITax-New'),
    ('payroll_base', 'red', 'Payroll-Base'),
    ('payroll_new', 'pink', 'Payroll-New'),
]
for y_col, line_color, legend_text in line_specs:
    f.line(x='year', y=y_col, line_width=2, color=line_color, source=cds,
           legend=legend_text)
f.legend.location = 'top_left'
# Abbreviated dollar amounts on the y axis; no minor ticks between years on the x axis.
f.yaxis[0].formatter = NumeralTickFormatter(format='$0.00a')
f.xaxis[0].ticker.desired_num_ticks = 12
f.xaxis.minor_tick_line_color = None
show(f)

In [31]:
# Show floats with thousands separators and two decimals from here on.
pd.set_option('display.float_format', '{:,.2f}'.format)
print('Combined Tax Liability')
# Years become columns so the base/new/change rows line up across time.
combined_cols = ['combined_base', 'combined_new', 'combined_change', 'combined_pct_change']
data_df[combined_cols].T

Combined Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
combined_base,2311511379865.96,2482419403189.37,2562180996936.11,2714458752125.23,2660586903315.75,2767205186030.06,2873078303529.79,2988397898948.32,3116620119315.79,3257103101250.7,3404346263507.58,3557306398226.46,3949641992824.11,4126887490788.88
combined_new,2310596388690.71,2480038536447.01,2552223024643.41,2710923204210.91,2654907235000.87,2760001327611.44,2866321341675.17,2981233405091.05,3107221834664.18,3250136758043.99,3395658061955.87,3550220265836.61,3942986809484.82,4120235487072.04
combined_change,-914991175.25,-2380866742.36,-9957972292.7,-3535547914.32,-5679668314.88,-7203858418.62,-6756961854.62,-7164493857.27,-9398284651.61,-6966343206.71,-8688201551.71,-7086132389.85,-6655183339.29,-6652003716.85
combined_pct_change,-0.04,-0.1,-0.39,-0.13,-0.21,-0.26,-0.24,-0.24,-0.3,-0.21,-0.26,-0.2,-0.17,-0.16


In [32]:
print('Income Tax Liability')
# Years become columns so the base/new/change rows line up across time.
iitax_cols = ['iitax_base', 'iitax_new', 'iitax_change', 'iitax_pct_change']
data_df[iitax_cols].T

Income Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
iitax_base,1354267453946.5,1481491878585.55,1523631199391.6,1621340711946.2,1518402243507.87,1580262006138.34,1644181331987.12,1714938300405.92,1794472809529.6,1882872841532.93,1975599726573.87,2072206901973.47,2405557949519.14,2521121860033.65
iitax_new,1350705732835.88,1475600659396.53,1506386968481.28,1614628217697.35,1509342315460.84,1569278706968.1,1634077485468.64,1704322587210.08,1780973769183.37,1872679608527.53,1963126951155.51,2061881366653.6,2395271141243.9,2511067137633.93
iitax_change,-3561721110.62,-5891219189.02,-17244230910.33,-6712494248.85,-9059928047.03,-10983299170.24,-10103846518.48,-10615713195.84,-13499040346.23,-10193233005.4,-12472775418.36,-10325535319.87,-10286808275.24,-10054722399.72
iitax_pct_change,-0.26,-0.4,-1.13,-0.41,-0.6,-0.7,-0.61,-0.62,-0.75,-0.54,-0.63,-0.5,-0.43,-0.4


In [33]:
print('Payroll Tax Liability')
# Years become columns so the base/new/change rows line up across time.
payroll_cols = ['payroll_base', 'payroll_new', 'payroll_change', 'payroll_pct_change']
data_df[payroll_cols].T

Payroll Tax Liability


Unnamed: 0,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
payroll_base,957243925919.46,1000927524603.83,1038549797544.5,1093118040179.03,1142184659807.88,1186943179891.72,1228896971542.67,1273459598542.4,1322147309786.19,1374230259717.77,1428746536933.71,1485099496252.99,1544084043304.97,1605765630755.24
payroll_new,959890655854.82,1004437877050.48,1045836056162.13,1096294986513.55,1145564919540.03,1190722620643.34,1232243856206.53,1276910817880.97,1326248065480.81,1377457149516.46,1432531110800.36,1488338899183.01,1547715668240.92,1609168349438.11
payroll_change,2646729935.36,3510352446.65,7286258617.63,3176946334.52,3380259732.15,3779440751.62,3346884663.86,3451219338.58,4100755694.63,3226889798.69,3784573866.65,3239402930.03,3631624935.95,3402718682.87
payroll_pct_change,0.28,0.35,0.7,0.29,0.3,0.32,0.27,0.27,0.31,0.23,0.26,0.22,0.24,0.21


In [34]:
def hist_plots(base_data, new_data, year):
    """Build a histogram of the per-record change in weight for one year.

    Parameters
    ----------
    base_data : mapping or DataFrame with an 's006' entry
        Data computed with the base weights.
    new_data : mapping or DataFrame with an 's006' entry
        Data computed with the new weights.
    year : int or str
        Only used in the figure title.

    Returns
    -------
    The Bokeh figure containing the histogram.
    """
    change_wts = new_data['s006'] - base_data['s006']
    # density=True normalizes the histogram, so the bar heights are
    # probability densities rather than raw record counts.
    hist_diff, edges_diff = np.histogram(change_wts, density=True, bins=50)

    # One bar per bin, spanning consecutive bin edges.
    f1 = figure(title=f'Change in Weights for {year}', width=350, height=350)
    f1.quad(top=hist_diff, bottom=0, left=edges_diff[:-1], right=edges_diff[1:],
            fill_color='cyan', line_color='black')
    f1.yaxis.axis_label = 'Density'
    f1.xaxis.axis_label = 'Change'

    return f1

In [35]:
# One weight-change histogram per year, comparing each record's weight
# under the base and new weight files.
plot_list_pct = []  # holds graphs showing histogram of pct change in wt
# holds graphs showing difference in wt
plot_list_diff = [hist_plots(base_data[year], new_data[year], year)
                  for year in years]

In [36]:
# Stack the per-year histograms vertically and display them.
show(column(plot_list_diff))

### Raw Files

In [38]:
# First rows of the current weights, for side-by-side comparison with the new ones.
cur_wts.head()

Unnamed: 0,WT2011,WT2012,WT2013,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,126255,193669,202012,211711,218984,226256,235295,243188,253108,261840,270437,281218,290123,300749,311841,322559,330693
1,17482,18243,18900,19543,20215,20886,21576,22300,23057,23853,24636,25451,26256,27041,27858,28631,29353
2,139730,139310,141630,70543,71437,72562,72336,73466,72939,73797,74669,73935,74762,73988,73190,72369,73175
3,27872,27788,28251,28142,28499,28948,29447,29907,30311,30668,31030,31378,31729,32083,32443,32808,33173
4,149520,79008,78808,75485,76442,77646,77404,78613,78049,78968,79901,79115,80000,79172,78319,77440,78302


In [39]:
# First rows of the new weights, for side-by-side comparison with the current ones.
new_wts.head()

Unnamed: 0,WT2011,WT2012,WT2013,WT2014,WT2015,WT2016,WT2017,WT2018,WT2019,WT2020,WT2021,WT2022,WT2023,WT2024,WT2025,WT2026,WT2027
0,126255,184446,188363,190540,194165,196088,213479,222251,229795,239445,247308,253648,265472,271456,283675,291543,301016
1,17482,18243,18900,19543,20215,27152,21576,22300,23057,23853,24636,25451,26256,27041,27858,28631,29353
2,139730,139310,141630,91705,95725,101586,93003,92957,94213,93784,94892,97531,95441,98114,95961,97041,96458
3,27872,27788,28251,28142,28499,20263,29447,29907,30311,30668,31030,31378,31729,32083,32443,32808,33173
4,149520,89443,93963,98131,102432,108704,99520,99470,100814,100355,101541,104364,102128,104989,102684,103840,103216


In [40]:
# Compare the total of each year's weight column between the two files.
# The loop names are deliberately distinct from `column` (the Bokeh layout
# function) and `new` (the Calculator built with the new weights): rebinding
# those here would break earlier cells if they are run again.
for wt_col in cur_wts.columns:
    cur_total = cur_wts[wt_col].sum()
    new_total = new_wts[wt_col].sum()
    print(f'{wt_col}: Current: {cur_total:,} New: {new_total:,} Diff: {new_total - cur_total:,} Pct: {((new_total - cur_total) / cur_total) * 100:.2f}')

WT2011: Current: 15,454,814,962 New: 15,454,814,962 Diff: 0 Pct: 0.00
WT2012: Current: 15,934,992,640 New: 15,930,314,530 Diff: -4,678,110 Pct: -0.03
WT2013: Current: 16,219,843,516 New: 16,211,230,848 Diff: -8,612,668 Pct: -0.05
WT2014: Current: 16,193,704,375 New: 16,174,046,809 Diff: -19,657,566 Pct: -0.12
WT2015: Current: 16,417,243,644 New: 16,384,528,763 Diff: -32,714,881 Pct: -0.20
WT2016: Current: 16,686,532,466 New: 16,597,289,050 Diff: -89,243,416 Pct: -0.53
WT2017: Current: 16,982,888,408 New: 16,951,537,679 Diff: -31,350,729 Pct: -0.18
WT2018: Current: 17,258,333,089 New: 17,221,775,888 Diff: -36,557,201 Pct: -0.21
WT2019: Current: 17,509,230,586 New: 17,459,131,939 Diff: -50,098,647 Pct: -0.29
WT2020: Current: 17,735,225,806 New: 17,686,839,341 Diff: -48,386,465 Pct: -0.27
WT2021: Current: 17,962,244,861 New: 17,912,756,673 Diff: -49,488,188 Pct: -0.28
WT2022: Current: 18,187,123,527 New: 18,127,772,988 Diff: -59,350,539 Pct: -0.33
WT2023: Current: 18,410,595,147 New: 18,3

### Change in Weight by Tax Liability

In [41]:
def wt_plot(data, year, tax):
    """Scatter the average weight change against tax-liability percentile.

    Parameters
    ----------
    data : data source passed to Bokeh; must provide 'perc' and 'change' columns.
    year : value interpolated into the figure title.
    tax : accepted for the caller's convenience; currently unused.

    Returns
    -------
    The Bokeh figure.
    """
    fig = figure(title=f'Average Change in Weight by Tax Liability - {year}')
    fig.circle(x='perc', y='change', size=5, source=data)
    fig.xaxis.axis_label = 'Tax Liability Percentile'
    fig.yaxis.axis_label = 'Average Weight Change'
    return fig

In [42]:
# Build one plot per year of the average weight change by tax-liability percentile.
# The names used here avoid rebinding `data` and `cds`, which hold the yearly
# liability totals used by the earlier tables and line chart.
plots = []
tax = 'combined'  # tax measure handed to `percentile` as the ranking variable
n_percentiles = 100
for year in years:
    wt_change_df = pd.DataFrame({'wt_change': new_data[year]['s006'] - base_data[year]['s006'],
                                 's006': base_data[year]['s006'],
                                 tax: new_data[year][tax]})
    # `percentile` comes from notebookfunctions; presumably it returns one
    # average per bin (TODO confirm against that module).
    change = percentile(wt_change_df, 'wt_change', n_percentiles, tax, result_type='avg')
    pct_cds = ColumnDataSource({'perc': list(range(1, n_percentiles + 1)),
                                'change': change})
    plots.append(wt_plot(pct_cds, year, 'Combined'))

In [43]:
# Display each per-year weight-change plot in turn.
for p in plots:
    show(p)

In [44]:
# Record the Tax-Calculator version these results were produced with.
tc.__version__

'0.20.1'