In [1]:
import pandas as pd
import taxcalc as tc

In [2]:
# Read the two input tables into DataFrames. `cps` comes from a gzip-suffixed
# CSV in the working directory; `puf` comes from a CSV in a sibling directory.
# Both frames are handed to Tax-Calculator Records constructors further down.
cps = pd.read_csv('cps.csv.gz')
puf = pd.read_csv('../puf_data/puf.csv')

In [3]:
# Year that both calculators below are advanced to before calc_all() is run.
year = 2015

In [4]:
def distribution(df, numerator, denominator):
    """Print weighted shares describing how `numerator` compares to `denominator`.

    Only rows with ``df[denominator] > 0`` and ``df['filer'] == 1`` are used,
    and every figure is weighted by the ``s006`` column. Four lines are
    printed: the weighted share of rows where the two columns are equal,
    the share where the numerator is zero, the share of everything else, and
    the weighted numerator/denominator ratio over rows where the numerator is
    neither zero nor equal to the denominator.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `numerator`, `denominator`, ``s006`` and ``filer``.
        A ``ratio`` column is added to this frame as a side effect.
    numerator, denominator : str
        Column names to compare.

    Returns
    -------
    None
        Results are printed, not returned. The printed values are fractions
        rounded to three decimals.
    """
    # Stored on the caller's frame before filtering so the subset carries it.
    df['ratio'] = df[numerator] / df[denominator]

    keep = (df[denominator] > 0) & (df['filer'] == 1)
    sample = df[keep]
    weights = sample['s006']
    ratio = sample['ratio']
    total_weight = weights.sum()

    is_full = ratio == 1.0   # numerator equals denominator
    is_zero = ratio == 0.0   # numerator is zero
    full = weights[is_full].sum() / total_weight
    zero = weights[is_zero].sum() / total_weight
    # everything that is neither of the two cases above
    partial = weights[~(is_full | is_zero)].sum() / total_weight

    print(f'Percent of units where {numerator} == {denominator}: {round(full, 3)}')
    print(f'Percent of units where {numerator} == 0: {round(zero, 3)}')
    print(f'Percent of units where {numerator} != {denominator} & {numerator} != 0: {round(partial, 3)}')

    # Weighted average fraction among the "in between" rows, selected here by
    # comparing the raw columns rather than the stored ratio.
    differs = sample[numerator] != sample[denominator]
    nonzero = sample[numerator] != 0.0
    mixed = sample[differs & nonzero]
    weighted_num = (mixed[numerator] * mixed['s006']).sum()
    weighted_den = (mixed[denominator] * mixed['s006']).sum()
    frac = weighted_num / weighted_den
    print(f'Average fraction where {numerator} != {denominator} & {numerator} != 0: {round(frac, 3)}')

In [5]:
# calculator with the PUF
# calculator with the PUF
# Builds Records from the `puf` frame and a Policy constructed with no
# arguments, then advances the calculator to `year` and runs calc_all().
# Cells below label results from this calculator as 'Goal'.
rec_g = tc.Records(data=puf)
pol_g = tc.Policy()
calc_g = tc.Calculator(records=rec_g, policy=pol_g)
calc_g.advance_to_year(year)
calc_g.calc_all()

You loaded data for 2011.
Tax-Calculator startup automatically extrapolated your data to 2013.


In [6]:
# calculator with the CPS
# calculator with the CPS
# Same steps as for the PUF calculator, except Records are built through
# Records.cps_constructor from the `cps` frame. Cells below label results
# from this calculator as 'CPS'.
rec = tc.Records.cps_constructor(data=cps)
pol = tc.Policy()
calc = tc.Calculator(records=rec, policy=pol)
calc.advance_to_year(year)
calc.calc_all()

You loaded data for 2014.
Your data include the following unused variables that will be ignored:
  tanf_ben
  wic_ben
  housing_ben
Tax-Calculator startup automatically extrapolated your data to 2014.


# Pensions

In [7]:
# PUF-based calculator: weighted comparison of e01700 against e01500 for
# filers with e01500 > 0.
# NOTE(review): presumably e01700 is the taxable part of e01500 (pensions) —
# confirm against the Tax-Calculator variable documentation.
print('Goal')
distribution(calc_g.dataframe(['e01700', 'e01500', 's006', 'filer']), 'e01700', 'e01500')

Goal
Percent of units where e01700 == e01500: 0.612
Percent of units where e01700 == 0: 0.073
Percent of units where e01700 != e01500 & e01700 != 0: 0.315
Average fraction where e01700 != e01500 & e01700 != 0: 0.577


In [8]:
# CPS-based calculator: the same e01700 / e01500 comparison, to be read
# against the 'Goal' output from the PUF-based calculator.
print('CPS')
distribution(calc.dataframe(['e01700', 'e01500', 's006', 'filer']), 'e01700', 'e01500')

CPS
Percent of units where e01700 == e01500: 0.605
Percent of units where e01700 == 0: 0.073
Percent of units where e01700 != e01500 & e01700 != 0: 0.322
Average fraction where e01700 != e01500 & e01700 != 0: 0.577


# Qualified Dividends

In [9]:
# PUF-based calculator: weighted comparison of e00650 against e00600 for
# filers with e00600 > 0.
# NOTE(review): presumably e00650 is the qualified part of e00600 (dividends) —
# confirm against the Tax-Calculator variable documentation.
print('Goal Distribution')
distribution(calc_g.dataframe(['e00650', 'e00600', 's006', 'filer']), 'e00650', 'e00600')

Goal Distribution
Percent of units where e00650 == e00600: 0.429
Percent of units where e00650 == 0: 0.093
Percent of units where e00650 != e00600 & e00650 != 0: 0.478
Average fraction where e00650 != e00600 & e00650 != 0: 0.678


In [10]:
# CPS-based calculator: the same e00650 / e00600 comparison, to be read
# against the 'Goal Distribution' output from the PUF-based calculator.
print('CPS')
distribution(calc.dataframe(['e00650', 'e00600', 's006', 'filer']), 'e00650', 'e00600')

CPS
Percent of units where e00650 == e00600: 0.428
Percent of units where e00650 == 0: 0.093
Percent of units where e00650 != e00600 & e00650 != 0: 0.48
Average fraction where e00650 != e00600 & e00650 != 0: 0.678


# Interest Income

In [11]:
def grouped_interest_income(df):
    """Print a weighted breakdown of interest income into taxable shares.

    Total interest is taken as ``e00300 + e00400`` and ``e00300`` is treated
    as the taxable part. Only rows with ``filer == 1`` and positive total
    interest are used; all counts and ratios are weighted by ``s006``.
    Unit counts are printed in millions (weight sum times 1e-6).

    Printed, in order:
      * the weighted taxable/total ratio over all selected rows;
      * units (and share) whose interest is entirely taxable;
      * units (and share) with no taxable interest;
      * units (and share) with some, but not all, taxable interest, plus the
        weighted taxable/total ratio within that group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``e00300``, ``e00400``, ``filer`` and ``s006``.
        An ``int_inc`` column is added to this frame as a side effect.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    df['int_inc'] = df['e00300'] + df['e00400']
    subdf = df[(df['filer'] == 1) & (df['int_inc'] > 0)].copy()
    weights = subdf['s006']
    total_weight = weights.sum()

    taxable_total = (subdf['e00300'] * weights).sum() / (subdf['int_inc'] * weights).sum()
    print(f'Taxable/Total Ratio for all units where Total > 0: {round(taxable_total, 3)}')

    all_taxable = weights[subdf['int_inc'] == subdf['e00300']].sum()
    all_taxable_prob = all_taxable / total_weight
    print(f'Units with all taxable income: {round(all_taxable * 1e-6, 3)}')
    print(f'\tProb: {round(all_taxable_prob, 3)}')

    zero_taxable = weights[subdf['e00300'] == 0.0].sum()
    zero_taxable_prob = zero_taxable / total_weight
    print(f'Units with no taxable income: {round(zero_taxable * 1e-6, 3)}')
    print(f'\tProb: {round(zero_taxable_prob, 3)}')

    # One explicit mask drives both the unit count and the sub-frame, so the
    # two can never disagree. The comparison with 0 is spelled out instead of
    # and-ing the float column itself into the mask, which would depend on
    # pandas coercing floats to booleans.
    some_mask = (subdf['e00300'] != subdf['int_inc']) & (subdf['e00300'] != 0)
    some_taxable = weights[some_mask].sum()
    sometaxabledf = subdf[some_mask]
    some_taxable_prob = some_taxable / total_weight
    frac = ((sometaxabledf['s006'] * sometaxabledf['e00300']).sum() /
            (sometaxabledf['s006'] * sometaxabledf['int_inc']).sum())
    print(f'Units with some taxable interest income: {round(some_taxable * 1e-6, 3)}')
    print(f'\tProb: {round(some_taxable_prob, 3)}')
    print(f'\tFrac: {round(frac, 3)}')

In [12]:
# PUF-based calculator: weighted breakdown of e00300 relative to
# e00300 + e00400 for filers with positive total interest.
print('Goal')
grouped_interest_income(calc_g.dataframe(['e00300', 'e00400', 'filer', 's006']))

Goal
Taxable/Total Ratio for all units where Total > 0: 0.599
Units with all taxable income: 48.801
	Prob: 0.882
Units with no taxable income: 0.505
	Prob: 0.009
Units with some taxable interest income: 6.056
	Prob: 0.109
	Frac: 0.453


In [13]:
# CPS-based calculator: the same interest-income breakdown, to be read
# against the 'Goal' output from the PUF-based calculator.
print('CPS')
grouped_interest_income(calc.dataframe(['e00300', 'e00400', 'filer', 's006']))

CPS
Taxable/Total Ratio for all units where Total > 0: 1.0
Units with all taxable income: 59.064
	Prob: 0.882
Units with no taxable income: 0.0
	Prob: 0.0
Units with some taxable interest income: 7.899
	Prob: 0.118
	Frac: 0.567


In [24]:
# Pull the interest-income columns from the CPS-based calculator for an
# ad-hoc check.
# NOTE(review): the execution counts jump from 13 to 24 here — re-run from a
# fresh kernel to confirm the cells from this point on reproduce.
data = calc.dataframe(['e00300', 'e00400', 'filer', 's006'])

In [25]:
# Total interest per row: e00300 + e00400, stored as a new column on `data`.
data['total'] = data['e00300'] + data['e00400']

In [32]:
# s006-weighted ratio of e00300 to total interest over ALL rows. Unlike
# grouped_interest_income, no filer or positive-total filter is applied.
(data['e00300'] * data['s006']).sum() / (data['total'] * data['s006']).sum()

0.9999324520948686