# New York Fed Replication

In [1]:
# Setting up 

import pandas as pd
import numpy as np

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

sample = "~//projects//equifaxmacro_proj//EquiFax3//2_10_Percent_Sample_Construction//output//sample//"
output_3 = "~//projects//equifaxmacro_proj//EquiFax3//3_Balance_In_And_Out_Construction//output//"

In [2]:
# we're only going to pull every third month, since it's quarterly 

def monyr(monnum):

    mon = (monnum + 5) % 12 + 1

    if mon < 10: 
        MONNUM = "0" + str(mon)
    else: 
        MONNUM = str(mon)

    YRNUM = int(((monnum - mon)/12) + 2006)

    DATE = str(YRNUM) + MONNUM
    
    return DATE;

quartercols = [monyr(3*quarternum) for quarternum in range(1, 66)]

## Ones that just look at balances and accounts 

In [None]:
alltl_str = output_3 + "1_balance_in_and_out.parquet"
alltl = pd.read_parquet(alltl_str, columns = quartercols).reset_index()

In [None]:
non_other_pc_types = [
            'FM', # first mortgage
            'HR', # home equity revolving 
            'AB2', # auto bank loan
            'AF2', # auto finance loan
            'BC', # bank card 
            'RT', # retail 
            'SL1', # student loan deferred
            'SL2', # student loan non-deferred  
            ]

In [None]:
# calculate totals for the classic types first
non_other = alltl[alltl.product_category.isin(non_other_pc_types)]
other = alltl[~alltl.product_category.isin(non_other_pc_types)]

In [None]:
# now, recode the equifax product categories for the non-others 

# mortgage 
non_other.loc[non_other['product_category'] == 'FM', 'product_category'] = 'first_mortgage'

# home equity revolving 
non_other.loc[non_other['product_category'] == 'HR', 'product_category'] = 'he_revolving'

# auto loan 
non_other.loc[non_other['product_category'].isin(['AB2', 'AF2']), 'product_category'] = 'auto'

# credit card
non_other.loc[non_other['product_category'].isin(['BC', 'RT']), 'product_category'] = 'credit_card'

# student loan
non_other.loc[non_other['product_category'].isin(['SL1', 'SL2']), 'product_category'] = 'student_loan'

In [None]:
sum = non_other.drop(['consumer_id', 'trade_id'], axis = 1).head(100000).groupby('product_category').sum()

In [None]:
# now let's group the "other" group for each month

sum_other = other.iloc[:, 3:].head(1000).sum()
sum_other = pd.DataFrame(sum_other).T


In [None]:
sum_other.index = ['other']

In [None]:
a = pd.concat([sum, sum_other], axis = 0).T
a.columns = a.columns + "_bal"
a

now we need to append these to each other and we're done 

# Number of accounts by loan type 

In [None]:
acc = non_other.drop(['consumer_id', 'trade_id'], axis = 1).head(10000).groupby('product_category').count().T

In [None]:
acc.columns = acc.columns + "_acc"
acc

* Percent of balance 90+ days delinquent by loan type 
* geography!

# going back to the 10% sample files

In [4]:
# now, recode the equifax product categories for the non-others 
def recode_nyfrb(non_other): 
    # mortgage 
    non_other.loc[non_other['product_category'] == 'FM', 'product_category'] = 'first_mortgage'

    # home equity revolving 
    non_other.loc[non_other['product_category'] == 'HR', 'product_category'] = 'he_revolving'

    # auto loan 
    non_other.loc[non_other['product_category'].isin(['AB2', 'AF2']), 'product_category'] = 'auto'

    # credit card
    non_other.loc[non_other['product_category'].isin(['BC', 'RT']), 'product_category'] = 'credit_card'

    # student loan
    non_other.loc[non_other['product_category'].isin(['SL1', 'SL2']), 'product_category'] = 'student_loan'
    
    non_other.loc[~non_other.product_category.isin(['first_mortgage', 'he_revolving', 'auto', 'credit_card', 'student_loan']), 'product_category'] = 'other'
    
    return non_other;

# we want to make one giant row. Let's put the values in a list for now
rowlist = []

# in retrospect, these are all the columns we want
moncols = ['consumer_id', 'inquiries_12_months', 'number_accounts_opened_within_12_months'] + \
            ['transferred_sold_flag', 'origination_vantage_score3', 'product_category'] + \
            ['status_category', 'balance'] + \
            ['number_3rd_party_collection_accts','total_amount_3rd_party_collections'] + \
            ['bankruptcy_flag', 'foreclosure_flag'] + \
            ['state'] + \
            ['consumer_age_archive']
            
# now let's just read in one month at a time

mon1_str = sample + "200507_10perc.parquet"
mon1 = pd.read_parquet(mon1_str, columns = moncols)

## total number of new accounts and inquiries


In [8]:
# clean the exception values to zero 
mon1.loc[mon1['inquiries_12_months']>=92, 'inquiries_12_months'] = 0
mon1.loc[mon1['number_accounts_opened_within_12_months']>=92, 'number_accounts_opened_within_12_months'] = 0

# generate the statistics 
acc_iq = mon1[['consumer_id', 'inquiries_12_months', 'number_accounts_opened_within_12_months']].drop_duplicates()
acc_iq_sum = acc_iq[['inquiries_12_months', 'number_accounts_opened_within_12_months']].sum()

# add to the list 
rowlist.append(acc_iq_sum)
acc_iq_sum

## Credit score at origination: Mortgages and Auto loans

In [14]:
orig_cred = mon1[['transferred_sold_flag', 'origination_vantage_score3', 'product_category']]

# rename the column because uh, we don't actually know what's the new_origination_falg and what's the transferred_sold_flag 
orig_cred.columns = ['new_origination_flag', 'origination_vantage_score3', 'product_category']

# find people with new mortgages and calculate percentiles 
mort = orig_cred[(orig_cred['product_category'] == "first_mortgage") & (orig_cred['new_origination_flag'] == 1)]
mp = mort['origination_vantage_score3'].describe(percentiles = [0.1, 0.25, 0.5]).iloc[4:7]

# find people with new auto loans and calculate percentils 
auto = orig_cred[(orig_cred['product_category'] == "auto") & (orig_cred['new_origination_flag'] == 1)]
ap = auto['origination_vantage_score3'].describe(percentiles = [0.1, 0.25, 0.5]).iloc[4:7]

# create the values for the rows and make labels
row = pd.DataFrame(pd.concat([mp, ap]))
row.index = ['mort_10%', 'mort_25%', 'mort_50%', 'auto_10%', 'auto_25%', 'auto_50%']


In [17]:
row.index = row.index + "_orig_vant3"
rowlist.append(row)
row

Unnamed: 0,origination_vantage_score3
mort_10%_orig_vant3,584.0
mort_25%_orig_vant3,653.0
mort_50%_orig_vant3,728.0
auto_10%_orig_vant3,524.0
auto_25%_orig_vant3,590.0
auto_50%_orig_vant3,670.0


## Derogatory debt

### by status

In [18]:
derog_vars = ['status_category', 'balance']
derog = mon1[derog_vars]

derog_stat = derog.groupby('status_category').sum()/derog.balance.sum()
derog_stat.index = ['Miscellaneous',
                    'Current',
                    '30 DPD',
                    '60 DPD',
                    '90 DPD',
                    '120 DPD or Collections',
                    'Foreclosure Started',
                    'Closed-Positive',
                    'Closed-Severe Derogatory',
                    'Closed-Bankruptcy']

In [22]:
derog_stat.index = derog_stat.index + "_perc"
rowlist.append(derog_stat)
derog_stat

Unnamed: 0,balance
Miscellaneous_perc,0.012915
Current_perc,0.950366
30 DPD_perc,0.013224
60 DPD_perc,0.004043
90 DPD_perc,0.001849
120 DPD or Collections_perc,0.005138
Foreclosure Started_perc,0.002186
Closed-Positive_perc,0.0
Closed-Severe Derogatory_perc,0.00812
Closed-Bankruptcy_perc,0.002159


### by type of debt

In [23]:
derog_type_vars = ['product_category', 'status_category', 'balance']
derog_type = mon1[derog_type_vars]

derog_type['sd'] = (derog_type.status_category >= 4) * 1
derog_type_row = derog_type.loc[derog_type.sd==1, ['product_category', 'balance']].groupby(['product_category']).sum()/derog_type[['product_category', 'balance']].groupby('product_category').sum()

In [27]:
derog_type_row.index = derog_type_row.index + "_derog_perc"
rowlist.append(derog_type_row)
derog_type_row

Unnamed: 0_level_0,balance
product_category,Unnamed: 1_level_1
auto_derog_perc,0.022711
credit_card_derog_perc,0.087769
first_mortgage_derog_perc,0.009869
he_revolving_derog_perc,0.002131
other_derog_perc,0.048589
student_loan_derog_perc,0.073219


## Third party collections

In [28]:
tpc_vars = ['consumer_id', 'number_3rd_party_collection_accts','total_amount_3rd_party_collections']

tpc = mon1[tpc_vars].drop_duplicates()

In [29]:
# drop duplicates, recode variables
 # clean exception code variables 
tpc.loc[tpc['number_3rd_party_collection_accts']>=92, 'number_3rd_party_collection_accts'] = 0

# we just want to know whether this person has third part collections, not how many accounts 
tpc.loc[tpc['number_3rd_party_collection_accts']>=1, 'number_3rd_party_collection_accts'] = 1

# clearn exception code variables 
tpc.loc[tpc['total_amount_3rd_party_collections']>=9999992, 'total_amount_3rd_party_collections'] = np.NaN

# we want to know the average collection amount ONLY for people who have collections 
tpc.loc[tpc['total_amount_3rd_party_collections']==0, 'total_amount_3rd_party_collections'] = np.NaN

row = tpc[['number_3rd_party_collection_accts','total_amount_3rd_party_collections']].mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

In [30]:
row.index = ["perc_3rd_party_collection_accts", "total_amount_3rd_party_collections"]
rowlist.append(row)
row

perc_3rd_party_collection_accts          0.215849
total_amount_3rd_party_collections    2552.357796
dtype: float64

## Foreclosures and Bankruptcies 

In [31]:
fb_variables = ['bankruptcy_flag', 'foreclosure_flag']
fb = mon1[fb_variables].drop_duplicates()

In [32]:
# clean exception code variables 
fb.loc[fb['bankruptcy_flag']>=7, 'bankruptcy_flag'] = 0

# clean exception code variables 
fb.loc[fb['foreclosure_flag']>=7, 'foreclosure_flag'] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

In [34]:
row = fb.sum()

rowlist.append(row)
row

bankruptcy_flag     940949.0
foreclosure_flag    101148.0
dtype: float64

# per capita

In [37]:
pc_vars = ['consumer_id', 'state', 'product_category', 'balance']

# relevant states
states = ['IL', 'NJ', 'TX', 'OH', 'PA', 'FL', 'MI', 'NV', 'CA', 'NY', 'AZ']

pc = mon1[pc_vars]
pc = recode_nyfrb(pc)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [38]:
totalnat = pc.groupby('consumer_id').sum().mean()

totalnat.index = ['national_per_capita']
rowlist.append(totalnat)
totalnat

national_per_capita    72304.527065
dtype: float64

In [39]:
# State level time! Let's only pick the tradelines in states that we want 
states = pc[pc['state'].isin(states)]

# total debt balance per capita by state
count_states = states[['state', 'consumer_id']].drop_duplicates().groupby('state').count()
state_pc = states[['state', 'balance']].groupby(['state']).sum()/count_states.to_numpy()

state_byperson = states.drop('consumer_id', axis = 1).groupby(['state', 'product_category']).sum()
count_states

Unnamed: 0_level_0,consumer_id
state,Unnamed: 1_level_1
AZ,37508
CA,225222
FL,126938
IL,82211
MI,67908
NJ,60255
NV,15933
NY,124781
OH,81615
PA,77479


In [45]:
# per capita by type of debt
state_comb = state_byperson.join(count_states).reset_index()
state_comb['per_capita'] = state_comb.balance/state_comb.consumer_id
state_comb

Unnamed: 0,state,product_category,balance,consumer_id,per_capita
0,AZ,auto,245691760,37508,6550.382852
1,AZ,credit_card,167251626,37508,4459.092087
2,AZ,first_mortgage,2149643104,37508,57311.589634
3,AZ,he_revolving,190201308,37508,5070.953077
4,AZ,other,164788245,37508,4393.415938
5,AZ,student_loan,77558153,37508,2067.776288
6,CA,auto,1084383465,225222,4814.731532
7,CA,credit_card,1075650006,225222,4775.954418
8,CA,first_mortgage,21145225790,225222,93886.146957
9,CA,he_revolving,1612483871,225222,7159.530912


In [48]:
pd.pivot_table(state_comb, values='per_capita', index='state', columns='product_category')

product_category,auto,credit_card,first_mortgage,he_revolving,other,student_loan
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AZ,6550.382852,4459.092087,57311.589634,5070.953077,4393.415938,2067.776288
CA,4814.731532,4775.954418,93886.146957,7159.530912,3796.991249,1798.538122
FL,5228.621461,4655.300304,47338.992658,4364.63854,4698.439845,1615.272834
IL,4327.627824,4331.234433,55767.23284,3905.65383,3977.154541,2087.184014
MI,3540.52069,4208.048256,48551.950212,4033.950212,4917.949756,1877.052365
NJ,3552.256311,5099.604033,63030.433757,5692.673554,6121.698398,2086.840262
NV,6477.869704,4634.642001,70341.474048,6235.655809,4850.494885,1507.248917
NY,3207.473638,5559.167582,45186.458123,4045.986248,4150.716455,2589.754915
OH,4268.227838,3949.903192,40511.675746,3931.025902,5190.348245,2067.357618
PA,4140.234864,4032.068548,35234.154906,3257.688741,6333.78207,2368.66862


In [41]:
state_comb['title'] = state_comb.state + "_" + state_comb.product_category
state_comb = state_comb[['title', 'per_capita']].set_index('title')

In [42]:
state_pc.index = state_pc.index + "_per_capita"
rowlist.append(state_pc)
state_pc

Unnamed: 0_level_0,balance
state,Unnamed: 1_level_1
AZ_per_capita,79853.209875
CA_per_capita,116231.89319
FL_per_capita,67901.265641
IL_per_capita,74396.087482
MI_per_capita,67129.471491
NJ_per_capita,85583.506315
NV_per_capita,94047.385364
NY_per_capita,64739.55696
OH_per_capita,59918.538541
PA_per_capita,55366.597749


In [43]:
rowlist.append(state_comb)
state_comb

Unnamed: 0_level_0,per_capita
title,Unnamed: 1_level_1
AZ_auto,6550.382852
AZ_credit_card,4459.092087
AZ_first_mortgage,57311.589634
AZ_he_revolving,5070.953077
AZ_other,4393.415938
AZ_student_loan,2067.776288
CA_auto,4814.731532
CA_credit_card,4775.954418
CA_first_mortgage,93886.146957
CA_he_revolving,7159.530912


## Age 

* Total debt balance by age 
* Debt share by product type and age
* percentiles of student loan debt by age 

In [10]:
age_vars = ['consumer_age_archive', 'product_category', 'balance']
age = mon1[age_vars]
age = recode_nyfrb(age)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [11]:
bal_by_age = age.groupby('consumer_age_archive').sum()

In [22]:
bal_by_age.index = ['Unknown',
                    '18–24',
                    '25–34',
                    '35-44',
                    '45-54',
                    '55–64',
                    '65–74',
                    '75+'] 
bal_by_age.index = bal_by_age.index + '_total'

In [23]:
bal_by_age

Unnamed: 0,balance
Unknown_total,1367274475
18–24_total,2543923031
25–34_total,23121353346
35-44_total,42031053620
45-54_total,40125740838
55–64_total,23621523367
65–74_total,7428430135
75+_total,2493308231


In [12]:
bal_by_type_and_age = age.groupby(['consumer_age_archive', 'product_category']).sum()

In [16]:
all_age = bal_by_type_and_age.join(bal_by_age, rsuffix="_all")

In [17]:
all_age['perc'] = all_age.balance/all_age.balance_all

In [24]:
all_age

Unnamed: 0_level_0,Unnamed: 1_level_0,balance,balance_all,perc
consumer_age_archive,product_category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,auto,81822241,1367274475,0.059843
0,credit_card,56097008,1367274475,0.041028
0,first_mortgage,1006085596,1367274475,0.735833
0,he_revolving,39003274,1367274475,0.028526
0,other,82673586,1367274475,0.060466
0,student_loan,101592770,1367274475,0.074303
1,auto,506168511,2543923031,0.198972
1,credit_card,212665398,2543923031,0.083597
1,first_mortgage,937210325,2543923031,0.368411
1,he_revolving,28786988,2543923031,0.011316


In [19]:
pd.pivot_table(all_age.reset_index(), values = 'perc', index = 'consumer_age_archive', columns = 'product_category')


product_category,auto,credit_card,first_mortgage,he_revolving,other,student_loan
consumer_age_archive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.059843,0.041028,0.735833,0.028526,0.060466,0.074303
1,0.198972,0.083597,0.368411,0.011316,0.088433,0.249271
2,0.088099,0.050716,0.693177,0.033,0.063268,0.07174
3,0.059924,0.050059,0.766101,0.046222,0.060046,0.017647
4,0.057748,0.05941,0.737984,0.058117,0.074427,0.012314
5,0.057943,0.069966,0.708819,0.073649,0.081387,0.008235
6,0.063601,0.091604,0.660887,0.087632,0.092642,0.003634
7,0.059857,0.129907,0.616296,0.090747,0.097541,0.005652


In [47]:
finalrow['200507'] = finalrow.sum(axis=1)
finalrow = finalrow['200507']

In [48]:
allrowlist = []
allrowlist.append(finalrow)
allrowlist.append(finalrow)

In [49]:
pd.concat(allrowlist, axis = 1).T

Unnamed: 0,inquiries_12_months,number_accounts_opened_within_12_months,mort_10%_orig_vant3,mort_25%_orig_vant3,mort_50%_orig_vant3,auto_10%_orig_vant3,auto_25%_orig_vant3,auto_50%_orig_vant3,Miscellaneous_perc,Current_perc,30 DPD_perc,60 DPD_perc,90 DPD_perc,120 DPD or Collections_perc,Foreclosure Started_perc,Closed-Positive_perc,Closed-Severe Derogatory_perc,Closed-Bankruptcy_perc,auto_derog_perc,credit_card_derog_perc,first_mortgage_derog_perc,he_revolving_derog_perc,other_derog_perc,student_loan_derog_perc,perc_3rd_party_collection_accts,total_amount_3rd_party_collections,bankruptcy_flag,foreclosure_flag,national_per_capita,AZ_per_capita,CA_per_capita,FL_per_capita,IL_per_capita,MI_per_capita,NJ_per_capita,NV_per_capita,NY_per_capita,OH_per_capita,PA_per_capita,TX_per_capita,AZ_auto,AZ_credit_card,AZ_first_mortgage,AZ_he_revolving,AZ_other,AZ_student_loan,CA_auto,CA_credit_card,CA_first_mortgage,CA_he_revolving,CA_other,CA_student_loan,FL_auto,FL_credit_card,FL_first_mortgage,FL_he_revolving,FL_other,FL_student_loan,IL_auto,IL_credit_card,IL_first_mortgage,IL_he_revolving,IL_other,IL_student_loan,MI_auto,MI_credit_card,MI_first_mortgage,MI_he_revolving,MI_other,MI_student_loan,NJ_auto,NJ_credit_card,NJ_first_mortgage,NJ_he_revolving,NJ_other,NJ_student_loan,NV_auto,NV_credit_card,NV_first_mortgage,NV_he_revolving,NV_other,NV_student_loan,NY_auto,NY_credit_card,NY_first_mortgage,NY_he_revolving,NY_other,NY_student_loan,OH_auto,OH_credit_card,OH_first_mortgage,OH_he_revolving,OH_other,OH_student_loan,PA_auto,PA_credit_card,PA_first_mortgage,PA_he_revolving,PA_other,PA_student_loan,TX_auto,TX_credit_card,TX_first_mortgage,TX_he_revolving,TX_other,TX_student_loan
200507,3530814.0,2397473.0,584.0,653.0,728.0,524.0,590.0,670.0,0.012915,0.950366,0.013224,0.004043,0.001849,0.005138,0.002186,0.0,0.00812,0.002159,0.022711,0.087769,0.009869,0.002131,0.048589,0.073219,0.215849,2552.357796,940949.0,101148.0,72304.527065,79853.209875,116231.89319,67901.265641,74396.087482,67129.471491,85583.506315,94047.385364,64739.55696,59918.538541,55366.597749,52825.783827,3076.792535,2094.488453,26919.933555,2381.886818,2063.639586,971.259053,9329.482943,9254.344711,181922.751232,13872.98982,7357.412321,3485.018973,9774.644769,8702.84971,88497.865146,8159.472165,8783.496911,3019.671299,4782.222064,4786.207529,61625.283455,4315.921951,4394.933431,2306.431572,3581.581586,4256.850749,49115.02745,4080.733617,4974.985272,1898.821325,2500.963249,3590.37219,44376.527085,4007.922318,4309.976921,1469.238238,1097.445693,785.176012,11916.87256,1056.411123,821.744642,255.34997,6182.182684,10714.90944,87093.759917,7798.357507,8000.217708,4991.572744,5813.750193,5380.160412,55180.925579,5354.447669,7069.769763,2815.946385,5793.768627,5642.402671,49306.029248,4558.749793,8863.378299,3314.671363,18071.994372,12607.854342,94162.801754,772.896889,13821.753339,4928.699304
200507,3530814.0,2397473.0,584.0,653.0,728.0,524.0,590.0,670.0,0.012915,0.950366,0.013224,0.004043,0.001849,0.005138,0.002186,0.0,0.00812,0.002159,0.022711,0.087769,0.009869,0.002131,0.048589,0.073219,0.215849,2552.357796,940949.0,101148.0,72304.527065,79853.209875,116231.89319,67901.265641,74396.087482,67129.471491,85583.506315,94047.385364,64739.55696,59918.538541,55366.597749,52825.783827,3076.792535,2094.488453,26919.933555,2381.886818,2063.639586,971.259053,9329.482943,9254.344711,181922.751232,13872.98982,7357.412321,3485.018973,9774.644769,8702.84971,88497.865146,8159.472165,8783.496911,3019.671299,4782.222064,4786.207529,61625.283455,4315.921951,4394.933431,2306.431572,3581.581586,4256.850749,49115.02745,4080.733617,4974.985272,1898.821325,2500.963249,3590.37219,44376.527085,4007.922318,4309.976921,1469.238238,1097.445693,785.176012,11916.87256,1056.411123,821.744642,255.34997,6182.182684,10714.90944,87093.759917,7798.357507,8000.217708,4991.572744,5813.750193,5380.160412,55180.925579,5354.447669,7069.769763,2815.946385,5793.768627,5642.402671,49306.029248,4558.749793,8863.378299,3314.671363,18071.994372,12607.854342,94162.801754,772.896889,13821.753339,4928.699304


### How old are people with student loan debt?

In [136]:
age2 = mon1[age_vars + ['consumer_id']]
age2 = recode_nyfrb(age2)
age_sl = age2[age2.product_category=="student_loan"]
age_sl = age_sl.groupby('consumer_id').agg({'consumer_age_archive':'mean', 'balance':'sum'})
age_sl['consumer_age_archive'] = age_sl['consumer_age_archive'].astype(int)

summary_age_sl = age_sl.groupby('consumer_age_archive').describe(percentiles = (0.25, 0.5, 0.75, 0.9))
summary_age_sl.columns = summary_age_sl.columns.droplevel()
summary_age_sl = summary_age_sl[['count','mean', '25%', '50%', '75%', '90%']]
summary_age_sl.index = ['Unknown',
                    '18–24',
                    '25–34',
                    '35-44',
                    '45-54',
                    '55–64',
                    '65–74',
                    '75+'] 
summary_age_sl.index = summary_age_sl.index + "_SL"
summary_age_sl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,count,mean,25%,50%,75%,90%
Unknown_SL,9390.0,10810.591267,2625.0,5739.5,12110.5,23255.0
18–24_SL,55873.0,11355.572316,2750.0,6678.0,14910.0,25271.8
25–34_SL,88534.0,18732.370762,4032.0,10271.0,22747.75,44045.7
35-44_SL,39905.0,18588.403558,3612.0,9206.0,22185.0,45473.2
45-54_SL,30460.0,16219.939297,3775.75,8942.0,19855.0,38390.8
55–64_SL,12277.0,15845.437077,2946.0,8052.0,19430.0,38809.4
65–74_SL,2141.0,12608.186362,2504.0,7010.0,15714.0,30319.0
75+_SL,1020.0,13814.696078,3402.25,8272.0,17077.0,34023.3


* percentage of people in each age group with student debt
* average/median/25/75% of people with student debt

In [137]:
a = mon1[['consumer_id', 'consumer_age_archive']].drop_duplicates().groupby('consumer_age_archive').count()
summary_age_sl['perc'] = summary_age_sl['count']/a.consumer_id.to_numpy()


In [138]:
finalrow = pd.melt(summary_age_sl.reset_index(), id_vars='index', value_vars=summary_age_sl.columns)
finalrow.head()

Unnamed: 0,index,variable,value
0,Unknown_SL,count,0.13792
1,18–24_SL,count,0.353864
2,25–34_SL,count,0.267862
3,35-44_SL,count,0.102482
4,45-54_SL,count,0.078057


In [139]:
finalrow['colname'] = finalrow['index'] + "_" + finalrow['variable']
finalrow = finalrow.set_index('colname')['value']
finalrow.head()

colname
Unknown_SL_count    0.137920
18–24_SL_count      0.353864
25–34_SL_count      0.267862
35-44_SL_count      0.102482
45-54_SL_count      0.078057
Name: value, dtype: float64

In [146]:
a.index = ['Unknown',
                    '18–24',
                    '25–34',
                    '35-44',
                    '45-54',
                    '55–64',
                    '65–74',
                    '75+'] 
a.index = a.index + "_count"
a.columns = ["value"]

In [169]:
pd.concat([a, pd.DataFrame(finalrow)], axis = 0)

Unnamed: 0,value
Unknown_count,68083.0
18–24_count,157894.0
25–34_count,330521.0
35-44_count,389386.0
45-54_count,390230.0
55–64_count,288661.0
65–74_count,175340.0
75+_count,174608.0
Unknown_SL_count,0.13792
18–24_SL_count,0.353864


In [161]:
pd.Series(a)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().