In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from scipy.stats import ks_2samp

In [4]:
acctDF = pd.read_parquet('../../data/q2-ucsd-acctDF.pqt')
consDF = pd.read_parquet('../../data/q2-ucsd-consDF.pqt')
cat_map = pd.read_csv('../../data/q2-ucsd-cat-map.csv')
trxnDF = pd.read_parquet('../../data/q2-ucsd-trxnDF.pqt')

In [247]:
acctDF.head()

Unnamed: 0,prism_consumer_id,prism_account_id,account_type,balance_date,balance
0,3023,0,SAVINGS,2021-08-31,90.57
1,3023,1,CHECKING,2021-08-31,225.95
2,4416,2,SAVINGS,2022-03-31,15157.17
3,4416,3,CHECKING,2022-03-31,66.42
4,4227,4,CHECKING,2021-07-31,7042.9


In [248]:
consDF.head()

Unnamed: 0,prism_consumer_id,evaluation_date,credit_score,DQ_TARGET
0,0,2021-09-01,726.0,0.0
1,1,2021-07-01,626.0,0.0
2,2,2021-05-01,680.0,0.0
3,3,2021-03-01,734.0,0.0
4,4,2021-10-01,676.0,0.0


In [249]:
cat_map

Unnamed: 0,category_id,category
0,0,SELF_TRANSFER
1,1,EXTERNAL_TRANSFER
2,2,DEPOSIT
3,3,PAYCHECK
4,4,MISCELLANEOUS
5,5,PAYCHECK_PLACEHOLDER
6,6,REFUND
7,7,INVESTMENT_INCOME
8,8,OTHER_BENEFITS
9,9,UNEMPLOYMENT_BENEFITS


In [250]:
trxnDF.head()

Unnamed: 0,prism_consumer_id,prism_transaction_id,category,amount,credit_or_debit,posted_date
0,3023,0,4,0.05,CREDIT,2021-04-16
1,3023,1,12,481.56,CREDIT,2021-04-30
2,3023,2,4,0.05,CREDIT,2021-05-16
3,3023,3,4,0.07,CREDIT,2021-06-16
4,3023,4,4,0.06,CREDIT,2021-07-16


In [251]:
consDF = consDF.dropna()

In [252]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score,r2_score
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

In [253]:
consDF['evaluation_date'] = pd.to_datetime(consDF['evaluation_date'])

# Create a new feature 'days_since_evaluation' representing the number of days since the evaluation date
consDF['days_since_evaluation'] = (pd.Timestamp('now') - consDF['evaluation_date']).dt.total_seconds()

In [254]:
# Drop the original 'evaluation_date' column
X = consDF.drop(columns=['DQ_TARGET', 'evaluation_date'])
y = consDF['DQ_TARGET']

# Split the data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)

model.fit(X_train, y_train)

In [255]:
# Get feature importances
feature_importances = model.feature_importances_

feature_importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': feature_importances
})

feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

most_important_feature = feature_importance_df.iloc[0]
print(f"The most important feature is: {most_important_feature['Feature']} with an importance score of {most_important_feature['Importance']:.4f}")

The most important feature is: prism_consumer_id with an importance score of 0.3850


In [256]:
most_important_feature['Feature']

'prism_consumer_id'

In [257]:
X_train_single = X_train[[most_important_feature['Feature']]]
X_test_single = X_test[[most_important_feature['Feature']]]

single_feature_model = RandomForestClassifier(random_state=42)
single_feature_model.fit(X_train_single, y_train)

In [258]:
X_train_single = X_train[['credit_score']]
X_test_single = X_test[['credit_score']]

single_feature_model = RandomForestClassifier(random_state=42)
single_feature_model.fit(X_train_single, y_train)

In [259]:
y_pred = single_feature_model.predict(X_test_single)
y_pred_proba = single_feature_model.predict_proba(X_test_single)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)
r2 = r2_score(y_test, y_pred)

print(f"Accuracy using the single best feature: {accuracy:.4f}")
print(f"ROC AUC using the single best feature: {roc_auc:.4f}")
print(f"R² score using the single best feature: {r2:.4f}")

Accuracy using the single best feature: 0.9079
ROC AUC using the single best feature: 0.6798
R² score using the single best feature: -0.1633


In [260]:
consDF

Unnamed: 0,prism_consumer_id,evaluation_date,credit_score,DQ_TARGET,days_since_evaluation
0,0,2021-09-01,726.0,0.0,1.077271e+08
1,1,2021-07-01,626.0,0.0,1.130839e+08
2,2,2021-05-01,680.0,0.0,1.183543e+08
3,3,2021-03-01,734.0,0.0,1.236247e+08
4,4,2021-10-01,676.0,0.0,1.051351e+08
...,...,...,...,...,...
13995,13995,2022-01-22,802.0,0.0,9.537190e+07
13996,13996,2022-02-01,652.0,0.0,9.450790e+07
13997,13997,2021-12-24,765.0,0.0,9.787750e+07
13998,13998,2022-01-30,685.0,0.0,9.468070e+07


### Single Feature using Transaction Frequency

In [261]:
transaction_frequency = (
    trxnDF.groupby('prism_consumer_id')
    .size()  # Count transactions per consumer
    .reset_index(name='transaction_frequency')
)

In [262]:
consDF_merge = consDF.merge(transaction_frequency, on='prism_consumer_id', how='left')

In [263]:
consDF_merge['transaction_frequency'] = consDF_merge['transaction_frequency'].fillna(0)


In [264]:
X = consDF_merge.drop(columns=['DQ_TARGET', 'evaluation_date'])
y = consDF_merge['DQ_TARGET']

# Split the data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_single = X_train[['transaction_frequency']]
X_test_single = X_test[['transaction_frequency']]


single_feature_model = RandomForestClassifier(random_state=42)
single_feature_model.fit(X_train_single, y_train)

y_pred = single_feature_model.predict(X_test_single)
y_pred_proba = single_feature_model.predict_proba(X_test_single)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)
r2 = r2_score(y_test, y_pred)

print(f"Accuracy using transaction frequency: {accuracy:.4f}")
print(f"ROC AUC using transaction frequency: {roc_auc:.4f}")
print(f"R² score using transaction frequency: {r2:.4f}")


Accuracy using transaction frequency: 0.9050
ROC AUC using transaction frequency: 0.4835
R² score using transaction frequency: -0.2002


### Single Feature (Sum of Balance)

In [265]:
# Calculate total balance per consumer
total_balance = (
    acctDF.groupby('prism_consumer_id')['balance']
    .sum()
    .reset_index(name='total_balance')
)

# Merge with consDF
consDF_merge = consDF.merge(total_balance, on='prism_consumer_id', how='left')
consDF_merge['total_balance'] = consDF_merge['total_balance'].fillna(0)


In [266]:
X = consDF_merge.drop(columns=['DQ_TARGET', 'evaluation_date'])
y = consDF_merge['DQ_TARGET']

# Split the data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_single = X_train[['total_balance']]
X_test_single = X_test[['total_balance']]


single_feature_model = RandomForestClassifier(random_state=42)
single_feature_model.fit(X_train_single, y_train)

y_pred = single_feature_model.predict(X_test_single)
y_pred_proba = single_feature_model.predict_proba(X_test_single)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)
r2 = r2_score(y_test, y_pred)

print(f"Accuracy using transaction frequency: {accuracy:.4f}")
print(f"ROC AUC using transaction frequency: {roc_auc:.4f}")
print(f"R² score using transaction frequency: {r2:.4f}")

Accuracy using transaction frequency: 0.8708
ROC AUC using transaction frequency: 0.5995
R² score using transaction frequency: -0.6318


In [267]:
### Single Feature (Credit-To-Debit Ratio)

In [268]:
# Calculate credit and debit counts
credit_debit_ratio = (
    trxnDF.groupby(['prism_consumer_id', 'credit_or_debit'])['amount']
    .count()
    .unstack(fill_value=0)
    .reset_index()
    .rename(columns={'CREDIT': 'credit_count', 'DEBIT': 'debit_count'})
)


credit_debit_ratio['credit_debit_ratio'] = (
    credit_debit_ratio['credit_count'] / (credit_debit_ratio['debit_count'] + 1)  # Avoid division by zero
)


consDF_merge = consDF.merge(credit_debit_ratio[['prism_consumer_id', 'credit_debit_ratio']], on='prism_consumer_id', how='left')
consDF_merge['credit_to_debit_ratio'] = consDF_merge['credit_debit_ratio'].fillna(0)


In [269]:
consDF_merge

Unnamed: 0,prism_consumer_id,evaluation_date,credit_score,DQ_TARGET,days_since_evaluation,credit_debit_ratio,credit_to_debit_ratio
0,0,2021-09-01,726.0,0.0,1.077271e+08,0.102426,0.102426
1,1,2021-07-01,626.0,0.0,1.130839e+08,0.290984,0.290984
2,2,2021-05-01,680.0,0.0,1.183543e+08,0.220109,0.220109
3,3,2021-03-01,734.0,0.0,1.236247e+08,0.230769,0.230769
4,4,2021-10-01,676.0,0.0,1.051351e+08,0.149813,0.149813
...,...,...,...,...,...,...,...
11995,13995,2022-01-22,802.0,0.0,9.537190e+07,2.315789,2.315789
11996,13996,2022-02-01,652.0,0.0,9.450790e+07,0.322581,0.322581
11997,13997,2021-12-24,765.0,0.0,9.787750e+07,3.300000,3.300000
11998,13998,2022-01-30,685.0,0.0,9.468070e+07,0.404651,0.404651


In [270]:
X = consDF_merge.drop(columns=['DQ_TARGET', 'evaluation_date'])
y = consDF_merge['DQ_TARGET']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_single = X_train[['credit_debit_ratio']]
X_test_single = X_test[['credit_debit_ratio']]


single_feature_model = RandomForestClassifier(random_state=42)
single_feature_model.fit(X_train_single, y_train)

y_pred = single_feature_model.predict(X_test_single)
y_pred_proba = single_feature_model.predict_proba(X_test_single)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)
r2 = r2_score(y_test, y_pred)

print(f"Accuracy using transaction frequency: {accuracy:.4f}")
print(f"ROC AUC using transaction frequency: {roc_auc:.4f}")
print(f"R² score using transaction frequency: {r2:.4f}")

Accuracy using transaction frequency: 0.8750
ROC AUC using transaction frequency: 0.5505
R² score using transaction frequency: -0.5792


In [5]:
# Merge transaction data with category names
trxnDF = trxnDF.merge(cat_map, left_on='category', right_on='category_id', how='left')
trxnDF['category'] = trxnDF['category_y']
trxnDF = trxnDF.drop(columns=['category_id', 'category_y'])
trxnDF.head()


Unnamed: 0,prism_consumer_id,prism_transaction_id,category_x,amount,credit_or_debit,posted_date,category
0,3023,0,4,0.05,CREDIT,2021-04-16,MISCELLANEOUS
1,3023,1,12,481.56,CREDIT,2021-04-30,LOAN
2,3023,2,4,0.05,CREDIT,2021-05-16,MISCELLANEOUS
3,3023,3,4,0.07,CREDIT,2021-06-16,MISCELLANEOUS
4,3023,4,4,0.06,CREDIT,2021-07-16,MISCELLANEOUS


In [6]:
# Convert dates to datetime format
acctDF['balance_date'] = pd.to_datetime(acctDF['balance_date'])
consDF['evaluation_date'] = pd.to_datetime(consDF['evaluation_date'])
trxnDF['posted_date'] = pd.to_datetime(trxnDF['posted_date'])

In [7]:
# Aggregate transactions by category type per consumer
category_spending = trxnDF.pivot_table(index='prism_consumer_id',
                                       columns='category',
                                       values='amount',
                                       aggfunc='sum',
                                       fill_value=0)
category_spending

category,ACCOUNT_FEES,ATM_CASH,AUTOMOTIVE,AUTO_LOAN,BANKING_CATCH_ALL,BILLS_UTILITIES,BNPL,CHILD_DEPENDENTS,CORPORATE_PAYMENTS,CREDIT_CARD_PAYMENT,...,REFUND,RENT,RISK_CATCH_ALL,RTO_LTO,SELF_TRANSFER,TAX,TIME_OR_STUFF,TRANSPORATION,TRAVEL,UNEMPLOYMENT_BENEFITS
prism_consumer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.00,540.00,527.15,0.00,1980.00,0.00,0.00,0.0,0.0,0.00,...,19.96,0.0,0.0,0.0,2683.77,2603.52,0.0,2.48,108.75,0.0
1,0.00,6999.13,195.18,0.00,0.00,0.00,251.43,0.0,0.0,0.00,...,2.42,0.0,0.0,0.0,18206.00,2325.40,0.0,51.80,0.00,0.0
10,0.00,4112.00,483.06,0.00,0.00,0.00,0.00,0.0,0.0,180.00,...,92.33,103.0,0.0,0.0,3801.10,0.00,0.0,35.04,0.00,0.0
100,0.00,200.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,16188.17,...,11.75,0.0,0.0,0.0,19702.68,0.00,0.0,0.00,0.00,0.0
1000,0.00,0.00,204.03,0.00,0.00,0.00,0.00,0.0,0.0,14756.05,...,1.37,0.0,0.0,0.0,67142.55,0.00,0.0,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.00,1260.18,115.56,672.78,0.00,53.98,167.60,0.0,0.0,512.48,...,0.00,0.0,0.0,0.0,0.00,0.00,0.0,0.00,0.00,0.0
9996,0.00,0.00,75.39,0.00,184.34,0.00,0.00,0.0,0.0,0.00,...,0.00,0.0,0.0,0.0,0.00,0.00,0.0,0.00,27.69,0.0
9997,10.50,1053.25,217.60,962.16,0.00,230.00,729.08,0.0,0.0,829.00,...,335.00,0.0,0.0,0.0,0.00,0.00,0.0,0.00,136.34,0.0
9998,23.35,503.00,185.15,511.72,0.00,0.00,0.00,0.0,0.0,0.00,...,928.63,0.0,0.0,0.0,156.00,0.00,0.0,51.12,0.00,0.0


In [274]:
# Classify income and spending based on 'credit_or_debit' column
trxnDF['income'] = trxnDF['amount'].where(trxnDF['credit_or_debit'] == 'CREDIT', 0)
trxnDF['spending'] = trxnDF['amount'].where(trxnDF['credit_or_debit'] == 'DEBIT', 0)


In [275]:
income_spending = trxnDF.groupby('prism_consumer_id').agg({
    'income': 'sum',
    'spending': 'sum'
}).fillna(0)

In [276]:

income_spending['net_income'] = income_spending['income'] - income_spending['spending']

In [277]:
# Calculate balance changes over time
acctDF.sort_values(by=['prism_consumer_id', 'balance_date'], inplace=True)
acctDF['balance_diff'] = acctDF.groupby('prism_consumer_id')['balance'].diff().fillna(0)
acctDF['days_zero_balance'] = (acctDF['balance'] == 0).astype(int)

In [278]:
# Aggregate balance statistics
balance_features = acctDF.groupby('prism_consumer_id').agg({
    'balance': ['mean', 'std', 'min', 'max'],
    'balance_diff': ['mean', 'std', 'min', 'max'],
    'days_zero_balance': 'sum'
})

In [279]:
# Flatten multi-level column names
balance_features.columns = ['_'.join(col).strip() for col in balance_features.columns]


In [280]:
# Merge all features
final_features = consDF.merge(income_spending, on='prism_consumer_id', how='left')
final_features = final_features.merge(balance_features, on='prism_consumer_id', how='left')
final_features = final_features.merge(category_spending, on='prism_consumer_id', how='left')


In [281]:
final_features.fillna(0, inplace=True)

In [282]:
final_features.head()

Unnamed: 0,prism_consumer_id,evaluation_date,credit_score,DQ_TARGET,days_since_evaluation,income,spending,net_income,balance_mean,balance_std,...,REFUND,RENT,RISK_CATCH_ALL,RTO_LTO,SELF_TRANSFER,TAX,TIME_OR_STUFF,TRANSPORATION,TRAVEL,UNEMPLOYMENT_BENEFITS
0,0,2021-09-01,726.0,0.0,107727100.0,14386.82,14908.41,-521.59,160.185,190.190511,...,19.96,0.0,0.0,0.0,2683.77,2603.52,0.0,2.48,108.75,0.0
1,1,2021-07-01,626.0,0.0,113083900.0,24903.8,23098.37,1805.43,1651.21,2206.130731,...,2.42,0.0,0.0,0.0,18206.0,2325.4,0.0,51.8,0.0,0.0
2,2,2021-05-01,680.0,0.0,118354300.0,22764.71,22334.58,430.13,1402.68,1638.719965,...,56.48,0.0,0.0,0.0,19865.91,6094.48,0.0,24.5,391.5,0.0
3,3,2021-03-01,734.0,0.0,123624700.0,22641.25,19846.01,2795.24,3833.505,4039.96267,...,37.88,0.0,0.0,0.0,19564.38,2044.04,0.0,1286.71,0.0,5700.0
4,4,2021-10-01,676.0,0.0,105135100.0,14966.11,17509.71,-2543.6,197.275,274.845335,...,116.5,0.0,0.0,0.0,5562.27,0.0,0.0,150.0,673.23,12020.0


In [283]:
# Create rolling spending trends
for days in [30, 90, 180]:
    recent_trxns = trxnDF[trxnDF['posted_date'] >= (trxnDF['posted_date'].max() - pd.Timedelta(days=days))]
    trend_features = recent_trxns.groupby('prism_consumer_id').agg({
        'spending': ['sum', 'mean', 'std'],
        'income': ['sum', 'mean', 'std']
    }).fillna(0)
    trend_features.columns = [f"{col[0]}_{days}d_{col[1]}" for col in trend_features.columns]
    consDF = consDF.merge(trend_features, on='prism_consumer_id', how='left')


In [284]:
# Spending Ratios
trxnDF['essential_spending'] = trxnDF['amount'].where(trxnDF['category'].isin(['GROCERIES', 'BILLS_UTILITIES', 'RENT']), 0)
trxnDF['non_essential_spending'] = trxnDF['amount'].where(~trxnDF['category'].isin(['GROCERIES', 'BILLS_UTILITIES', 'RENT']), 0)

spending_ratios = trxnDF.groupby('prism_consumer_id').agg({
    'essential_spending': 'sum',
    'non_essential_spending': 'sum'
}).fillna(0)

spending_ratios['essential_ratio'] = spending_ratios['essential_spending'] / (spending_ratios['essential_spending'] + spending_ratios['non_essential_spending'] + 1e-6)
spending_ratios['non_essential_ratio'] = spending_ratios['non_essential_spending'] / (spending_ratios['essential_spending'] + spending_ratios['non_essential_spending'] + 1e-6)

In [285]:
# High-Value & Small-Dollar Transactions
percentiles = trxnDF.groupby('prism_consumer_id')['amount'].quantile([0.10, 0.90]).unstack()
percentiles.columns = ['small_txn_threshold', 'large_txn_threshold']
trxnDF = trxnDF.merge(percentiles, on='prism_consumer_id', how='left')

trxnDF['high_value_txn'] = (trxnDF['amount'] >= trxnDF['large_txn_threshold']).astype(int)
trxnDF['small_value_txn'] = (trxnDF['amount'] <= trxnDF['small_txn_threshold']).astype(int)

txn_stats = trxnDF.groupby('prism_consumer_id').agg({
    'high_value_txn': 'sum',
    'small_value_txn': 'sum'
}).fillna(0)

In [286]:
# Merge all new features into final dataset
final_features = final_features.merge(spending_ratios, on='prism_consumer_id', how='left')
final_features = final_features.merge(txn_stats, on='prism_consumer_id', how='left')

In [287]:
final_features.fillna(0, inplace=True)

In [288]:
final_features.head()

Unnamed: 0,prism_consumer_id,evaluation_date,credit_score,DQ_TARGET,days_since_evaluation,income,spending,net_income,balance_mean,balance_std,...,TIME_OR_STUFF,TRANSPORATION,TRAVEL,UNEMPLOYMENT_BENEFITS,essential_spending,non_essential_spending,essential_ratio,non_essential_ratio,high_value_txn,small_value_txn
0,0,2021-09-01,726.0,0.0,107727100.0,14386.82,14908.41,-521.59,160.185,190.190511,...,0.0,2.48,108.75,0.0,997.63,28297.6,0.034054,0.965946,41.0,41.0
1,1,2021-07-01,626.0,0.0,113083900.0,24903.8,23098.37,1805.43,1651.21,2206.130731,...,0.0,51.8,0.0,0.0,427.27,47574.9,0.008901,0.991099,39.0,32.0
2,2,2021-05-01,680.0,0.0,118354300.0,22764.71,22334.58,430.13,1402.68,1638.719965,...,0.0,24.5,391.5,0.0,371.36,44727.93,0.008234,0.991766,51.0,45.0
3,3,2021-03-01,734.0,0.0,123624700.0,22641.25,19846.01,2795.24,3833.505,4039.96267,...,0.0,1286.71,0.0,5700.0,166.84,42320.42,0.003927,0.996073,42.0,28.0
4,4,2021-10-01,676.0,0.0,105135100.0,14966.11,17509.71,-2543.6,197.275,274.845335,...,0.0,150.0,673.23,12020.0,2362.87,30112.95,0.072758,0.927242,32.0,31.0


In [289]:
final_features.columns

Index(['prism_consumer_id', 'evaluation_date', 'credit_score', 'DQ_TARGET',
       'days_since_evaluation', 'income', 'spending', 'net_income',
       'balance_mean', 'balance_std', 'balance_min', 'balance_max',
       'balance_diff_mean', 'balance_diff_std', 'balance_diff_min',
       'balance_diff_max', 'days_zero_balance_sum', 'ACCOUNT_FEES', 'ATM_CASH',
       'AUTOMOTIVE', 'AUTO_LOAN', 'BANKING_CATCH_ALL', 'BILLS_UTILITIES',
       'BNPL', 'CHILD_DEPENDENTS', 'CORPORATE_PAYMENTS', 'CREDIT_CARD_PAYMENT',
       'DEBT', 'DEPOSIT', 'EDUCATION', 'ENTERTAINMENT', 'ESSENTIAL_SERVICES',
       'EXTERNAL_TRANSFER', 'FITNESS', 'FOOD_AND_BEVERAGES', 'GAMBLING',
       'GENERAL_MERCHANDISE', 'GIFTS_DONATIONS', 'GOVERNMENT_SERVICES',
       'GROCERIES', 'HEALTHCARE_MEDICAL', 'HOME_IMPROVEMENT', 'INSURANCE',
       'INVESTMENT', 'INVESTMENT_INCOME', 'LEGAL', 'LOAN', 'MISCELLANEOUS',
       'MORTGAGE', 'OTHER_BENEFITS', 'OVERDRAFT', 'PAYCHECK', 'PENSION',
       'PETS', 'REFUND', 'RENT', 'RISK_