In [1]:
import pandas as pd
import numpy as np
import random
import warnings
from datetime import datetime

# Suppress every warning category for the rest of the notebook.
warnings.filterwarnings("ignore")

# Seed both the stdlib and the NumPy global generators with the same value so
# the random draws made later in the notebook repeat on a fresh kernel.
SEED = 51
for seed_rng in (random.seed, np.random.seed):
    seed_rng(SEED)

In [2]:
def conv(string):
    """Convert a percentage string such as ``"13.5%"`` into a fraction (0.135).

    Parameters
    ----------
    string : str
        Raw cell text. Any ``%`` characters and surrounding whitespace are
        ignored.

    Returns
    -------
    float
        The numeric value divided by 100, or NaN when the text is blank.

    Raises
    ------
    ValueError
        If the remaining text is not a valid number.
    """
    cleaned = string.replace("%", "").strip()
    if not cleaned:
        # float("") raises ValueError. This function is used as a read_csv
        # converter, where a missing cell presumably arrives as an empty
        # string, so a single blank value would otherwise abort the whole load.
        return float("nan")
    return float(cleaned) / 100

# Load the scored test loans. int_rate is parsed from a percent string into a
# fraction by conv, and the 'Unnamed: 0.1' column becomes the string loan_id.
test_loan_list = (
    pd.read_csv(
        "../data/test/X_test_y_pred.csv",
        converters={'int_rate': conv},
        dtype={'Unnamed: 0.1': 'string', 'loan_amnt': 'float'},
    )
    .rename(columns={'Unnamed: 0.1': 'loan_id', 'score': 'default_prob'})
)
# Binary outcome flag: 1 for 'Fully Paid', 0 for every other status.
test_loan_list['loan_status'] = np.where(test_loan_list['loan_status'].eq('Fully Paid'), 1, 0)

# Load the portfolio membership table (one row per loan per portfolio).
opt_portfolio_list = pd.read_csv(
    "..//data//expected_return_porfolios.csv",
    dtype={'id': 'string', 'max_qtty': 'string'},
)

# Each distinct (max_qtty, grade) pair is one portfolio; number them from 1 in
# order of first appearance.
portfolio_key_cols = ['max_qtty', 'grade']
opt_portfolio_id = opt_portfolio_list[portfolio_key_cols].drop_duplicates(ignore_index=True)
opt_portfolio_id['portfolio_id'] = opt_portfolio_id.index + 1

# Attach the portfolio number to every member loan and align column names
# with the loan table.
opt_portfolio_list = (
    opt_portfolio_list
    .merge(opt_portfolio_id, on=portfolio_key_cols)
    .rename(columns={'id': 'loan_id', 'grade': 'port_grades'})
)

In [3]:
# Draw portfolio ids without replacement. The sample size equals the number of
# portfolios, so the result is a shuffled list containing every id.
n_portfolios = len(opt_portfolio_id)
opt_port_id_sample = random.sample(list(range(1, n_portfolios + 1)), round(n_portfolios))

# Keep the member loans of the sampled portfolios and attach their loan-level
# attributes (inner join on loan_id).
in_sample = opt_portfolio_list['portfolio_id'].isin(opt_port_id_sample)
opt_portfolio_sample = opt_portfolio_list[in_sample].merge(test_loan_list, on='loan_id')

# Expected payoff: principal grown at int_rate/365 per period for 3*365
# periods (presumably daily compounding over a 3-year term - confirm).
growth_factor = (1 + opt_portfolio_sample['int_rate'] / 365) ** (3 * 365)
opt_portfolio_sample['exp_payment'] = opt_portfolio_sample['loan_amnt'] * growth_factor
# int_rate is dropped here; a later merge brings it back from the loan table.
opt_portfolio_sample = opt_portfolio_sample.drop(columns='int_rate')

# Total funded amount per portfolio, keyed by portfolio_id.
port_amnt_dict = opt_portfolio_sample.groupby("portfolio_id")["funded_amnt"].sum().to_dict()

In [4]:
# Look up each row's portfolio total in port_amnt_dict (None when the id is absent).
opt_portfolio_sample["port_amount"] = opt_portfolio_sample["portfolio_id"].map(port_amnt_dict.get)

# Narrow the frame to the columns the later cells use.
portfolio_view_cols = [
    'loan_id',
    "portfolio_id",
    "port_amount",
    "max_qtty",
    "port_grades",
    "exp_payment",
    "loan_status",
    "funded_amnt_inv",
]
opt_portfolio_sample2 = opt_portfolio_sample[portfolio_view_cols]

In [5]:
def diff_month(d1, d2):
    """Return the number of calendar months from ``d2`` to ``d1``.

    Only the ``year`` and ``month`` attributes are read, so the day of the
    month is ignored. The result is negative when ``d1`` precedes ``d2``.
    """
    years_apart = d1.year - d2.year
    months_apart = d1.month - d2.month
    return 12 * years_apart + months_apart

In [6]:
# Per-loan realised performance: payments received, dates, rate and outcome.
# .copy() makes this an independent frame, so the column assignments below do
# not write into a slice of test_loan_list (the chained-assignment pattern
# pandas reports as SettingWithCopyWarning).
test_loan_list2 = test_loan_list[['loan_id', "total_pymnt_inv", "funded_amnt_inv", "issue_d", "last_pymnt_d", "int_rate", "loan_status"]].copy()
# Vectorised parse of the ISO issue date (replaces a per-row strptime).
test_loan_list2['issue_d'] = pd.to_datetime(test_loan_list2['issue_d'], format="%Y-%m-%d")

# Drop loans with no last-payment date; they cannot be given an elapsed time.
# (Assumes missing dates are read from the CSV as NaN - confirm.)
test_loan_list2 = test_loan_list2[test_loan_list2["last_pymnt_d"].notna()].copy()
test_loan_list2['last_pymnt_d'] = pd.to_datetime(test_loan_list2['last_pymnt_d'], format="%b-%Y")

# Calendar months between issue and last payment. diff_month only reads
# .year/.month, which the .dt accessors expose for the whole column at once,
# so no row-wise apply is needed.
test_loan_list2['months_elapsed'] = diff_month(test_loan_list2['last_pymnt_d'].dt, test_loan_list2['issue_d'].dt)

# Inverse of loan_status: 1 for every loan whose loan_status is not 1.
test_loan_list2['default_count'] = np.where(test_loan_list2['loan_status'] == 1, 0, 1)

In [7]:
# Left-join realised loan performance onto the portfolio rows, so loans that
# were filtered out of test_loan_list2 stay in their portfolio with NaN
# actuals, then order the rows by portfolio.
opt_portfolio_sample_actual = (
    opt_portfolio_sample2
    .merge(test_loan_list2, how='left', on='loan_id')
    .sort_values("portfolio_id")
)

In [8]:
# One row per optimised portfolio: summed payments, mean rate, mean holding
# period and number of defaulted loans, all rounded to 2 decimals.
opt_group_cols = ['portfolio_id', 'port_amount', 'max_qtty', 'port_grades']
opt_named_aggs = {
    'actual_payment': ('total_pymnt_inv', 'sum'),
    'expected_payment': ('exp_payment', 'sum'),
    'port_avg_int_rate': ('int_rate', 'mean'),
    'avg_months_elapsed': ('months_elapsed', 'mean'),
    'default_count': ('default_count', 'sum'),
}
grp_opt_portfolio_sample = (
    opt_portfolio_sample_actual
    .groupby(opt_group_cols)
    .agg(**opt_named_aggs)
    .round(2)
    .reset_index()
)

# Total return: gain or loss relative to the amount put into the portfolio.
opt_invested = grp_opt_portfolio_sample['port_amount']
opt_gain = grp_opt_portfolio_sample['actual_payment'] - opt_invested
grp_opt_portfolio_sample['total_return'] = (opt_gain / opt_invested).round(2)

# Gap between the realised total return and the portfolio's mean interest rate.
grp_opt_portfolio_sample['return_diff_actual_to_expected'] = (
    grp_opt_portfolio_sample['total_return'] - grp_opt_portfolio_sample['port_avg_int_rate']
)

# Scale the total return to a 12-month basis and floor it at -1 (-100%).
# The builtin max is kept on purpose: max(-1, nan) evaluates to -1.
opt_yearly_scale = 12 / grp_opt_portfolio_sample['avg_months_elapsed']
opt_scaled_return = grp_opt_portfolio_sample['total_return'] * opt_yearly_scale
grp_opt_portfolio_sample['annualized_return'] = opt_scaled_return.apply(lambda x: max(-1, x)).round(2)

In [9]:
# For every optimised portfolio, build a random benchmark portfolio of loans
# whose interest rate lies within +/- RATE_TOLERANCE of the optimised
# portfolio's mean rate.
RAND_PORT_SIZE = 5      # loans drawn per random portfolio
RATE_TOLERANCE = 0.03   # half-width of the accepted int_rate band

# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration. Collect the pieces and concatenate once.
rand_port_parts = []

for portfolio_id, temp_port_ir in zip(grp_opt_portfolio_sample['portfolio_id'],
                                      grp_opt_portfolio_sample['port_avg_int_rate']):
    rate_in_band = test_loan_list2['int_rate'].between(temp_port_ir - RATE_TOLERANCE,
                                                       temp_port_ir + RATE_TOLERANCE)
    # Uses the NumPy global RNG; raises ValueError if fewer than
    # RAND_PORT_SIZE loans fall inside the band.
    temp_rand_port = test_loan_list2[rate_in_band].sample(n=RAND_PORT_SIZE)

    # assign() returns a new frame, so nothing is written into a slice.
    rand_port_parts.append(
        temp_rand_port.assign(
            rand_port_amt=temp_rand_port['funded_amnt_inv'].sum(),
            rand_exp_payment=temp_rand_port['funded_amnt_inv'] * pow(1 + temp_rand_port['int_rate']/365, 3*365),
            portfolio_id=portfolio_id,
        )
    )

# pd.concat raises on an empty list; fall back to an empty frame in that case.
all_temp_rand_ports = pd.concat(rand_port_parts) if rand_port_parts else pd.DataFrame()

In [10]:
# One row per random portfolio, with the same metrics as the optimised
# portfolios (rand_ prefix), rounded to 2 decimals.
rand_named_aggs = {
    'rand_actual_payment': ('total_pymnt_inv', 'sum'),
    'rand_expected_payment': ('rand_exp_payment', 'sum'),
    'rand_avg_int_rate': ('int_rate', 'mean'),
    'rand_avg_months_elapsed': ('months_elapsed', 'mean'),
    'rand_default_count': ('default_count', 'sum'),
}
grp_rand_portfolio = (
    all_temp_rand_ports
    .groupby(['portfolio_id', 'rand_port_amt'])
    .agg(**rand_named_aggs)
    .round(2)
    .reset_index()
)

# Total return: gain or loss relative to the amount put into the portfolio.
rand_invested = grp_rand_portfolio['rand_port_amt']
rand_gain = grp_rand_portfolio['rand_actual_payment'] - rand_invested
grp_rand_portfolio['rand_total_return'] = (rand_gain / rand_invested).round(2)

# Gap between the realised total return and the portfolio's mean interest rate.
grp_rand_portfolio['rand_return_diff'] = (
    grp_rand_portfolio['rand_total_return'] - grp_rand_portfolio['rand_avg_int_rate']
)

# Scale the total return to a 12-month basis and floor it at -1 (-100%).
# The builtin max is kept on purpose: max(-1, nan) evaluates to -1.
rand_yearly_scale = 12 / grp_rand_portfolio['rand_avg_months_elapsed']
rand_scaled_return = grp_rand_portfolio['rand_total_return'] * rand_yearly_scale
grp_rand_portfolio['rand_annualized_return'] = rand_scaled_return.apply(lambda x: max(-1, x)).round(2)

In [11]:
# Put each optimised portfolio next to the random portfolio built for it.
combined = grp_opt_portfolio_sample.merge(grp_rand_portfolio, how='left', on='portfolio_id')

# "Random" wins only when its annualised return is strictly higher; ties and
# comparisons involving NaN are labelled "Optimized".
random_is_better = combined['annualized_return'] < combined['rand_annualized_return']
combined['Better Portfolio'] = np.where(random_is_better, "Random", "Optimized")

In [12]:
# Headline statistics for the optimised portfolios, formatted as strings for
# the comparison table.
opt_annualized = grp_opt_portfolio_sample['annualized_return']

opt_avg_return = str(round(grp_opt_portfolio_sample['total_return'].mean()*100, 2)) + "%"
opt_avg_months = str(round(grp_opt_portfolio_sample['avg_months_elapsed'].mean(), 2))
opt_num_default = str(int(grp_opt_portfolio_sample['default_count'].sum()))
opt_avg_annual_return = str(round(opt_annualized.mean()*100, 2)) + "%"
# Round after scaling to percent; otherwise float noise leaks into the label
# (0.29 * 100 evaluates to 28.999999999999996).
opt_best_port = str(round(opt_annualized.max()*100, 2)) + "%"
opt_worst_port = str(round(opt_annualized.min()*100, 2)) + "%"
# Count the label directly. Indexing a groupby count by 'Optimized' raises
# KeyError when no portfolio carries that label; this yields 0 instead.
opt_winning = int((combined['Better Portfolio'] == 'Optimized').sum())
# Mean over standard deviation of the annualised returns (no risk-free rate
# is subtracted).
opt_sharpe = str(round(opt_annualized.mean() / opt_annualized.std(), 2))

In [13]:
# Headline statistics for the random benchmark portfolios, formatted as
# strings for the comparison table.
rand_annualized = grp_rand_portfolio['rand_annualized_return']

rand_avg_return = str(round(grp_rand_portfolio['rand_total_return'].mean()*100, 2)) + "%"
rand_avg_months = str(round(grp_rand_portfolio['rand_avg_months_elapsed'].mean(), 2))
rand_num_default = str(int(grp_rand_portfolio['rand_default_count'].sum()))
rand_avg_annual_return = str(round(rand_annualized.mean()*100, 2)) + "%"
# Round after scaling to percent; otherwise float noise leaks into the label
# (0.29 * 100 evaluates to 28.999999999999996).
rand_best_port = str(round(rand_annualized.max()*100, 2)) + "%"
rand_worst_port = str(round(rand_annualized.min()*100, 2)) + "%"
# Count the label directly. Indexing a groupby count by 'Random' raises
# KeyError when no portfolio carries that label; this yields 0 instead.
rand_winning = int((combined['Better Portfolio'] == 'Random').sum())
# Mean over standard deviation of the annualised returns (no risk-free rate
# is subtracted).
rand_sharpe = str(round(rand_annualized.mean() / rand_annualized.std(), 2))

In [14]:
# Side-by-side summary. Each entry pairs the optimised value with the random
# value; the frame is transposed for display so metrics read as rows.
summary_columns = {
    'Average Return': [opt_avg_return, rand_avg_return],
    'Average Months to Paid/Default': [opt_avg_months, rand_avg_months],
    '# of Defaulted Loans': [opt_num_default, rand_num_default],
    'Average Annualized Return': [opt_avg_annual_return, rand_avg_annual_return],
    'Best Portfolio (Annualized)': [opt_best_port, rand_best_port],
    'Worst Portfolio (Annualized)': [opt_worst_port, rand_worst_port],
    'Outperforming Portfolio Count': [opt_winning, rand_winning],
    'Sharpe Ratio': [opt_sharpe, rand_sharpe],
}
compare_res = pd.DataFrame(summary_columns, index=['Optimized', 'Random'])
compare_res.T

Unnamed: 0,Optimized,Random
Average Return,3.75%,-3.07%
Average Months to Paid/Default,2.54,2.54
# of Defaulted Loans,0,18
Average Annualized Return,18.35%,-7.45%
Best Portfolio (Annualized),38.0%,30.0%
Worst Portfolio (Annualized),8.0%,-100.0%
Outperforming Portfolio Count,44,16
Sharpe Ratio,2.7,-0.17
