In [1]:
# Purpose of this notebook: determine the minimum non-default probability to use as a
# cutoff, and decide which top X% of the remaining loans to pick and how exactly to choose them.
# E.g. require prob of not defaulting >= 95%, and then, of the loans passing, choose the top 5%?
# Or should I also apply a hard cutoff on the regressor score as well?
# Those investigations are in this notebook.

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and code edits are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import numpy as np
import pandas as pd
# from pandas.testing import assert_frame_equal
from catboost import CatBoostClassifier, CatBoostRegressor
from joblib import load
import pickle

import j_utils.munging as mg
from lendingclub import config
from lendingclub.modeling.models import Model

# Show up to 999 columns when displaying wide frames. The fully qualified key is
# used on purpose: the short pattern 'max_columns' is matched against every option
# name and, on pandas >= 1.4, also hits 'styler.render.max_columns', which raises
# OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 999)

# Short aliases for the project and data directories exposed by lendingclub.config.
ppath, dpath = config.prj_dir, config.data_dir

In [4]:
# Read the scored evaluation loans and the base loan info (in that order) from the
# configured data directory, then print both shapes as a quick sanity check.
all_scores, base_loan_info = (
    pd.read_feather(os.path.join(config.data_dir, feather_name))
    for feather_name in ('all_eval_loan_info_scored.fth', 'base_loan_info.fth')
)
print(all_scores.shape, base_loan_info.shape)

(2507335, 46) (2507335, 90)


In [5]:
# Preview the first five rows of the freshly loaded scored frame.
all_scores.head(n=5)

Unnamed: 0,end_d,issue_d,maturity_paid,maturity_time,maturity_time_stat_adj,maturity_paid_stat_adj,rem_to_be_paid,roi_simple,target_loose,target_strict,loan_status,id,grade,term,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,catboost_both_score
0,2011-10-01,2009-08-01,1.0,1.0,1.0,1.0,0.0,1.173214,0,0,paid,54734,B,36,0.095903,0.081254,0.066865,0.052729,0.038842,0.025198,0.011793,-0.001379,-0.01432,-0.027038,-0.039534,-0.051815,-0.063884,-0.075745,-0.087403,-0.098861,-0.110124,-0.121195,-0.132078,-0.142776,-0.153293,-0.163634,-0.1738,-0.183796,-0.193624,-0.203289,-0.212792,-0.222138,-0.231329,-0.240368,-0.249259,-0.010776
1,2010-03-01,2008-07-01,1.0,1.0,1.0,1.0,0.0,1.207769,0,0,paid,55521,F,36,0.13769,0.124301,0.111112,0.098121,0.085324,0.072718,0.060299,0.048065,0.036013,0.024139,0.012441,0.000915,-0.010441,-0.02163,-0.032654,-0.043517,-0.054222,-0.064769,-0.075164,-0.085407,-0.095501,-0.105449,-0.115253,-0.124916,-0.134439,-0.143825,-0.153077,-0.162196,-0.171184,-0.180044,-0.188778,-0.022282
2,2018-06-01,2016-08-01,1.0,1.0,1.0,1.0,0.0,1.353502,0,0,paid,55716,E,36,0.274963,0.259472,0.244228,0.229225,0.214461,0.199931,0.185629,0.171554,0.1577,0.144063,0.13064,0.117427,0.10442,0.091615,0.07901,0.0666,0.054382,0.042353,0.030509,0.018847,0.007365,-0.003942,-0.015076,-0.026041,-0.036838,-0.047471,-0.057944,-0.068257,-0.078416,-0.088421,-0.098276,0.11653
3,2011-06-01,2008-05-01,1.0,1.0,1.0,1.0,0.0,1.173648,0,0,paid,55742,B,36,0.08777,0.071643,0.055847,0.040373,0.025214,0.010362,-0.004189,-0.018448,-0.03242,-0.046112,-0.059532,-0.072685,-0.085577,-0.098215,-0.110604,-0.122751,-0.13466,-0.146337,-0.157788,-0.169018,-0.180031,-0.190833,-0.201429,-0.211822,-0.222018,-0.232021,-0.241836,-0.251467,-0.260917,-0.27019,-0.279292,0.007863
4,2018-04-01,2016-01-01,0.583209,1.0,1.0,1.0,3677.225098,0.643185,1,1,charged_off,56121,A,36,-0.385032,-0.390533,-0.395965,-0.40133,-0.406627,-0.411859,-0.417025,-0.422128,-0.427167,-0.432145,-0.437061,-0.441916,-0.446713,-0.45145,-0.45613,-0.460753,-0.46532,-0.469831,-0.474288,-0.478691,-0.483042,-0.48734,-0.491586,-0.495782,-0.499928,-0.504025,-0.508072,-0.512072,-0.516025,-0.519931,-0.523792,-0.050554


In [6]:
# Load the classifier and the regressor (in that order) through Model, so that
# each model's individual scores can be inspected separately.
clf, regr = (Model(model_name) for model_name in ('catboost_clf', 'catboost_regr'))

In [7]:
# Score base_loan_info with each model and store the result as a new column on
# all_scores (regressor first, then classifier).
# NOTE(review): the assignment is positional, so this assumes all_scores and
# base_loan_info hold the same loans in the same row order — confirm.
for score_col, scorer in (('catboost_regr', regr), ('catboost_clf', clf)):
    all_scores[score_col] = scorer.score(base_loan_info)

In [28]:
# Reuse the scores already stored on all_scores rather than running both models
# over base_loan_info a second time. copy=True yields standalone arrays, so in-place
# edits to regr_scores / clf_scores never write back into the all_scores columns.
# NOTE(review): this assumes Model.score is deterministic, i.e. re-scoring would have
# returned the same values — confirm.
regr_scores = all_scores['catboost_regr'].to_numpy(copy=True)
clf_scores = all_scores['catboost_clf'].to_numpy(copy=True)

In [36]:
# Cutoff on the classifier score; this is the value under investigation in this
# notebook, so it is named instead of being left as a bare literal.
# NOTE(review): clf_scores looks like P(loan does not default) — confirm against Model.score.
MIN_NON_DEFAULT_PROB = .95
# True where a loan's classifier score is BELOW the cutoff (loans to be excluded).
mask = clf_scores < MIN_NON_DEFAULT_PROB

In [40]:
# Overwrite, in place, the regressor score of every loan flagged by mask with 0,
# then display the resulting array.
np.putmask(regr_scores, mask, 0)
regr_scores

array([ 0.        ,  0.        ,  0.        , ...,  0.        ,
       -0.46670904, -0.48192983])

In [42]:
# Display the classifier scores that mask was derived from.
clf_scores

array([0.94264304, 0.92153312, 0.90987156, ..., 0.92372481, 0.97333052,
       0.97890425])

In [43]:
# Display the boolean mask; True marks entries whose classifier score is below .95.
mask

array([ True,  True,  True, ...,  True, False, False])

In [10]:
# Preview the first five rows again; the frame now also carries the
# catboost_regr and catboost_clf columns added by the scoring cell.
all_scores.head(n=5)

Unnamed: 0,end_d,issue_d,maturity_paid,maturity_time,maturity_time_stat_adj,maturity_paid_stat_adj,rem_to_be_paid,roi_simple,target_loose,target_strict,loan_status,id,grade,term,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,catboost_both_score,catboost_regr,catboost_clf
0,2011-10-01,2009-08-01,1.0,1.0,1.0,1.0,0.0,1.173214,0,0,paid,54734,B,36,0.095903,0.081254,0.066865,0.052729,0.038842,0.025198,0.011793,-0.001379,-0.01432,-0.027038,-0.039534,-0.051815,-0.063884,-0.075745,-0.087403,-0.098861,-0.110124,-0.121195,-0.132078,-0.142776,-0.153293,-0.163634,-0.1738,-0.183796,-0.193624,-0.203289,-0.212792,-0.222138,-0.231329,-0.240368,-0.249259,-0.010776,-0.010776,0.951939
1,2010-03-01,2008-07-01,1.0,1.0,1.0,1.0,0.0,1.207769,0,0,paid,55521,F,36,0.13769,0.124301,0.111112,0.098121,0.085324,0.072718,0.060299,0.048065,0.036013,0.024139,0.012441,0.000915,-0.010441,-0.02163,-0.032654,-0.043517,-0.054222,-0.064769,-0.075164,-0.085407,-0.095501,-0.105449,-0.115253,-0.124916,-0.134439,-0.143825,-0.153077,-0.162196,-0.171184,-0.180044,-0.188778,-0.022282,-0.022282,0.918151
2,2018-06-01,2016-08-01,1.0,1.0,1.0,1.0,0.0,1.353502,0,0,paid,55716,E,36,0.274963,0.259472,0.244228,0.229225,0.214461,0.199931,0.185629,0.171554,0.1577,0.144063,0.13064,0.117427,0.10442,0.091615,0.07901,0.0666,0.054382,0.042353,0.030509,0.018847,0.007365,-0.003942,-0.015076,-0.026041,-0.036838,-0.047471,-0.057944,-0.068257,-0.078416,-0.088421,-0.098276,0.11653,0.11653,0.898378
3,2011-06-01,2008-05-01,1.0,1.0,1.0,1.0,0.0,1.173648,0,0,paid,55742,B,36,0.08777,0.071643,0.055847,0.040373,0.025214,0.010362,-0.004189,-0.018448,-0.03242,-0.046112,-0.059532,-0.072685,-0.085577,-0.098215,-0.110604,-0.122751,-0.13466,-0.146337,-0.157788,-0.169018,-0.180031,-0.190833,-0.201429,-0.211822,-0.222018,-0.232021,-0.241836,-0.251467,-0.260917,-0.27019,-0.279292,0.007863,0.007863,0.932767
4,2018-04-01,2016-01-01,0.583209,1.0,1.0,1.0,3677.225098,0.643185,1,1,charged_off,56121,A,36,-0.385032,-0.390533,-0.395965,-0.40133,-0.406627,-0.411859,-0.417025,-0.422128,-0.427167,-0.432145,-0.437061,-0.441916,-0.446713,-0.45145,-0.45613,-0.460753,-0.46532,-0.469831,-0.474288,-0.478691,-0.483042,-0.48734,-0.491586,-0.495782,-0.499928,-0.504025,-0.508072,-0.512072,-0.516025,-0.519931,-0.523792,-0.050554,-0.050554,0.959719
