# This notebook adds scores from all models to eval_loan_info

In [1]:
# steps when adding new score from new model:
#     1) load in eval_loan_info_scored.fth
#     2) add model scoring steps into class Model:
#         a) load model
#         b) process base_loan_info in same way model was trained
#     3) score all loans, add column to eval_loan_info
#     4) save over eval_loan_info_scored.fth:
#         a) Maybe make a backup of the previous eval_loan_info_scored.fth?

In [1]:
# Standard library
import os
import sys

# Third-party. pandas and numpy are used throughout this notebook (pd.options
# below, pd.read_feather, np.random, ...) and must be imported here so the
# notebook survives Restart Kernel -> Run All.
import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal  # testing
from tqdm import tqdm

# Project-local packages live under ~/projects; that directory has to be on
# sys.path before they can be imported.
sys.path.append(os.path.join(os.path.expanduser('~'), 'projects'))
import j_utils.munging as mg
from lendingclub.lc_utils import gen_datasets

# Display options: show (almost) every column and never truncate sequences.
pd.options.display.max_columns = 999
pd.options.display.max_rows = 60
pd.options.display.max_seq_items = None

# ppath: lendingclub project directory; dpath: its data sub-directory.
ppath = os.path.join(os.path.expanduser('~'), 'projects', 'lendingclub', )
dpath = os.path.join(ppath,'data')

In [3]:
# Resolve both input files inside the data directory, then read them in the
# same order as before: the base loan frame first, the evaluation frame second.
base_path = os.path.join(dpath, 'base_loan_info.fth')
eval_path = os.path.join(dpath, 'eval_loan_info.fth')

base_loan_info = pd.read_feather(base_path)
eval_loan_info = pd.read_feather(eval_path)

In [56]:
class Model():
    '''
    Scores loans with the model selected by ``name`` in the constructor.

    Supported names:
        'baseline'  -- a uniform random score in [0, 1) for every loan.
        'A' .. 'G'  -- a uniform random score in [0, 1) for loans whose
                       ``grade`` equals the name, and 0 for every other loan.
    '''

    # Loan grades that have a per-grade baseline model.
    GRADES = ('A', 'B', 'C', 'D', 'E', 'F', 'G')

    def __init__(self, name: str, seed=None):
        '''
        name: which model to use (see the class docstring).
        seed: optional int. When given, every call to ``score`` draws from a
            fresh generator seeded with it, so the scores are reproducible.
            When None (the default) numpy's global random state is used.
        '''
        self.name = name
        self.seed = seed
        self.ppath = ppath  # project directory, read from the notebook-level global

    def score(self, df: pd.DataFrame):
        '''
        Given a dataframe (base_loan_info, non imputed or scaled or normalized)
        return one score per row as a 1-D float numpy array. Any imputation,
        scaling, or normalizing a model needs is meant to be handled inside
        this method so it matches what was done at training.

        The per-grade models read the ``grade`` column of ``df``.

        Raises:
            ValueError: if ``self.name`` is not a known model. Previously this
                case only printed a message and returned None, which callers
                would silently store as a score column.
        '''
        # Unseeded: share numpy's global stream. Seeded: private, repeatable stream.
        if self.seed is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.seed)

        if self.name == 'baseline':
            return rng.random_sample(len(df))

        if self.name in self.GRADES:
            scores = rng.random_sample(len(df))
            # Keep the random score only for loans of this grade; zero the rest.
            return np.where(df['grade'] == self.name, scores, 0.0)

        raise ValueError(
            "unknown model name {0!r}; expected 'baseline' or one of {1}".format(
                self.name, ', '.join(self.GRADES)))

# baselines

In [52]:
# Score every evaluation loan with the random baseline and with each per-grade
# baseline, storing each result in a '<model>_score' column.
baseline_names = ['baseline'] + list('ABCDEFG')
for model in baseline_names:
    m = Model(model)
    score_col = '{0}_score'.format(model)
    eval_loan_info[score_col] = m.score(eval_loan_info)

In [55]:
# Preview the first five rows, including the newly added *_score columns.
eval_loan_info.head(n=5)

Unnamed: 0,end_d,issue_d,maturity_paid,maturity_time,maturity_time_stat_adj,maturity_paid_stat_adj,rem_to_be_paid,roi_simple,target_loose,target_strict,loan_status,id,grade,int_rate,term,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,baseline_score,A_score,B_score,C_score,D_score,E_score,F_score,G_score
0,2011-10-01,2009-08-01,1.0,1.0,1.0,1.0,0.0,1.173214,0,0,paid,54734,B,0.1189,36,0.095902,0.081254,0.066865,0.052729,0.038841,0.025198,0.011792,-0.001379,-0.014321,-0.027038,-0.039534,-0.051815,-0.063884,-0.075745,-0.087403,-0.098861,-0.110124,-0.121195,-0.132078,-0.142776,-0.153294,-0.163634,-0.1738,-0.183796,-0.193624,-0.203289,-0.212792,-0.222138,-0.231329,-0.240368,-0.249259,0.715647,0.0,0.554286,0.0,0.0,0.0,0.0,0.0
1,2010-03-01,2008-07-01,1.0,1.0,1.0,1.0,0.0,1.207769,0,0,paid,55521,F,0.1608,36,0.13769,0.124301,0.111112,0.098121,0.085324,0.072718,0.060299,0.048065,0.036013,0.024139,0.012441,0.000915,-0.010441,-0.02163,-0.032654,-0.043517,-0.054222,-0.06477,-0.075164,-0.085407,-0.095501,-0.105449,-0.115253,-0.124916,-0.134439,-0.143825,-0.153077,-0.162196,-0.171184,-0.180044,-0.188778,0.553308,0.0,0.0,0.0,0.0,0.0,0.055234,0.0
2,2018-06-01,2016-08-01,1.0,0.944444,1.0,1.0,0.0,1.353502,0,0,paid,55716,E,0.2499,36,0.274963,0.259472,0.244227,0.229225,0.214461,0.19993,0.185629,0.171554,0.157699,0.144063,0.13064,0.117426,0.104419,0.091615,0.079009,0.0666,0.054382,0.042352,0.030509,0.018847,0.007364,-0.003943,-0.015077,-0.026041,-0.036838,-0.047472,-0.057944,-0.068258,-0.078416,-0.088421,-0.098276,0.395822,0.0,0.0,0.0,0.0,0.049961,0.0,0.0
3,2011-06-01,2008-05-01,1.0,1.0,1.0,1.0,0.0,1.173648,0,0,paid,55742,B,0.1071,36,0.08777,0.071643,0.055847,0.040373,0.025214,0.010362,-0.00419,-0.018448,-0.03242,-0.046112,-0.059532,-0.072685,-0.085577,-0.098215,-0.110604,-0.122751,-0.13466,-0.146337,-0.157788,-0.169018,-0.180031,-0.190833,-0.201429,-0.211822,-0.222018,-0.232022,-0.241836,-0.251467,-0.260917,-0.270191,-0.279292,0.996632,0.0,0.463809,0.0,0.0,0.0,0.0,0.0
4,2018-04-01,2016-01-01,0.583209,1.0,1.0,1.0,3677.225098,0.643185,1,1,charged_off,56121,A,0.0649,36,-0.385032,-0.390533,-0.395965,-0.40133,-0.406627,-0.411859,-0.417025,-0.422128,-0.427167,-0.432145,-0.437061,-0.441916,-0.446713,-0.45145,-0.45613,-0.460753,-0.46532,-0.469831,-0.474288,-0.478691,-0.483042,-0.48734,-0.491586,-0.495782,-0.499928,-0.504025,-0.508072,-0.512072,-0.516025,-0.519931,-0.523792,0.62124,0.789628,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# Persist the scored evaluation frame next to the other data files.
scored_path = os.path.join(dpath, 'eval_loan_info_scored.fth')
eval_loan_info.to_feather(scored_path)

# Logistic Regression