In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to the project packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
import sys

import numpy as np
import pandas as pd
import seaborn as sns
# testing
from pandas.testing import assert_frame_equal
from tqdm import tqdm

import j_utils.munging as mg
from lendingclub.lc_utils import gen_datasets
from lendingclub import config
from lendingclub.modeling.models import Model

# Notebook-wide pandas display limits: show up to 999 columns and 60 rows,
# and do not truncate long sequences (None removes the item limit).
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 60)
pd.set_option('display.max_seq_items', None)

# Make the scoring script

In [1]:
%%writefile ../../lendingclub/modeling/09_score.py
import os
import pickle
import sys
import argparse

import numpy as np
import pandas as pd
import seaborn as sns
# testing
from pandas.testing import assert_frame_equal
from tqdm import tqdm

import j_utils.munging as mg
from lendingclub.lc_utils import gen_datasets
from lendingclub import config
from lendingclub.modeling.models import Model

# ---------------------------------------------------------------------------
# Command line: which model(s) to score
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('--model', '-m', help='specify model(s) to score, as one whitespace-separated string')

args = parser.parse_args()
# --model carries every model name in a single string, e.g. -m "baseline A B".
# Fall back to the baseline model whenever no model string was supplied so that
# `models` is always bound (the previous argv-length check left it undefined
# when -m was given an empty string).
models = args.model.split() if args.model else ['baseline']  # , 'A', 'B', 'C', 'D', 'E', 'F', 'G'

# ---------------------------------------------------------------------------
# Input / output locations (all inside config.data_dir)
# ---------------------------------------------------------------------------
base_path = os.path.join(config.data_dir, 'base_loan_info.fth')
unscored_path = os.path.join(config.data_dir, 'eval_loan_info.fth')
scored_path = os.path.join(config.data_dir, 'eval_loan_info_scored.fth')
all_scored_path = os.path.join(config.data_dir, 'all_eval_loan_info_scored.fth')

# load in relevant dataframes
base_loan_info = pd.read_feather(base_path)
try:
    # Reuse the outputs of a previous run so new score columns are added next
    # to the existing ones. Both files must be readable, otherwise both frames
    # are rebuilt from the unscored file below.
    eval_loan_info = pd.read_feather(scored_path)
    print('found an existing eval_loan_info_scored.fth to add scores')
    all_scores = pd.read_feather(all_scored_path)
    print('found an existing all_eval_loan_info_scored.fth to add scores')
except Exception:
    # Deliberately broad best-effort fallback: the exception raised for a
    # missing feather file depends on the installed pandas/pyarrow version.
    # Catching Exception (rather than a bare except) still lets
    # KeyboardInterrupt and SystemExit propagate.
    eval_loan_info = pd.read_feather(unscored_path)
    print('no existing eval_loan_info_scored.fth')
    print('this is the first time adding scores')
    all_scores = pd.read_feather(unscored_path)
    print('no existing all_eval_loan_info_scored.fth')
    print('this is the first time adding scores')

# check that loans are all in correct order in BOTH frames that receive scores
assert (base_loan_info['id'] == eval_loan_info['id']).all()
assert (base_loan_info['id'] == all_scores['id']).all()

# score relevant dataframes: one '<model>_score' column per requested model
for model_n in models:
    m = Model(model_n)
    scores = m.score(base_loan_info)
    eval_loan_info['{0}_score'.format(model_n)] = scores
    all_scores['{0}_score'.format(model_n)] = scores

print('saving scored dataframe at {0}'.format(scored_path))
eval_loan_info.to_feather(scored_path)
# Persist all_scores to its own file. Previously eval_loan_info was written a
# second time here, so all_eval_loan_info_scored.fth was never created and the
# "existing scores" branch above could never be taken.
print('saving scored dataframe at {0}'.format(all_scored_path))
all_scores.to_feather(all_scored_path)

Overwriting ../../lendingclub/modeling/09_score.py


# Notebook

In [3]:
# load in relevant dataframes
base_loan_info = pd.read_feather(os.path.join(config.data_dir, 'base_loan_info.fth'))
try:
    # prefer a previously scored frame so new score columns are added to it
    eval_loan_info = pd.read_feather(os.path.join(config.data_dir, 'eval_loan_info_scored.fth'))
    print('found an existing eval_loan_info_scored.fth to add scores')
except Exception:
    # Best-effort fallback to the unscored frame. Exception (instead of a bare
    # except) keeps KeyboardInterrupt / SystemExit from being swallowed; the
    # handler stays broad because the error raised for a missing feather file
    # depends on the installed pandas/pyarrow version.
    eval_loan_info = pd.read_feather(os.path.join(config.data_dir, 'eval_loan_info.fth'))
    print('no existing eval_loan_info_scored.fth')
    print('this is the first time adding scores')

no existing eval_loan_info_scored.fth
this is the first time adding scores


In [7]:
# Sanity check before attaching scores: both frames must list the loans in
# the same id order.
same_order = base_loan_info['id'] == eval_loan_info['id']
assert same_order.all()

# Add one '<model>_score' column to eval_loan_info for every model.
models = ['baseline', 'A', 'B', 'C', 'D', 'E', 'F', 'G']
for model_n in models:
    score_col = '{0}_score'.format(model_n)
    m = Model(model_n)
    eval_loan_info[score_col] = m.score(base_loan_info)

# Write the scored frame next to the other data files.
scored_path = os.path.join(config.data_dir,'eval_loan_info_scored.fth')
print('saving scored dataframe at {0}'.format(scored_path))
eval_loan_info.to_feather(scored_path)

saving scored dataframe at /home/justin/projects/lendingclub/data/eval_loan_info_scored.fth


In [5]:
# preview the first five rows of the scored frame
eval_loan_info.head(n=5)

Unnamed: 0,end_d,issue_d,maturity_paid,maturity_time,maturity_time_stat_adj,maturity_paid_stat_adj,rem_to_be_paid,roi_simple,target_loose,target_strict,loan_status,id,grade,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,baseline_score,A_score,B_score,C_score,D_score,E_score,F_score,G_score
0,2012-11-01,2009-11-01,1.0,1.0,1.0,1.0,0.0,1.222941,0,0,paid,57416,C,0.135866,0.118989,0.102459,0.086267,0.070405,0.054865,0.03964,0.024723,0.010105,-0.004219,-0.018257,-0.032016,-0.045502,-0.058721,-0.071679,-0.084383,-0.096838,-0.10905,-0.121025,-0.132768,-0.144285,-0.15558,-0.166658,-0.177525,-0.188186,-0.198644,-0.208904,-0.218972,-0.228851,-0.238545,-0.248059,0.37454,0.0,0.0,0.37454,0.0,0.0,0.0,0.0
1,2009-11-01,2007-11-01,0.509957,1.0,1.0,1.0,6498.933594,0.645634,1,1,charged_off,145926,E,-0.380326,-0.385478,-0.390568,-0.395598,-0.400569,-0.40548,-0.410333,-0.415129,-0.419869,-0.424553,-0.429182,-0.433757,-0.438279,-0.442748,-0.447165,-0.451532,-0.455848,-0.460114,-0.464332,-0.468501,-0.472623,-0.476698,-0.480726,-0.484709,-0.488647,-0.49254,-0.49639,-0.500196,-0.50396,-0.507682,-0.511362,0.950714,0.0,0.0,0.0,0.0,0.950714,0.0,0.0
2,2010-08-01,2007-11-01,1.0,1.0,1.0,1.0,0.0,1.204435,0,1,paid,147125,D,0.116842,0.100379,0.08425,0.068446,0.05296,0.037784,0.022912,0.008335,-0.005952,-0.019956,-0.033685,-0.047143,-0.060338,-0.073275,-0.08596,-0.098399,-0.110597,-0.122561,-0.134294,-0.145803,-0.157093,-0.168167,-0.179032,-0.189692,-0.200151,-0.210413,-0.220484,-0.230368,-0.240067,-0.249588,-0.258933,0.731994,0.0,0.0,0.0,0.731994,0.0,0.0,0.0
3,2010-12-01,2007-12-01,1.0,1.0,1.0,1.0,0.0,1.123967,0,0,paid,166683,A,0.041716,0.026271,0.011143,-0.003676,-0.018194,-0.032417,-0.046353,-0.060008,-0.073389,-0.086502,-0.099354,-0.11195,-0.124297,-0.1364,-0.148265,-0.159897,-0.171302,-0.182486,-0.193452,-0.204207,-0.214754,-0.225099,-0.235246,-0.245199,-0.254964,-0.264544,-0.273943,-0.283166,-0.292216,-0.301097,-0.309814,0.598658,0.598658,0.0,0.0,0.0,0.0,0.0,0.0
4,2010-02-01,2007-12-01,1.0,1.0,1.0,1.0,0.0,1.141801,0,0,paid,174377,B,0.06556,0.051043,0.036788,0.02279,0.009042,-0.00446,-0.017721,-0.030746,-0.043539,-0.056106,-0.068451,-0.080578,-0.092492,-0.104196,-0.115696,-0.126996,-0.138098,-0.149008,-0.159728,-0.170264,-0.180617,-0.190793,-0.200794,-0.210624,-0.220286,-0.229784,-0.239121,-0.248299,-0.257323,-0.266194,-0.274917,0.156019,0.0,0.156019,0.0,0.0,0.0,0.0,0.0


In [13]:
# Manual check: delete eval_loan_info_scored.fth, re-run the scoring cells
# above, and confirm that the scores shown below match the earlier preview.

In [12]:
# second preview of the first five rows, for manually comparing scores across runs
eval_loan_info.head(n=5)

Unnamed: 0,end_d,issue_d,maturity_paid,maturity_time,maturity_time_stat_adj,maturity_paid_stat_adj,rem_to_be_paid,roi_simple,target_loose,target_strict,loan_status,id,grade,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,baseline_score,A_score,B_score,C_score,D_score,E_score,F_score,G_score
0,2012-11-01,2009-11-01,1.0,1.0,1.0,1.0,0.0,1.222941,0,0,paid,57416,C,0.135866,0.118989,0.102459,0.086267,0.070405,0.054865,0.03964,0.024723,0.010105,-0.004219,-0.018257,-0.032016,-0.045502,-0.058721,-0.071679,-0.084383,-0.096838,-0.10905,-0.121025,-0.132768,-0.144285,-0.15558,-0.166658,-0.177525,-0.188186,-0.198644,-0.208904,-0.218972,-0.228851,-0.238545,-0.248059,0.37454,0.0,0.0,0.37454,0.0,0.0,0.0,0.0
1,2009-11-01,2007-11-01,0.509957,1.0,1.0,1.0,6498.933594,0.645634,1,1,charged_off,145926,E,-0.380326,-0.385478,-0.390568,-0.395598,-0.400569,-0.40548,-0.410333,-0.415129,-0.419869,-0.424553,-0.429182,-0.433757,-0.438279,-0.442748,-0.447165,-0.451532,-0.455848,-0.460114,-0.464332,-0.468501,-0.472623,-0.476698,-0.480726,-0.484709,-0.488647,-0.49254,-0.49639,-0.500196,-0.50396,-0.507682,-0.511362,0.950714,0.0,0.0,0.0,0.0,0.950714,0.0,0.0
2,2010-08-01,2007-11-01,1.0,1.0,1.0,1.0,0.0,1.204435,0,1,paid,147125,D,0.116842,0.100379,0.08425,0.068446,0.05296,0.037784,0.022912,0.008335,-0.005952,-0.019956,-0.033685,-0.047143,-0.060338,-0.073275,-0.08596,-0.098399,-0.110597,-0.122561,-0.134294,-0.145803,-0.157093,-0.168167,-0.179032,-0.189692,-0.200151,-0.210413,-0.220484,-0.230368,-0.240067,-0.249588,-0.258933,0.731994,0.0,0.0,0.0,0.731994,0.0,0.0,0.0
3,2010-12-01,2007-12-01,1.0,1.0,1.0,1.0,0.0,1.123967,0,0,paid,166683,A,0.041716,0.026271,0.011143,-0.003676,-0.018194,-0.032417,-0.046353,-0.060008,-0.073389,-0.086502,-0.099354,-0.11195,-0.124297,-0.1364,-0.148265,-0.159897,-0.171302,-0.182486,-0.193452,-0.204207,-0.214754,-0.225099,-0.235246,-0.245199,-0.254964,-0.264544,-0.273943,-0.283166,-0.292216,-0.301097,-0.309814,0.598658,0.598658,0.0,0.0,0.0,0.0,0.0,0.0
4,2010-02-01,2007-12-01,1.0,1.0,1.0,1.0,0.0,1.141801,0,0,paid,174377,B,0.06556,0.051043,0.036788,0.02279,0.009042,-0.00446,-0.017721,-0.030746,-0.043539,-0.056106,-0.068451,-0.080578,-0.092492,-0.104196,-0.115696,-0.126996,-0.138098,-0.149008,-0.159728,-0.170264,-0.180617,-0.190793,-0.200794,-0.210624,-0.220286,-0.229784,-0.239121,-0.248299,-0.257323,-0.266194,-0.274917,0.156019,0.0,0.156019,0.0,0.0,0.0,0.0,0.0


# To delete

In [23]:
from IPython.display import FileLink, FileLinks

# scratch cell: show links to the files under the current directory
FileLinks(path='.')

In [24]:
# scratch cell: list the notebook directory (relies on IPython automagic for %ls)
ls

[0m[01;34mcatboost_modeling[0m/         make_train_script.ipynb
[01;34mlogistic_regr_modeling[0m/    model_load_and_score.ipynb
make_eval_script.ipynb     testing_all_modeling_scripts.ipynb
make_scoring_script.ipynb


In [25]:
# scratch cell: show the kernel's working directory (relies on IPython automagic for %pwd)
pwd

'/home/justin/projects/lendingclub/notebooks/modeling'

In [30]:
# scratch cell: link to a zip archive four directories above the notebook folder
FileLink(path='../../../../horizan_zip.zip')