## Model Validation

#### Validation Measures:

Accuracy = $\frac{\sum(y_{true} == y_{predicted})}{len(y_{true})}$, of all the loans, what percentage did the model classify correctly?

Precision = $\frac{tp}{tp+fp}$, of the loans the model flags as "is_bad", what percentage will actually default?

Recall = $\frac{tp}{tp+fn}$, of all the loans that default, what percentage did the model catch?

F1-Score = $\frac{2 \cdot (precision \cdot recall)}{precision + recall}$, the harmonic mean of precision and recall. It equals 1 only when both precision and recall equal 1. Best value = 1, worst value = 0.

In [1]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np
import pickle

In [2]:
def validate_metrics(y_true, y_pred):
    """Print F1, accuracy, precision and recall for binary predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels. Accepts a list, NumPy array, pandas Series or a
        single-column DataFrame; the values are flattened to 1-D before use.
    y_pred : array-like
        Predicted labels, one per entry of ``y_true``.

    Returns
    -------
    None
        The metrics are only printed.

    Raises
    ------
    ValueError
        If the inputs are empty or do not hold the same number of labels.
    """
    # Flatten both inputs: a single-column DataFrame has shape (n, 1), and
    # comparing it row by row yields 1-element arrays instead of scalars.
    true_labels = np.asarray(y_true).ravel()
    pred_labels = np.asarray(y_pred).ravel()

    if true_labels.size == 0:
        raise ValueError('validate_metrics needs at least one label to score')
    if true_labels.size != pred_labels.size:
        raise ValueError(
            'y_true and y_pred must have the same number of labels '
            '(got %d and %d)' % (true_labels.size, pred_labels.size)
        )

    # Fraction of positions where prediction and truth agree.
    accuracy = np.mean(true_labels == pred_labels)
    precision = precision_score(y_true=true_labels, y_pred=pred_labels)
    recall = recall_score(y_true=true_labels, y_pred=pred_labels)
    f_score = f1_score(y_true=true_labels, y_pred=pred_labels)

    print('F-Score: %f' % f_score)
    print('Additional Metrics:')
    print('  Accuracy:  %f' % accuracy)
    print('  Precision: %f' % precision)
    print('  Recall:    %f' % recall)

    # Sum of the predicted labels (the number of predicted positives if the
    # labels are 0/1). Left unlabeled so previously recorded output still matches.
    print(sum(y_pred))

In [8]:
# Which saved model family and version this notebook validates.
model_type = 'logistic_regression_model'
version = '0.1.1556568744'

# Directory holding that version's pickled artifacts; the trailing slash is
# kept because file names are appended to it by plain string concatenation.
filepath = 'saved_models/%s/%s/' % (model_type, version)

### High Level Model Metrics

In [9]:
# Load the data bundle saved alongside the models, pair every pickled model
# with the held-out data it should be scored on, then print metrics for each.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts written by our own training pipeline.
with open(filepath + 'training_data.p', 'rb') as training_data_file:
    model_training_data = pickle.load(training_data_file)

# Maps a model's pickle file name -> the features/labels used to score it.
models_to_validate = dict()

models_to_validate['model_trained_on_validation_data.p'] = {
    'X': model_training_data.X_validate,
    'y': model_training_data.y_validate
}

for fold in model_training_data.folds:
    fold_data = model_training_data.folds[fold]
    models_to_validate['model_trained_on_k_{}_data.p'.format(fold)] = {
        'X': fold_data['X_test'],
        'y': fold_data['y_test']
    }

for model_name in models_to_validate:
    print(model_name)
    # Context manager closes the file handle as soon as the model is loaded.
    with open(filepath + model_name, 'rb') as model_file:
        model = pickle.load(model_file)
    y_pred = model.predict(models_to_validate[model_name]['X'])
    validate_metrics(y_true=models_to_validate[model_name]['y'], y_pred=y_pred)
    print()
    print()

model_trained_on_validation_data.p
F-Score: 0.120000
Additional Metrics:
  Accuracy:  0.879369
  Precision: 1.000000
  Recall:    0.063830
12


model_trained_on_k_fold_0_data.p
F-Score: 0.115044
Additional Metrics:
  Accuracy:  0.879081
  Precision: 1.000000
  Recall:    0.061033
13


model_trained_on_k_fold_1_data.p
F-Score: 0.171674
Additional Metrics:
  Accuracy:  0.883243
  Precision: 1.000000
  Recall:    0.093897
20


model_trained_on_k_fold_2_data.p
F-Score: 0.187234
Additional Metrics:
  Accuracy:  0.884383
  Precision: 0.956522
  Recall:    0.103774
23


model_trained_on_k_fold_3_data.p
F-Score: 0.123894
Additional Metrics:
  Accuracy:  0.880145
  Precision: 1.000000
  Recall:    0.066038
14


model_trained_on_k_fold_4_data.p
F-Score: 0.132159
Additional Metrics:
  Accuracy:  0.880751
  Precision: 1.000000
  Recall:    0.070755
15




In [43]:
# Probability cut-off used by the cells below to select high-probability loans.
# NOTE(review): 0.24 looks like the rounded 95th percentile of the predicted
# probabilities (the percentile cell below returns ~0.2409) — confirm, and
# consider deriving it instead of hard-coding the literal.
threshold = 0.24


In [44]:
# 95th percentile of the predicted probabilities.
# NOTE(review): in the cells shown here, `probabilities` is only assigned by the
# later threshold-sweep cell, so this cell fails on Restart & Run All unless
# that cell has already been executed.
np.percentile(probabilities, 95)

0.24094457035016792

In [45]:
# Column sum divided by row count of the selected loans, i.e. the share with
# is_bad == 1 (assumes is_bad is a 0/1 label — TODO confirm).
# NOTE(review): in the cells shown here, `high_prob_defaulters` is only assigned
# by the later threshold-sweep cell, so this cell depends on out-of-order state.
np.sum(high_prob_defaulters)/len(high_prob_defaulters)

is_bad    0.44186
dtype: float64

In [52]:
# Candidate probability cut-offs to sweep: 0.10 up to (but excluding) 1.00 in
# steps of 0.01.
thresholds = np.arange(start=0.1, stop=1, step=0.01)

In [53]:
# Sweep the candidate cut-offs in `thresholds` and record, for each one, how
# many loans the model flags and what share of those flagged loans have a
# positive label.
# NOTE(review): uses `model` and `model_name` as left behind by the validation
# loop (i.e. the last model loaded) — confirm that is the intended model.
validation_set = models_to_validate[model_name]

# Loop-invariant work, computed once instead of once per threshold.
# Column 1 of predict_proba is taken as the positive-class probability
# (presumably P(is_bad); verify against model.classes_).
probabilities = np.asarray(model.predict_proba(validation_set['X']))[:, 1]
y_true = validation_set['y']
# Flat label array so the boolean mask works regardless of the container type.
labels = np.asarray(y_true).ravel()

output = dict()
for thresh in thresholds:
    # Select with the loop variable; the original used the fixed global
    # `threshold`, so every iteration produced the same selection.
    flagged = labels[probabilities > thresh]
    output[thresh] = {
        'n_flagged': int(flagged.size),
        # NaN rather than a division by zero when nothing exceeds the cut-off.
        'default_rate': float(flagged.mean()) if flagged.size else float('nan'),
    }

# Later cells inspect the selection at the single global `threshold`; keep that
# name bound to the same value it had after the original loop.
high_prob_defaulters = y_true[probabilities > threshold]

array([0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ,
       0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31,
       0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42,
       0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,
       0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64,
       0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75,
       0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86,
       0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97,
       0.98, 0.99])

In [54]:
# Predicted probabilities that exceed the single cut-off `threshold`
# (boolean-mask selection); this uses the scalar `threshold`, not the swept
# `thresholds` array.
probabilities[probabilities > threshold]

array([0.96251531, 0.25725416, 0.27842378, 0.33188173, 0.27180484,
       0.25760511, 0.2549078 , 0.25285176, 0.2758139 , 0.29081967,
       0.2621898 , 0.36336809, 0.91810311, 0.30740736, 0.33693288,
       0.27711423, 0.27307182, 0.88938717, 0.94433075, 0.36541271,
       0.35761518, 0.26622309, 0.27179549, 0.2409633 , 0.3219665 ,
       0.2925082 , 0.2669193 , 0.27607645, 0.35264282, 0.24979251,
       0.34699451, 0.30875087, 0.92160785, 0.24327277, 0.39433446,
       0.94823416, 0.28411678, 0.24278896, 0.26682798, 0.9625978 ,
       0.28834753, 0.34077732, 0.31414546, 0.28768498, 0.29594465,
       0.3369201 , 0.26030251, 0.35347272, 0.24694046, 0.31273212,
       0.25281662, 0.27804374, 0.2629949 , 0.24380482, 0.41627251,
       0.92227667, 0.27216908, 0.26017821, 0.26097922, 0.24092925,
       0.2879563 , 0.2532436 , 0.24488462, 0.35519191, 0.24395085,
       0.24933718, 0.9286176 , 0.90762415, 0.98132369, 0.94607617,
       0.31483285, 0.96881413, 0.29330938, 0.29886625, 0.24054

In [48]:
# Display the true labels of the loans whose predicted probability exceeds
# `threshold` (as selected in the threshold-sweep cell).
# NOTE(review): this cell's execution count (In [48]) is lower than the cells
# above it, so the shown output may come from an earlier kernel state; re-run
# top to bottom before trusting it.
high_prob_defaulters

Unnamed: 0,is_bad
7205,1
8150,0
4277,0
9295,0
4145,0
3962,1
6412,1
9163,0
6315,0
4100,0
