## Взаимодействие с моделью через API

In [29]:
import numpy as np
import pandas as pd
from urllib import request # , parse
import json

from sklearn.metrics import precision_recall_curve, \
                            roc_auc_score, \
                            log_loss, \
                            confusion_matrix

In [17]:
X_valid = pd.read_csv("./data/X_valid.csv")
y_valid = pd.read_csv("./data/y_valid.csv")

In [18]:
X_valid[:1]

Unnamed: 0,Home Ownership,Annual Income,Years in current job,Tax Liens,Number of Open Accounts,Years of Credit History,Maximum Open Credit,Number of Credit Problems,Months since last delinquent,Bankruptcies,Purpose,Term,Current Loan Amount,Current Credit Balance,Monthly Debt,Credit Score
0,Home Mortgage,2396375.0,6 years,0.0,9.0,10.5,335280.0,1.0,8.0,1.0,debt consolidation,Short Term,621544.0,139859.0,33350.0,7130.0


In [19]:
total_columns_list = ['Maximum Open Credit', 'Annual Income', 'Current Loan Amount', 'Current Credit Balance',
                      'Monthly Debt', 'Credit Score', 'Home Ownership', 'Years in current job', 'Purpose', 'Term',
                      'Tax Liens', 'Number of Open Accounts', 'Bankruptcies', 'Months since last delinquent',
                      'Years of Credit History', 'Number of Credit Problems']

In [20]:
def print_my(body):
    print(body)

In [21]:
# [print_my(x) for x in X_valid[total_columns_list].head(2).T.to_dict('dict').values()]
[print_my(x) for x in X_valid.head(2).T.to_dict('dict').values()]

{'Home Ownership': 'Home Mortgage', 'Annual Income': 2396375.0, 'Years in current job': '6 years', 'Tax Liens': 0.0, 'Number of Open Accounts': 9.0, 'Years of Credit History': 10.5, 'Maximum Open Credit': 335280.0, 'Number of Credit Problems': 1.0, 'Months since last delinquent': 8.0, 'Bankruptcies': 1.0, 'Purpose': 'debt consolidation', 'Term': 'Short Term', 'Current Loan Amount': 621544.0, 'Current Credit Balance': 139859.0, 'Monthly Debt': 33350.0, 'Credit Score': 7130.0}
{'Home Ownership': 'Home Mortgage', 'Annual Income': 1364854.7169779572, 'Years in current job': '5 years', 'Tax Liens': 0.0, 'Number of Open Accounts': 8.0, 'Years of Credit History': 14.0, 'Maximum Open Credit': 883014.0, 'Number of Credit Problems': 0.0, 'Months since last delinquent': 33.52142363510712, 'Bankruptcies': 0.0, 'Purpose': 'debt consolidation', 'Term': 'Short Term', 'Current Loan Amount': 157256.0, 'Current Credit Balance': 183027.0, 'Monthly Debt': 3492.0, 'Credit Score': 850.0}


[None, None]

In [14]:
def get_prediction_my(body):
    myurl = "http://0.0.0.0:8180/predict"
    req = request.Request(myurl)
    req.add_header('Content-Type', 'application/json; charset=utf-8')
    jsondata = json.dumps(body)
    jsondataasbytes = jsondata.encode('utf-8')   # needs to be bytes
    req.add_header('Content-Length', len(jsondataasbytes))
#     print (jsondataasbytes)
    response = request.urlopen(req, jsondataasbytes)
    return json.loads(response.read())['predictions']

In [25]:
%%time

# predictions = [get_prediction_my(x) for x in X_valid[total_columns_list].head(1).T.to_dict('dict').values()]   # .head(500)
predictions = [get_prediction_my(x) for x in X_valid.head(100).T.to_dict('dict').values()]

CPU times: user 144 ms, sys: 39.4 ms, total: 184 ms
Wall time: 29.3 s


## Рассчитаем метрики

In [26]:
predictions[:10]

[0.9595445248340846,
 0.05040759870858322,
 0.27339089035583736,
 0.24736002982126318,
 0.012111689312096193,
 0.15428072327212866,
 0.09988118447813049,
 0.3296996149949156,
 0.3248313692110695,
 0.946386121937758]

In [34]:
precision, recall, thresholds = precision_recall_curve(y_valid[:100], predictions)
f_score = (2 * precision * recall) / (precision + recall)
# locate the index of the largest f score
ix = np.argmax(f_score)
print('Best Threshold=%f, F-Score=%.3f, Precision=%.3f, Recall=%.3f' % (thresholds[ix], 
                                                                        f_score[ix],
                                                                        precision[ix],
                                                                        recall[ix]))
r_auc = roc_auc_score(y_true=y_valid[:100], y_score=predictions)
l_los = log_loss(y_true=y_valid[:100], y_pred=predictions)

print("roc auc score: {:.4f}".format(r_auc))
print("log loss score: {:.4f}".format(l_los))

cnf_matrix = confusion_matrix(y_valid[:100], predictions>thresholds[ix])

TN = cnf_matrix[0][0]
FN = cnf_matrix[1][0]
TP = cnf_matrix[1][1]
FP = cnf_matrix[0][1]

TPR = TP/(TP+FN)
FPR = FP/(FP+TN)
TNR = TN/(FP+TN)

print(f"TN: {TN} FN: {FN} TP: {TP} FP: {FP}")
print(f"TPR: {TPR:.4f} FPR: {FPR:.4f} TNR: {TNR:.4f}")

Best Threshold=0.299733, F-Score=0.557, Precision=0.515, Recall=0.607
roc auc score: 0.7158
log loss score: 0.5619
TN: 56 FN: 12 TP: 16 FP: 16
TPR: 0.5714 FPR: 0.2222 TNR: 0.7778
