## Testing the model

In [5]:
import pickle

import pandas as pd

In [2]:
# Reference: https://github.com/DataTalksClub/machine-learning-zoomcamp/blob/master/05-deployment/02-pickle.md
#
# Both artifacts are pickled objects, so the files must be opened in binary
# read mode ("rb"); text mode would corrupt the byte stream.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.

# The fitted model.
with open("model1.bin", "rb") as model_file:
    model = pickle.load(model_file)

# The vectorizer that turns feature dicts into the matrix the model consumes.
with open("dv.bin", "rb") as vectorizer_file:
    dict_vectorizer = pickle.load(vectorizer_file)

In [4]:
# Sanity check: show the classes of the two unpickled objects.
tuple(map(type, (dict_vectorizer, model)))

(sklearn.feature_extraction._dict_vectorizer.DictVectorizer,
 sklearn.linear_model._logistic.LogisticRegression)

In [10]:
# Columns kept for scoring; every other column of the CSV is discarded.
features = ["job", "duration", "poutcome"]

# The file is semicolon-separated.
bank_df = pd.read_csv("bank.csv", sep=";").loc[:, features]

In [12]:
# Preview the first row to confirm only the selected feature columns remain.
bank_df.iloc[:1]

Unnamed: 0,job,duration,poutcome
0,unemployed,79,unknown


In [15]:
# Score every row of bank_df with the unpickled model.

# One dict per row, which is the input format handed to the vectorizer.
bank_dict = bank_df.to_dict(orient="records")
X = dict_vectorizer.transform(bank_dict)

# predict_proba returns one column per class; column 1 is the probability
# of model.classes_[1].
class_probabilities = model.predict_proba(X)
y_proba = class_probabilities[:, 1]

# Hard decision at a 0.5 probability threshold.
y_pred = y_proba >= 0.5

In [16]:
# Boolean decision per row of bank_df: True where the column-1 probability is >= 0.5.
y_pred

array([False, False, False, ..., False, False, False])

In [18]:
# First record of bank_dict; shows the dict layout that is passed to the vectorizer.
bank_dict[0]

{'job': 'unemployed', 'duration': 79, 'poutcome': 'unknown'}

In [24]:
# Score one hand-written record, using the same keys as the rows in bank_dict.
sample = {
    "job": "retired",
    "duration": 445,
    "poutcome": "success",
}
# A single mapping is passed directly (not wrapped in a list).
X_sample = dict_vectorizer.transform(sample)

# Column 1 of predict_proba, thresholded at 0.5 as for the full dataset.
sample_class_probabilities = model.predict_proba(X_sample)
y_proba_sample = sample_class_probabilities[:, 1]
y_pred_sample = y_proba_sample >= 0.5

# Show the probability rounded to three decimals next to the boolean decision.
rounded_probability = round(y_proba_sample[0], 3)
print(rounded_probability, y_pred_sample)

0.902 [ True]


## Testing the model with requests

In [25]:
import requests

# Prediction endpoint of the web service listening on local port 9696.
url = "http://localhost:9696/predict"

# Feature dict sent as the JSON request body.
client = {"job": "unknown", "duration": 270, "poutcome": "failure"}

# Without a timeout, requests waits indefinitely if the service never answers.
response = requests.post(url, json=client, timeout=10)
# Fail loudly on HTTP 4xx/5xx instead of trying to parse an error body as JSON.
response.raise_for_status()
response.json()

{'scoring': False, 'scoring_probability': 0.13968947052356817}

## Testing the model with Docker

In [26]:
import requests

# 0.0.0.0 is a bind-all address, not a portable destination (connecting to it
# fails on some platforms, e.g. Windows), so the client targets localhost.
# NOTE(review): assumes the container publishes port 9696 to the host
# (e.g. `docker run -p 9696:9696 ...`) - confirm against the run command.
url = "http://localhost:9696/predict"

# Feature dict sent as the JSON request body.
client = {"job": "retired", "duration": 445, "poutcome": "success"}

# Without a timeout, requests waits indefinitely if the container never answers.
response = requests.post(url, json=client, timeout=10)
# Fail loudly on HTTP 4xx/5xx instead of trying to parse an error body as JSON.
response.raise_for_status()
response.json()

{'scoring': True, 'scoring_probability': 0.726936946355423}