In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import xgboost as xgb

In [3]:
df = pd.read_csv('heart.csv')

df.columns = df.columns.str.lower().str.replace(' ', '_')

string_columns = list(df.dtypes[df.dtypes == 'object'].index)

for col in string_columns:
    df[col] = df[col].str.lower().str.replace(' ', '_')

In [4]:
categorical = ['sex', 'chestpaintype', 'restingecg', 'exerciseangina',  'st_slope']

numerical = ['age','restingbp', 'cholesterol', 'fastingbs','maxhr','oldpeak']

In [5]:
df_train_full, df_test = train_test_split(df, test_size=0.2, random_state=1)
df_train_full = df_train_full.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

In [6]:
y_train = df_train_full.heartdisease.values
y_test = df_test.heartdisease.values

In [7]:
del df_train_full['heartdisease']
del df_test['heartdisease']

In [8]:
def train(df, y):
    cat = df[categorical + numerical].to_dict(orient='records')
    
    dv = DictVectorizer(sparse=False)
    dv.fit(cat)

    X_train_full = dv.transform(cat)

    dtrain_full = xgb.DMatrix(X_train_full, label=y_train, feature_names=dv.feature_names_)

    xgb_params = {
        'eta': 0.1,
        'max_depth': 3,
        'min_child_weight': 11,

        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'nthread': 8,
        'seed': 1,
    }

    model = xgb.train(xgb_params, dtrain_full,
                  num_boost_round=70)

    return dv, model


def predict(df, dv, model):
    cat = df[categorical + numerical].to_dict(orient='records')
    
    X = dv.transform(cat)
    dtest = xgb.DMatrix(X, label=y_test, feature_names=dv.feature_names_)

    y_pred = model.predict(dtest)

    return y_pred

In [10]:
dv, model = train(df_train_full, y_train)
y_pred = predict(df_test, dv, model)

auc = roc_auc_score(y_test, y_pred)
print('auc = %.3f' % auc)

auc = 0.955


In [11]:
patient = {'age': 43,
 'sex': 'm',
 'chestpaintype': 'asy',
 'restingbp': 120,
 'cholesterol': 177,
 'fastingbs': 0,
 'restingecg': 'lvh',
 'maxhr': 120,
 'exerciseangina': 'y',
 'oldpeak': 2.5,
 'st_slope': 'flat'}

In [12]:
df = pd.DataFrame([patient])
y_pred = predict(df, dv, model)
y_pred[0]

np.float32(0.9175592)

In [15]:
def predict_single(patient, dv, model):
    df = pd.DataFrame([patient])
    y_pred = predict(df, dv, model)
    return y_pred[0]

In [16]:
predict_single(patient, dv, model)

np.float32(0.9175592)

In [None]:
import pickle 

In [21]:
output_file = f'model_xgb.bin'
output_file

'model_xgb.bin'

In [23]:
f_out = open(output_file, 'wb')
pickle.dump((dv, model),f_out)
f_out.close()

In [24]:
with open('heart_fail.bin', 'wb') as f_out:
    pickle.dump((dv, model), f_out)

In [1]:
import pickle

In [2]:
model_file = 'model_xgb.bin'

In [3]:
with open(model_file, 'rb') as f_in:
    dv, model = pickle.load(f_in)

In [4]:
dv, model

(DictVectorizer(sparse=False), <xgboost.core.Booster at 0x71bd05221160>)

In [5]:
patient = {'age': 43,
 'sex': 'm',
 'chestpaintype': 'asy',
 'restingbp': 120,
 'cholesterol': 177,
 'fastingbs': 0,
 'restingecg': 'lvh',
 'maxhr': 120,
 'exerciseangina': 'y',
 'oldpeak': 2.5,
 'st_slope': 'flat'}

In [7]:
X = dv.transform([patient])

In [12]:
dtest = Xgboost.DMatrix(X, label=y_test, feature_names=dv.feature_names_)
model.predict(X)

NameError: name 'Xgboost' is not defined

In [25]:
import requests
url = 'http://localhost:9696/predict'
response = requests.post(url, json=patient)
result = response.json()
result

ConnectionError: HTTPConnectionPool(host='localhost', port=9696): Max retries exceeded with url: /predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7e4eedc144d0>: Failed to establish a new connection: [Errno 111] Connection refused'))