# Heart Disease Random Forest Classifier

In this example we use the Heart Disease UCI dataset from Kaggle:
https://www.kaggle.com/ronitf/heart-disease-uci

We train a pipeline composed of a DictVectorizer, a RobustScaler, and a RandomForestClassifier, and then deploy it on Blazee.

In [4]:
import pandas as pd
import numpy as np

# Read the heart-disease CSV into a DataFrame and preview its first ten rows
heart_csv = './data/heart.csv'
df = pd.read_csv(heart_csv)
df.head(n=10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
5,57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
6,56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
7,44,1,1,120,263,0,1,173,0,0.0,2,0,3,1
8,52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
9,57,1,2,150,168,0,1,174,0,1.6,2,0,2,1


In [50]:
from sklearn.model_selection import train_test_split

# Separate the label column ('target') from the feature columns
y = df['target']
X = df.drop(columns=['target'])

# Hold out 33% of the rows for evaluation; the fixed random_state keeps
# the split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=101
)

# The pipeline takes a list of dicts (one dict per row) as input,
# so convert both feature frames to records
X_train, X_test = (frame.to_dict(orient='records') for frame in (X_train, X_test))
X_train[0]

{'age': 48.0,
 'sex': 1.0,
 'cp': 0.0,
 'trestbps': 130.0,
 'chol': 256.0,
 'fbs': 1.0,
 'restecg': 0.0,
 'thalach': 150.0,
 'exang': 1.0,
 'oldpeak': 0.0,
 'slope': 2.0,
 'ca': 2.0,
 'thal': 3.0}

In [51]:
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier

# Build the three stages individually, then chain them in order:
# dict records -> feature matrix -> scaled features -> random forest
vectorizer = DictVectorizer(sparse=False)
scaler = RobustScaler()
forest = RandomForestClassifier(n_estimators=1000, random_state=10)
pipeline = make_pipeline(vectorizer, scaler, forest)

# Fit on the training records, then display the score on the held-out set
pipeline.fit(X_train, y_train)
pipeline.score(X_test, y_test)

0.82

In [53]:
import os

import blazee

# Deploy the model on Blazee.
# The API key is read from the BLAZEE_API_KEY environment variable so that a
# real key never has to be pasted into (and saved with) the notebook. The
# placeholder below is only used when that variable is not set.
api_key = os.environ.get('BLAZEE_API_KEY', 'YOUR-API-KEY')
bz = blazee.Blazee(api_key)

model = bz.deploy_model(pipeline, model_name="HeartDisease RF")
model

<BlazeeModel 'HeartDisease RF'
	id=989da70d-ec73-43ce-a41f-d70a2446fef0>

In [46]:
# Make sure predictions work as expected: the deployed model must agree with
# the local pipeline on the first 10 records of the test set
local_predictions = pipeline.predict(X_test)
local_probas = pipeline.predict_proba(X_test)

blazee_preds = model.predict_batch(X_test[:10])
for i, pred in enumerate(blazee_preds):
    print(pred)
    assert pred.prediction == local_predictions[i], (
        f"record {i}: remote prediction {pred.prediction} "
        f"!= local prediction {local_predictions[i]}"
    )
    # Compare probabilities with a tolerance rather than exact equality:
    # the remote values have been through a round trip to the service, so
    # bit-for-bit identical floats should not be relied upon
    assert np.allclose(pred.probas, local_probas[i]), (
        f"record {i}: remote probas {pred.probas} "
        f"!= local probas {local_probas[i]}"
    )

<Prediction
	prediction=0
	probas=[0.874, 0.126]>
<Prediction
	prediction=1
	probas=[0.204, 0.796]>
<Prediction
	prediction=1
	probas=[0.253, 0.747]>
<Prediction
	prediction=1
	probas=[0.055, 0.945]>
<Prediction
	prediction=0
	probas=[0.991, 0.009]>
<Prediction
	prediction=1
	probas=[0.044, 0.956]>
<Prediction
	prediction=1
	probas=[0.171, 0.829]>
<Prediction
	prediction=0
	probas=[0.92, 0.08]>
<Prediction
	prediction=0
	probas=[0.829, 0.171]>
<Prediction
	prediction=1
	probas=[0.416, 0.584]>
