## Example Model to be used for deployment

Instructor: Andrew Berry

Date: Oct 26, 2021

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('data/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
y = df['target']
X = df.drop('target', axis=1)

In [4]:
from sklearn.model_selection import train_test_split

#split data into training and testing sets
X_test, X_train, y_test, y_train = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [6]:
## Pipeline
imputer = SimpleImputer(strategy='median')
scaler = StandardScaler()
ohe = OneHotEncoder(handle_unknown='ignore')


preprocessing = make_column_transformer(
    (make_pipeline(imputer, scaler), ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']),
    (ohe, ["cp"]),
    remainder='passthrough')

pipe = make_pipeline(preprocessing, LogisticRegression())

In [7]:
pipe.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('pipeline',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer(strategy='median')),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['age', 'trestbps', 'chol',
                                                   'thalach', 'oldpeak']),
                                                 ('onehotencoder',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  ['cp'])])),
                ('logisticregression', LogisticRegression())])

In [8]:
pipe.score(X_test, y_test)

0.7892561983471075

In [9]:
# Save the model
import pickle

pickle.dump(pipe, open('model.pkl', 'wb'))

In [20]:
import json
json_data = X_test[0:1].to_json(orient = 'records') 
json_data = json.loads(json_data)

In [27]:
json_data

'[{"age":42,"sex":1,"cp":1,"trestbps":120,"chol":295,"fbs":0,"restecg":1,"thalach":162,"exang":0,"oldpeak":0.0,"slope":2,"ca":0,"thal":2}]'

In [14]:
#X_test.dtypes

In [32]:
print('hi')

hi


In [34]:
test = pd.read_csv('flask/test.csv', index_col=0)
test.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
132,42,1,1,120,295,0,1,162,0,0.0,2,0,2
202,58,1,0,150,270,0,0,111,1,0.8,2,0,3
196,46,1,2,150,231,0,1,147,0,3.6,1,0,2
75,55,0,1,135,250,0,0,161,0,1.4,1,0,2
176,60,1,0,117,230,1,1,160,1,1.4,2,2,3
