### Import Libraries


In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
import joblib

from keras.layers import Dense, Dropout
from keras.models import Model, Sequential
from keras.wrappers.scikit_learn import KerasClassifier

### Import Data

In [2]:
# Read the training set and discard the free-text diagnosis description columns.
train = pd.read_csv('../data/readmissions_train.csv').drop(
    columns=['diag_1_desc', 'diag_2_desc', 'diag_3_desc'])

# Pull the target out of `train` (pop removes the column in place), then take
# an independent copy of the remaining columns as the feature matrix.
y = train.pop('readmitted')
X = train.copy()

### Function for Keras Model

In [3]:
#First I define my keras model

def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform',
                 dropout=0.2, hidden_units=64):
    """Build and compile the binary-classification network.

    The architecture is one hidden ReLU layer, a dropout layer and a single
    sigmoid output unit. No input shape is declared here.

    All hyper-parameters default to the values that were previously
    hard-coded, so ``create_model()`` returns the same model as before.
    Exposing them as keyword arguments lets a caller override them, e.g.
    ``KerasClassifier(build_fn=create_model, dropout=0.3)``.

    Parameters
    ----------
    optimizer : str or keras optimizer
        Optimizer handed to ``model.compile``.
    kernel_initializer : str or keras initializer
        Weight initializer used by both ``Dense`` layers.
    dropout : float
        Rate given to the ``Dropout`` layer placed after the hidden layer.
    hidden_units : int
        Number of units in the hidden ``Dense`` layer.

    Returns
    -------
    keras.models.Sequential
        The compiled model (binary cross-entropy loss, accuracy metric).
    """
    model = Sequential([
        Dense(hidden_units, activation='relu',
              kernel_initializer=kernel_initializer),
        Dropout(dropout),
        # One sigmoid unit: the output is a single value in (0, 1).
        Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer),
    ])

    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])

    return model

### Define Preprocessing step per type of column

In [4]:

# Numeric features: every column of X that is not object-typed.
# Taken in X's own column order (a set difference would give an order that
# can change from one kernel session to the next).
numeric_features = X.select_dtypes(exclude='object').columns.tolist()
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical features: the object-typed columns of X.
# Derived from X (not `train`) so the list always matches the frame that is
# actually passed to the preprocessor.
categorical_features = X.select_dtypes(include='object').columns.tolist()
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Route each group of columns through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

### Fit the Preprocessing Pipeline

In [5]:
# The pipeline holds a single step: the column-wise preprocessor.
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit the preprocessing steps on X and produce the transformed training
# matrix in the same call.
preprocessed = pipeline.fit_transform(X, y)

### Train the Keras Classifier

In [6]:
# Wrap the model-building function in the scikit-learn style Keras classifier
# and train it on the preprocessed features.
wrapper_options = {'build_fn': create_model, 'verbose': 0}
clf = KerasClassifier(**wrapper_options)
clf.fit(preprocessed, y)

<tensorflow.python.keras.callbacks.History at 0x7fe39f970290>

### Save Custom Model files

In [7]:
# Write both artifacts into the custom_model directory: the fitted
# preprocessing pipeline as a pickle and the trained Keras network as HDF5.
preprocessing_path = 'custom_model/preprocessing.pkl'
model_path = 'custom_model/model.h5'

joblib.dump(pipeline, preprocessing_path)
clf.model.save(model_path)

### Verify custom model integrity with DRUM

In [25]:
# Shell out to DRUM's `validation` command: it is pointed at the ./custom_model
# code directory, fed the test CSV as input, and told the target is binary with
# "yes" as the positive and "no" as the negative class label.
!drum validation --code-dir ./custom_model --input ../data/readmissions_test.csv --target-type binary --positive-class-label yes --negative-class-label no

 0.527116

[500 rows x 2 columns]
2020-10-23 16:56:03.384111: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-10-23 16:56:03.398128: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fde5efef690 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-10-23 16:56:03.398165: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.394183  0.605817
1    0.392307  0.607693
2    0.497047  0.502953
3    0.408983  0.591017
4    0.395105  0.604895
..        ...       ...
495  0.449922  0.550078
496  0.409577  0.590423
497  0.438001  0.56199