### Import Libraries


In [1]:
import joblib
import pandas as pd
import tensorflow as tf

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

### Import Data

In [2]:
# Load the training set and discard the free-text diagnosis description columns.
description_columns = ['diag_1_desc', 'diag_2_desc', 'diag_3_desc']
train = pd.read_csv('../data/readmissions_train.csv').drop(columns=description_columns)

# Features: every remaining column except the target.
X = train.drop(columns='readmitted')

# Target: pop() also removes 'readmitted' from `train`, so from this point on
# `train` and `X` hold the same columns.
y = train.pop('readmitted')

### Function for Keras Model

In [3]:
# First, define the Keras model

def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform', dropout=0.2):
    """Build and compile the binary-classification network.

    The network is a single hidden layer of 64 ReLU units, followed by
    dropout and a one-unit sigmoid output. No input shape is declared here.

    Parameters
    ----------
    optimizer : str or optimizer instance, default 'adagrad'
        Passed straight to ``model.compile``.
    kernel_initializer : str or initializer instance, default 'glorot_uniform'
        Weight initializer used for both Dense layers.
    dropout : float, default 0.2
        Rate given to the Dropout layer between the hidden and output layers.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).

    Calling ``create_model()`` with no arguments builds the same network as
    before; the parameters only make the settings tunable from the caller.
    """
    model = Sequential([
        Dense(64, activation='relu', kernel_initializer=kernel_initializer),
        Dropout(dropout),
        Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer),
    ])

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

### Define Preprocessing Steps per Column Type

In [4]:

# Preprocessing for numerical features: every column of X that is not of object dtype.
# select_dtypes keeps the DataFrame's own column order, so this list is identical on
# every run; a set difference over column names has no stable order across
# interpreter sessions.
numeric_features = list(X.select_dtypes(exclude='object').columns)
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),   # fill gaps with the column median
    ('scaler', StandardScaler())])                   # then standardise

# Preprocessing for categorical features: the object-dtype columns of X.
# Taken from X (not `train`) so both feature lists come from the frame that is
# actually fed to the pipeline.
categorical_features = list(X.select_dtypes(include='object').columns)
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),  # explicit 'missing' level
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])                    # unseen categories are ignored at transform time

# Preprocessor that routes each group of columns to its transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

### Fit the Preprocessing Pipeline

In [5]:
# Full preprocessing pipeline (kept as a Pipeline so it can be saved and reloaded as one object)
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit the preprocessing pipeline and transform X in one pass.
# fit_transform yields the same fitted pipeline and the same matrix as
# fit(X, y) followed by transform(X), without pushing X through the
# transformers a second time.
preprocessed = pipeline.fit_transform(X, y)

### Train the Keras Classifier

In [6]:
#Create the keras model
clf = KerasClassifier(build_fn=create_model, verbose=0)
clf.fit(preprocessed,y)

<tensorflow.python.keras.callbacks.History at 0x7fa6708545f8>

### Save Custom Model files

In [7]:
# Persist the fitted preprocessing pipeline next to the model code.
joblib.dump(pipeline, 'custom_model/preprocessing.pkl')

# Save the underlying Keras network (not the scikit-learn wrapper) as an HDF5 file.
keras_model = clf.model
keras_model.save('custom_model/model.h5')

### Verify custom model integrity with DRUM

In [None]:
# Run DRUM's `validation` command on the artifacts saved in ./custom_model, using
# ../data/readmissions_test.csv as input. The model is declared as a binary target
# whose positive class label is "yes" and negative class label is "no".
!drum validation --code-dir ./custom_model --input ../data/readmissions_test.csv --target-type binary --positive-class-label yes --negative-class-label no

2020-11-30 00:34:37.577761: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:34:37.595374: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fad85b49740 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:34:37.595426: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.504705  0.495295
1    0.540895  0.459105
2    0.586829  0.413171
3    0.525045  0.474955
4    0.505892  0.494108
..        ...       ...
495  0.507780  0.492220
496  0.530991  0.469009
497  0.574898  0.425102
498  0.696024  0.303976
499  0.5

2020-11-30 00:35:08.509357: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:35:08.537098: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f8ba04f9f50 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:35:08.537145: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.497404  0.502596
1    0.542703  0.457297
2    0.590198  0.409802
3    0.513886  0.486114
4    0.498148  0.501852
..        ...       ...
495  0.497224  0.502776
496  0.517193  0.482807
497  0.560767  0.439233
498  0.685153  0.314847
499  0.5

2020-11-30 00:35:40.260874: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:35:40.276759: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fe5b86972b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:35:40.276804: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.503029  0.496971
1    0.529129  0.470871
2    0.584462  0.415538
3    0.517322  0.482678
4    0.494127  0.505873
..        ...       ...
495  0.500449  0.499551
496  0.542322  0.457678
497  0.566222  0.433778
498  0.683624  0.316376
499  0.5

2020-11-30 00:36:09.546875: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:36:09.563256: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f8d586f24d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:36:09.563290: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.494658  0.505342
1    0.540895  0.459105
2    0.518532  0.481468
3    0.521482  0.478518
4    0.494917  0.505083
..        ...       ...
495  0.497018  0.502982
496  0.524005  0.475995
497  0.500617  0.499383
498  0.531206  0.468794
499  0.5

2020-11-30 00:36:36.050163: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:36:36.063598: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fd08440fe40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:36:36.063632: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.501348  0.498652
1    0.545512  0.454488
2    0.591535  0.408465
3    0.525536  0.474464
4    0.499439  0.500561
..        ...       ...
495  0.502728  0.497272
496  0.532838  0.467162
497  0.572162  0.427838
498  0.693042  0.306958
499  0.5

2020-11-30 00:37:04.308692: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-11-30 00:37:04.324500: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f98397b0520 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-11-30 00:37:04.324558: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Instructions for updating:
Please use `model.predict()` instead.
          yes        no
0    0.496271  0.503729
1    0.539314  0.460686
2    0.583028  0.416972
3    0.519531  0.480469
4    0.495293  0.504707
..        ...       ...
495  0.498841  0.501159
496  0.520764  0.479236
497  0.564995  0.435005
498  0.688261  0.311739
499  0.5