### Import Libraries


In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
import joblib

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

### Import Data

In [2]:
# Load the training set and discard the three free-text diagnosis description columns.
train = pd.read_csv('../data/readmissions_train.csv')
train = train.drop(columns=['diag_1_desc','diag_2_desc','diag_3_desc'])

# X holds every column except the target. pop() returns the target column and
# also removes it from `train`, so `train` contains feature columns only from
# this point on (a later cell relies on that).
X = train.drop(columns='readmitted')
y = train.pop('readmitted')

### Function for Keras Model

In [3]:
#First I define my keras model
def create_model(optimizer='adagrad', kernel_initializer='glorot_uniform',
                 dropout=0.2, hidden_units=64):
    """Build and compile a single-hidden-layer binary classifier.

    The hyperparameters are keyword arguments (with the values that used to be
    hard-coded as defaults) so they can be tuned without editing this function,
    e.g. ``KerasClassifier(build_fn=create_model, dropout=0.3)``.

    Parameters
    ----------
    optimizer : str or Keras optimizer
        Optimizer passed to ``model.compile``.
    kernel_initializer : str or Keras initializer
        Weight initializer used by both Dense layers.
    dropout : float
        Dropout rate applied after the hidden layer.
    hidden_units : int
        Width of the hidden Dense layer.

    Returns
    -------
    A compiled ``Sequential`` model with one sigmoid output unit, trained with
    binary cross-entropy and reporting accuracy.
    """
    model = Sequential()
    # No input shape is declared here, so the network is not tied to a fixed
    # number of preprocessed feature columns.
    model.add(Dense(hidden_units,activation='relu',kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(1,activation='sigmoid',kernel_initializer=kernel_initializer))

    model.compile(loss='binary_crossentropy',optimizer=optimizer, metrics=['accuracy'])

    return model

### Define Preprocessing step per type of column

In [4]:

# Preprocessing for numerical features: every non-object column of X, kept in
# X's own column order. (Building this list from a set difference would give
# an arbitrary order that can change between kernel runs.)
numeric_features = list(X.select_dtypes(exclude='object').columns)
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Preprocessing for categorical features: every object column of X. Reading
# the columns from X (not `train`) keeps the target out of the feature list
# regardless of what has or has not been removed from `train`.
categorical_features = list(X.select_dtypes('object').columns)
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    # Categories not seen during fit are ignored at transform time instead of raising.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Preprocessor that routes each column group to its transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

### Fit the Preprocessing Pipeline

In [5]:
# Wrap the column preprocessor in a Pipeline so it can be saved as a single object
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit the preprocessing pipeline on the training data, transform that same data,
# and convert the result to a pandas DataFrame. The model could also be trained
# on the sparse matrix directly; the conversion is done because the hook function
# in custom.py expects a pandas DataFrame for scoring.
preprocessed = pd.DataFrame.sparse.from_spmatrix(pipeline.fit(X, y).transform(X))

### Train the Keras Classifier

In [6]:
# Wrap the Keras network in its scikit-learn style classifier (silent training)
# and fit it on the preprocessed training data.
classifier_options = {'build_fn': create_model, 'verbose': 0}
clf = KerasClassifier(**classifier_options)
clf.fit(preprocessed, y)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tensorflow.python.keras.callbacks.History at 0x7fc0d81c27b8>

### Save Custom Model files

In [7]:
# Persist the fitted preprocessing pipeline and the trained Keras network
# side by side in the custom model directory.
joblib.dump(value=pipeline, filename='custom_model/preprocessing.pkl')
clf.model.save(filepath='custom_model/model.h5')

### Verify custom model integrity with DRUM

In [8]:
# Run DRUM's `validation` subcommand on the ./custom_model code directory, using the
# test CSV as input and declaring a binary target with labels "yes" / "no".
!drum validation --code-dir ./custom_model --input ../data/readmissions_test.csv --target-type binary --positive-class-label yes --negative-class-label no

service.cc:176]   StreamExecutor device (0): Host, Default Version
          yes        no
0    0.355612  0.644388
1    0.390222  0.609778
2    0.463708  0.536292
3    0.476066  0.523934
4    0.388414  0.611586
..        ...       ...
495  0.411196  0.588804
496  0.399139  0.600861
497  0.443439  0.556561
498  0.467179  0.532821
499  0.471639  0.528361

[500 rows x 2 columns]
2020-12-02 18:27:55.359225: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-12-02 18:27:55.374342: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fab1159f980 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-12-02 18:27:55.374385: I tensorflow/compiler/xla/service/service.cc:176]   StreamE

In [99]:
# Run DRUM's `score` subcommand on the same code directory and test CSV to print
# per-row predictions for the "yes" / "no" classes.
# NOTE(review): this cell's execution count (In [99]) is out of sequence with the
# rest of the notebook — re-run with Restart & Run All before sharing.
!drum score --code-dir ./custom_model --input ../data/readmissions_test.csv --target-type binary --positive-class-label "yes" --negative-class-label "no" 

2020-12-01 16:52:54.300058: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-12-01 16:52:54.313482: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fd99ffd3770 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-12-01 16:52:54.313554: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
          yes        no
0    0.359182  0.640818
1    0.389884  0.610116
2    0.440797  0.559203
3    0.403963  0.596037
4    0.334088  0.665912
..        ...       ...
495  0.365820  0.634180
496  0.440708  0.559292
497  0.409103  0.590897
498  0.474418  0.525582
499  0.391372  0.608628

[500 rows x 2 columns]
