The example model built in this notebook comes from this [Keras Tutorial](https://keras.io/examples/structured_data/imbalanced_classification/), which uses data provided by this [Kaggle competition](https://www.kaggle.com/mlg-ulb/creditcardfraud/?select=creditcard.csv).   

In [1]:
import csv
import getpass
import os
import pickle
from io import StringIO

import chassisml
import numpy as np
import pandas as pd
from tensorflow import keras

## Enter credentials
Docker Hub credentials and Modzy API key

In [2]:
# Prompt for each secret interactively (input is not echoed), in this order:
# Docker Hub username, Docker Hub password, Modzy API key.
dockerhub_user, dockerhub_pass, modzy_api_key = (
    getpass.getpass(prompt_text)
    for prompt_text in ('docker hub username', 'docker hub password', 'modzy api key')
)

docker hub username········
docker hub password········
modzy api key········


## Prepare Data

In [4]:
# Get the real data from https://www.kaggle.com/mlg-ulb/creditcardfraud/
fname = "./data/creditcard.csv"

# Every column except the last is a feature; the last column is the class label.
all_features = []
all_targets = []
with open(fname) as csv_file:
    for row_number, raw_line in enumerate(csv_file):
        stripped_line = raw_line.strip()
        if row_number == 0:
            # The first line is the header: show it and move on.
            print("HEADER:", stripped_line)
            continue
        # Values may be wrapped in double quotes; drop them before converting.
        *feature_cells, target_cell = (
            cell.replace('"', "") for cell in stripped_line.split(",")
        )
        all_features.append(list(map(float, feature_cells)))
        all_targets.append([int(target_cell)])
        if row_number == 1:
            print("EXAMPLE FEATURES:", all_features[-1])

features = np.array(all_features, dtype="float32")
targets = np.array(all_targets, dtype="uint8")
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)

HEADER: "Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
EXAMPLE FEATURES: [0.0, -1.3598071336738, -0.0727811733098497, 2.53634673796914, 1.37815522427443, -0.338320769942518, 0.462387777762292, 0.239598554061257, 0.0986979012610507, 0.363786969611213, 0.0907941719789316, -0.551599533260813, -0.617800855762348, -0.991389847235408, -0.311169353699879, 1.46817697209427, -0.470400525259478, 0.207971241929242, 0.0257905801985591, 0.403992960255733, 0.251412098239705, -0.018306777944153, 0.277837575558899, -0.110473910188767, 0.0669280749146731, 0.128539358273528, -0.189114843888824, 0.133558376740387, -0.0210530534538215, 149.62]
features.shape: (284807, 30)
targets.shape: (284807, 1)


In [5]:
# Prepare Validation Set
# Hold out the last 20% of rows for validation. An explicit split index is used
# because slicing with `-num_val_samples` breaks when it is 0 (`[:-0]` is empty).
num_val_samples = int(len(features) * 0.2)
split_index = len(features) - num_val_samples
train_features_raw = features[:split_index]
val_features_raw = features[split_index:]
train_targets = targets[:split_index]
val_targets = targets[split_index:]

print("Number of training samples:", len(train_features_raw))
print("Number of validation samples:", len(val_features_raw))

# Analyze Class Imbalances
counts = np.bincount(train_targets[:, 0])
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1], 100 * float(counts[1]) / len(train_targets)
    )
)

# Inverse-frequency weights: the rarer class gets the larger weight.
weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]

# Normalize Data
# Mean and std come from the training split only and are applied to both splits.
# The raw slices are views into `features`, so the normalized arrays are built
# out-of-place: `features` stays untouched and re-running this cell gives the
# same result instead of normalizing already-normalized data.
mean = np.mean(train_features_raw, axis=0)
train_centered = train_features_raw - mean
std = np.std(train_centered, axis=0)
train_features = train_centered / std
val_features = (val_features_raw - mean) / std

Number of training samples: 227846
Number of validation samples: 56961
Number of positive samples in training data: 417 (0.18% of total)


In [6]:
# save small sample test set for later
# First 10 validation rows (already normalized) and their labels.
test_set = pd.DataFrame(val_features[:10])
test_targets = val_targets[:10]
# Pass the path and let pandas open the file itself: a handle opened without
# newline="" can end up with doubled line endings on Windows.
test_set.to_csv("./data/credit_card_fraud_test.csv", index=False)

## Build and Train Model

In [7]:
# binary classification model
# Three 256-unit ReLU layers (dropout after the 2nd and 3rd) feeding a single
# sigmoid unit that outputs the fraud probability.
model = keras.Sequential()
model.add(
    keras.layers.Dense(256, activation="relu", input_shape=(train_features.shape[-1],))
)
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(1, activation="sigmoid"))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               7936      
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 256)               65792     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                                 
Total params: 139,777
Trainable params: 139,777
Non-trai

In [8]:
# train model
# Confusion-matrix counts plus precision/recall, reported under short names.
metrics = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (keras.metrics.FalseNegatives, "fn"),
        (keras.metrics.FalsePositives, "fp"),
        (keras.metrics.TrueNegatives, "tn"),
        (keras.metrics.TruePositives, "tp"),
        (keras.metrics.Precision, "precision"),
        (keras.metrics.Recall, "recall"),
    )
]

model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(1e-2),
    metrics=metrics,
)

# Checkpoint files are named after the epoch number.
callbacks = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.h5")]
# Per-class weights computed from the training label counts.
class_weight = {0: weight_for_0, 1: weight_for_1}

model.fit(
    x=train_features,
    y=train_targets,
    validation_data=(val_features, val_targets),
    class_weight=class_weight,
    callbacks=callbacks,
    batch_size=2048,
    epochs=30,
    verbose=2,
)

Epoch 1/30
112/112 - 4s - loss: 2.2329e-06 - fn: 47.0000 - fp: 25061.0000 - tn: 202368.0000 - tp: 370.0000 - precision: 0.0145 - recall: 0.8873 - val_loss: 0.0673 - val_fn: 9.0000 - val_fp: 593.0000 - val_tn: 56293.0000 - val_tp: 66.0000 - val_precision: 0.1002 - val_recall: 0.8800 - 4s/epoch - 33ms/step
Epoch 2/30
112/112 - 3s - loss: 1.7270e-06 - fn: 34.0000 - fp: 10535.0000 - tn: 216894.0000 - tp: 383.0000 - precision: 0.0351 - recall: 0.9185 - val_loss: 0.1122 - val_fn: 7.0000 - val_fp: 1843.0000 - val_tn: 55043.0000 - val_tp: 68.0000 - val_precision: 0.0356 - val_recall: 0.9067 - 3s/epoch - 26ms/step
Epoch 3/30
112/112 - 3s - loss: 1.1493e-06 - fn: 33.0000 - fp: 5637.0000 - tn: 221792.0000 - tp: 384.0000 - precision: 0.0638 - recall: 0.9209 - val_loss: 0.1192 - val_fn: 6.0000 - val_fp: 2196.0000 - val_tn: 54690.0000 - val_tp: 69.0000 - val_precision: 0.0305 - val_recall: 0.9200 - 3s/epoch - 27ms/step
Epoch 4/30
112/112 - 3s - loss: 1.1605e-06 - fn: 22.0000 - fp: 6915.0000 - tn: 22

Epoch 28/30
112/112 - 3s - loss: 4.2769e-07 - fn: 4.0000 - fp: 2976.0000 - tn: 224453.0000 - tp: 413.0000 - precision: 0.1219 - recall: 0.9904 - val_loss: 0.0188 - val_fn: 10.0000 - val_fp: 297.0000 - val_tn: 56589.0000 - val_tp: 65.0000 - val_precision: 0.1796 - val_recall: 0.8667 - 3s/epoch - 26ms/step
Epoch 29/30
112/112 - 3s - loss: 2.8418e-07 - fn: 4.0000 - fp: 2464.0000 - tn: 224965.0000 - tp: 413.0000 - precision: 0.1436 - recall: 0.9904 - val_loss: 0.0108 - val_fn: 11.0000 - val_fp: 174.0000 - val_tn: 56712.0000 - val_tp: 64.0000 - val_precision: 0.2689 - val_recall: 0.8533 - 3s/epoch - 26ms/step
Epoch 30/30
112/112 - 3s - loss: 1.6812e-07 - fn: 0.0000e+00 - fp: 1614.0000 - tn: 225815.0000 - tp: 417.0000 - precision: 0.2053 - recall: 1.0000 - val_loss: 0.0081 - val_fn: 11.0000 - val_fp: 115.0000 - val_tn: 56771.0000 - val_tp: 64.0000 - val_precision: 0.3575 - val_recall: 0.8533 - 3s/epoch - 26ms/step


<keras.callbacks.History at 0x219ae107760>

## Prepare context dict
Initialize anything here that should persist across inference runs

In [21]:
# Nothing earlier in this notebook writes this file (the training checkpoints
# use a different name), so on a fresh run it may not exist yet. In that case
# persist the model trained above first; an existing file is left untouched.
fraud_model_path = "./data/fraud_model.h5"
if not os.path.exists(fraud_model_path):
    model.save(fraud_model_path)
model_loaded = keras.models.load_model(fraud_model_path)

In [22]:
# Sanity check: score the first 10 validation rows with the reloaded model.
model_loaded.predict(val_features[:10])

array([[5.9610628e-28],
       [1.2432390e-15],
       [5.9897824e-21],
       [6.9545074e-08],
       [8.2415342e-04],
       [5.0830014e-08],
       [2.1298048e-09],
       [4.2522891e-28],
       [1.2371518e-05],
       [2.8836448e-23]], dtype=float32)

In [23]:
# Expose the reloaded model under the name the inference code looks up.
fraud_model = model_loaded

# This will be passed to Chassis:
context = dict(fraud_model=fraud_model)

## Write process function

* Must take bytes and context dict as input
* Preprocess bytes, run inference, postprocess model output, return results

In [24]:
def process(input_bytes, context):
    """Score a CSV payload with the fraud model and wrap the output for Chassis.

    Args:
        input_bytes: UTF-8 encoded CSV content. The first line is parsed as the
            header row; every remaining row is passed to the model.
        context: dict holding the model under the key ``"fraud_model"``. The
            model only needs to expose ``predict``.

    Returns:
        dict with a single ``"data"`` entry containing ``"result"`` (one
        ``{"entry", "probability"}`` record per prediction, in row order) and
        ``"explanation"`` / ``"drift"`` set to ``None``.
    """
    # Decode the raw payload and parse it as CSV.
    csv_text = str(input_bytes, "utf-8")
    feature_matrix = pd.read_csv(StringIO(csv_text)).to_numpy()

    # Run inference on all rows at once.
    scores = context['fraud_model'].predict(feature_matrix)

    # `.item()` turns each prediction into a plain Python scalar.
    fraud_predictions = []
    for row_index, score in enumerate(scores):
        fraud_predictions.append({"entry": row_index, "probability": score.item()})

    return {
        "data": {
            "result": {"fraudPredictions": fraud_predictions},
            "explanation": None,
            "drift": None,
        }
    }

## Initialize Chassis Client
We'll use this to interact with the Chassis service

In [25]:
# Client for the Chassis service; this URL points at an instance on localhost, port 5000.
chassis_client = chassisml.ChassisClient("http://localhost:5000")

## Create and test Chassis model
* Requires `context` dict containing all variables which should be loaded once and persist across inferences
* Requires `process_fn` defined above

In [26]:
# sample input written earlier in this notebook
sample_filepath = './data/credit_card_fraud_test.csv'

# create Chassis model
chassis_model = chassis_client.create_model(process_fn=process, context=context)

# test Chassis model (can pass filepath, bufferedreader, bytes, or text here):
results = chassis_model.test(sample_filepath)
print(results)

b'{"data":{"result":{"fraudPredictions":[{"entry":0,"probability":5.961062771117055e-28},{"entry":1,"probability":1.2432389610796037e-15},{"entry":2,"probability":5.989782432661615e-21},{"entry":3,"probability":6.954507369982821e-08},{"entry":4,"probability":0.0008241534233093262},{"entry":5,"probability":5.0830013975655675e-08},{"entry":6,"probability":2.129804776984656e-09},{"entry":7,"probability":4.252289131679258e-28},{"entry":8,"probability":1.2371518096188083e-05},{"entry":9,"probability":2.8836447514337685e-23}]},"explanation":null,"drift":null}}'


In [29]:
# Inspect the class of the model object stored in the context dict.
type(context["fraud_model"])

keras.engine.sequential.Sequential

In [27]:
# test environment and model within Chassis service, must pass filepath here:
# (reuses the same sample file that was passed to `chassis_model.test`)
test_env_result = chassis_model.test_env(sample_filepath)
print(test_env_result)

INFO:tensorflow:Assets written to: ram://af639b0b-8663-4dab-a099-3e4c0ef32148/assets


NotFoundError: 

## Publish model to Modzy
Need to provide model name, model version, Dockerhub credentials, and required Modzy info

In [28]:
# Gather the publish arguments: model identity, registry credentials and the
# Modzy-specific settings.
publish_kwargs = dict(
    model_name="Keras Credit Card Fraud Classification",
    model_version="0.0.1",
    registry_user=dockerhub_user,
    registry_pass=dockerhub_pass,
    modzy_sample_input_path=sample_filepath,
    modzy_api_key=modzy_api_key,
)
response = chassis_model.publish(**publish_kwargs)

# Block until the job referenced by the publish response completes.
job_id = response.get('job_id')
final_status = chassis_client.block_until_complete(job_id)

INFO:tensorflow:Assets written to: ram://3c8f8450-4f10-437a-b1c4-ab8d8d9da3bb/assets



UnboundLocalError: local variable 'tmppath' referenced before assignment

In [None]:
# Query the job status once so the check and the printed message use the same
# snapshot, instead of calling the service again for every access.
job_status = chassis_client.get_job_status(job_id)
if job_status["result"] is not None:
    print("New model URL: {}".format(job_status["result"]["container_url"]))
else:
    print("Chassis job failed \n\n {}".format(job_status))

## Run sample job
Submit an inference job to our newly deployed model running on Modzy

In [None]:
from modzy import ApiClient

client = ApiClient(api_key=modzy_api_key, base_url='https://integration.modzy.engineering/api')

# Read the identifiers of the published model from the finished Chassis job.
publish_result = final_status['result']
input_name = publish_result['inputs'][0]['name']
model_id = publish_result.get("model").get("modelId")
model_version = publish_result.get("version")

# Submit the sample file under the model's first input name and wait for the result.
job_sources = {input_name: sample_filepath}
inference_job = client.jobs.submit_file(model_id, model_version, job_sources)
inference_job_result = client.results.block_until_complete(inference_job, timeout=None)

first_outputs = inference_job_result.get_first_outputs()
inference_job_results_json = first_outputs['results.json']
print(inference_job_results_json)