In [1]:
!pip install --upgrade tensorflow tensorflow-probability h5py ezsmdeploy==1.1.4 --quiet

In [2]:
import numpy as np

import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import tensorflow_probability as tfp

%matplotlib inline

tfd = tfp.distributions

### Create Sample Data

In [3]:
# Slope, intercept and input interval of the synthetic regression problem.
w0 = 0.125
b0 = 5.
x_range = [-20, 60]

def load_dataset(n=150, n_tst=150):
  """Generate a noisy 1-D regression data set plus an evenly spaced test grid.

  Args:
    n: number of training inputs drawn uniformly from `x_range`.
    n_tst: number of evenly spaced test inputs spanning `x_range`.

  Returns:
    Tuple `(y, x, x_tst)`: targets of shape `(n,)`, training inputs of shape
    `(n, 1)` and float32 test inputs of shape `(n_tst, 1)`.
  """
  # Re-seed NumPy's global generator so every call produces the same data.
  np.random.seed(43)
  lo, hi = x_range
  width = hi - lo

  def s(x):
    # Noise scale: grows quadratically with the relative position of x
    # inside x_range.
    g = (x - lo) / width
    return 3 * (0.25 + g**2.)

  inputs = width * np.random.rand(n) + lo
  noise = np.random.randn(n) * s(inputs)
  trend = w0 * inputs * (1. + np.sin(inputs)) + b0
  targets = trend + noise

  grid = np.linspace(lo, hi, num=n_tst).astype(np.float32)
  return targets, inputs[..., np.newaxis], grid[..., np.newaxis]

y, x, x_tst = load_dataset()

### Build model

In [4]:
# Build model.
# Seed TensorFlow's global generator so weight initialisation (and any other
# TF random ops used during training) are repeatable across
# "Restart Kernel -> Run All"; without it every run ends at different weights.
tf.random.set_seed(43)

# Loss: negative log-likelihood of the targets under the predicted distribution.
negloglik = lambda y, rv_y: -rv_y.log_prob(y)

# The Dense layer emits two numbers per example: the first becomes the Normal's
# `loc`, the second is passed through softplus (plus a 1e-3 floor) to give a
# strictly positive `scale`.
model = tf.keras.Sequential([
  tf.keras.layers.Dense(1 + 1),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.05 * t[...,1:]))),
])

# Do inference.
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
# Plain loop instead of a list comprehension used only for its side effects.
for w in model.weights:
  print(np.squeeze(w.numpy()))
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)

[0.12282385 0.96513146]
[5.205943 7.004342]


In [5]:
# Calling the model directly (rather than .predict) returns a distribution object.
model(x_tst)

<tfp.distributions.Normal 'sequential_distribution_lambda_Normal' batch_shape=[150, 1] event_shape=[] dtype=float32>

In [6]:
# NOTE(review): presumably these values are one random draw per input from the
# predicted distribution (DistributionLambda's tensor conversion) - confirm.
model.predict(x_tst) # <--- returns a plain array instead of a distribution we can sample from

array([[ 2.3980758],
       [ 2.585518 ],
       [ 2.1993608],
       [ 3.3034415],
       [ 4.2840652],
       [ 3.3239174],
       [ 4.409056 ],
       [ 3.8066037],
       [ 2.5256186],
       [ 2.765533 ],
       [ 3.600575 ],
       [ 3.515073 ],
       [ 3.7046757],
       [ 3.6306553],
       [ 3.2321055],
       [ 4.1575603],
       [ 4.0326014],
       [ 3.7785063],
       [ 4.3272724],
       [ 4.806672 ],
       [ 3.0966952],
       [ 2.9335876],
       [ 4.903344 ],
       [ 4.9527016],
       [ 5.3946457],
       [ 5.4594507],
       [ 4.637113 ],
       [ 4.085116 ],
       [ 4.1591053],
       [ 5.2609854],
       [ 4.457866 ],
       [ 5.5775747],
       [ 4.64046  ],
       [ 5.479892 ],
       [ 5.883362 ],
       [ 5.2331967],
       [ 5.2612853],
       [ 5.83494  ],
       [ 5.3080373],
       [ 3.4654753],
       [ 4.595876 ],
       [ 5.613019 ],
       [ 7.1641006],
       [ 5.411243 ],
       [ 4.4108014],
       [ 4.4137125],
       [ 4.3924646],
       [ 6.57

In [7]:
# model.save('model1.h5')

_Saving the full model to HDF5 this way has some problems, so below we save only the weights with `h5py` and rebuild the architecture in code._

In [8]:
import h5py

In [9]:
!rm -r models
!mkdir models

In [10]:
model_name = 'reg1'
weight = model.get_weights()

# Store every weight array as its own dataset named 'weight0', 'weight1', ...
# The context manager closes (and flushes) the HDF5 file even if a write
# raises, so the handle can never be left open.
with h5py.File('{}.h5'.format(model_name), 'w') as file:
    for i, array in enumerate(weight):
        file.create_dataset('weight' + str(i), data=array)

In [11]:
# Rebuild the same architecture with fresh, untrained weights; the saved
# weights are loaded into it further down. No loss function is (re)defined
# here because this copy is never compiled or trained.
newmodel = tf.keras.Sequential([
  tf.keras.layers.Dense(1 + 1),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.05 * t[...,1:]))),
])

In [12]:
# Check the shape of the test inputs.
x_tst.shape

(150, 1)

### Reload model to see if it is saved correctly

In [13]:
# Read the datasets back in index order ('weight0', 'weight1', ...), i.e. the
# order in which they were written from model.get_weights(). `[:]` copies each
# dataset into memory, so the file can be closed by the context manager.
with h5py.File('{}.h5'.format(model_name), 'r') as file:
    weight = [file['weight' + str(i)][:] for i in range(len(file.keys()))]

# Build with an unspecified batch dimension: only the single input feature is
# part of the model's shape, not the number of rows in x_tst.
newmodel.build(input_shape=(None, 1))
newmodel.set_weights(weight)

In [14]:
# Run the reloaded model on the test inputs.
y_tst = newmodel(x_tst)

In [15]:
# Dump the returned object's attributes to eyeball the restored parameters
# (these are private internals; for inspection only).
y_tst.__dict__

{'_parameters': <function tensorflow_probability.python.distributions.distribution._DistributionMeta.__new__.<locals>.wrapped_init.<locals>.<lambda>()>,
 '_loc': <tf.Tensor: shape=(150, 1), dtype=float32, numpy=
 array([[ 2.7494662],
        [ 2.8154118],
        [ 2.8813574],
        [ 2.9473033],
        [ 3.013249 ],
        [ 3.0791948],
        [ 3.1451402],
        [ 3.2110858],
        [ 3.2770317],
        [ 3.3429773],
        [ 3.4089231],
        [ 3.4748688],
        [ 3.5408144],
        [ 3.60676  ],
        [ 3.672706 ],
        [ 3.7386513],
        [ 3.8045971],
        [ 3.870543 ],
        [ 3.9364886],
        [ 4.0024343],
        [ 4.06838  ],
        [ 4.1343255],
        [ 4.200271 ],
        [ 4.266217 ],
        [ 4.332163 ],
        [ 4.3981085],
        [ 4.464054 ],
        [ 4.5299997],
        [ 4.5959454],
        [ 4.661891 ],
        [ 4.7278366],
        [ 4.7937827],
        [ 4.8597283],
        [ 4.925674 ],
        [ 4.9916196],
        [ 5.057565

## Try with Ezsmdeploy

In [16]:
%%writefile tfp_predict_script.py
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import json
import h5py
import os
tfd = tfp.distributions

#Return loaded model
def load_model(modelpath):
    """Rebuild the regression network and load its weights from `reg1.h5`.

    Args:
        modelpath: directory that contains the file `reg1.h5`.

    Returns:
        A `tf.keras.Sequential` model (Dense layer followed by a
        `DistributionLambda` producing a `tfd.Normal`) with its weights set
        from the datasets `weight0`, `weight1`, ... stored in the HDF5 file.
    """
    # (Re)define the model; set_weights() below needs the layers to match the
    # arrays stored in the file.
    model = tf.keras.Sequential([
      tf.keras.layers.Dense(1 + 1),
      tfp.layers.DistributionLambda(
          lambda t: tfd.Normal(loc=t[..., :1],
                               scale=1e-3 + tf.math.softplus(0.05 * t[...,1:]))),
    ])

    # Load model
    print("1. Listing files in modelpath")
    print(os.listdir(modelpath))

    print("2. Loading h5 file")
    # The context manager releases the HDF5 handle even if reading fails;
    # `[:]` copies each dataset into memory before the file is closed.
    with h5py.File(os.path.join(modelpath, 'reg1.h5'), 'r') as h5_file:
        print("3. Loading weights")
        weights = [h5_file['weight' + str(i)][:]
                   for i in range(len(h5_file.keys()))]

    # Leave the batch dimension unspecified; only the single input feature is
    # fixed by the architecture.
    model.build(input_shape=(None, 1))
    model.set_weights(weights)

    print("4. Loaded model successfully")

    return model

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    """Run the model on a raw float32 payload and return JSON-encoded statistics.

    Args:
        model: callable (as returned by `load_model`) that is invoked with a
            float32 array of shape (N, 1) and returns an object exposing
            `mean()`, `mode()`, `stddev()` and `quantile(q)`.
        payload: bytes-like buffer of float32 values, e.g. `array.tobytes()`.

    Returns:
        A one-element list holding the JSON string `{"output": ...}`. On
        success the value is a dict with the keys `mean`, `mode`, `stddev` and
        `quantile_75`; if anything raises, it is the exception message.
    """
    try:
        # Note, for Multi model endpoints -> (payload[0]['body'].decode())
        # -1 accepts any number of inputs instead of requiring exactly 150.
        data = np.frombuffer(payload, dtype=np.float32).reshape((-1, 1))
        tmpout = model(data)

        #Add outputs here !!

        out = {
            'mean': np.asarray(tmpout.mean()).T.tolist(),
            'mode': np.asarray(tmpout.mode()).T.tolist(),
            'stddev': np.asarray(tmpout.stddev()).T.tolist(),
            'quantile_75': np.asarray(tmpout.quantile(0.75)).T.tolist(),
        }

    except Exception as e:
        # Best effort: report the failure in the response body instead of raising.
        out = str(e)
    return [json.dumps({'output': out})]

Overwriting tfp_predict_script.py


## Does this work locally in the notebook?

In [17]:
# Import only the two entry points: a star import would also overwrite the
# notebook's own `tf`, `np`, `tfd`, ... with the script's module-level names.
from tfp_predict_script import load_model, predict
model = load_model('./')

1. Listing files in modelpath
['reg1.h5', 'generate_cifar10_tfrecords.py', 'src', 'tensorflow-serving-TF-probability-python-sdk.ipynb', 'model1.tar.gz', 'model1.h5', '.ipynb_checkpoints', 'code', 'models', 'tfp_predict_script.py', 'sample-img', 'data', '__pycache__']
2. Loading h5 file
3. Loading weights
4. Loaded model successfully


In [18]:
# Call the handler with the test inputs serialized as raw bytes.
predict(model,x_tst.tobytes())

['{"output": {"mean": [[2.7494661808013916, 2.8154118061065674, 2.881357431411743, 2.947303295135498, 3.013248920440674, 3.0791947841644287, 3.1451401710510254, 3.211085796356201, 3.277031660079956, 3.342977285385132, 3.4089231491088867, 3.4748687744140625, 3.5408143997192383, 3.606760025024414, 3.672705888748169, 3.7386512756347656, 3.8045971393585205, 3.8705430030822754, 3.936488628387451, 4.002434253692627, 4.068379878997803, 4.1343255043029785, 4.200271129608154, 4.266217231750488, 4.332162857055664, 4.39810848236084, 4.464054107666016, 4.529999732971191, 4.595945358276367, 4.661890983581543, 4.727836608886719, 4.793782711029053, 4.8597283363342285, 4.925673961639404, 4.99161958694458, 5.057565212249756, 5.123510837554932, 5.189456462860107, 5.255402565002441, 5.321348190307617, 5.387293815612793, 5.453239440917969, 5.5191850662231445, 5.58513069152832, 5.651076316833496, 5.717021942138672, 5.782968044281006, 5.848913669586182, 5.914859294891357, 5.980804920196533, 6.04675054550170

## Deploy locally in a container

In [19]:
import ezsmdeploy

Match the package versions listed in `requirements` to the ones used in your notebook to make sure the endpoint deploys correctly!

In [20]:
# !pip show numpy tensorflow tensorflow-probability  h5py

In [21]:
# Packages installed inside the serving container
# (or pass in the path to requirements.txt instead of a list).
local_requirements = ['numpy','tensorflow>=2.5','tensorflow-probability','h5py']

# instance_type='local' deploys into a container on this machine.
ez = ezsmdeploy.Deploy(
    model='reg1.h5',
    script='tfp_predict_script.py',
    requirements=local_requirements,
    instance_type='local',
    wait=True,
)

[K0:00:00.003602 | compressed model(s)
[K0:00:00.075285 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.076126 | added requirements file
[K0:00:00.077954 | added source file
[K0:00:00.079299 | added Dockerfile
[K0:00:00.081173 | added model_handler and docker utils
[K0:00:00.081804 | building docker container
[K0:01:31.233938 | built docker container
[K

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


[K0:01:31.349462 | created model(s). Now deploying on local
[32m∙●∙[0m [KAttaching to tmp6serw1sh_algo-1-g8yvq_1
[36malgo-1-g8yvq_1  |[0m Starting the inference server with 16 workers.
[32m∙∙∙[0m [K[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [9] [INFO] Starting gunicorn 20.1.0
[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [9] [INFO] Listening at: unix:/tmp/gunicorn.sock (9)
[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [9] [INFO] Using worker: gevent
[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [13] [INFO] Booting worker with pid: 13
[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [14] [INFO] Booting worker with pid: 14
[32m∙∙∙[0m [K[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [15] [INFO] Booting worker with pid: 15
[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [16] [INFO] Booting worker with pid: 16
[32m●∙∙[0m [K[36malgo-1-g8yvq_1  |[0m [2021-09-01 03:39:13 +0000] [17] [INFO] Booting worker with pid: 17


[32m∙∙∙[0m [K[36malgo-1-g8yvq_1  |[0m 2021-09-01 03:39:18.708974: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
[36malgo-1-g8yvq_1  |[0m 2021-09-01 03:39:18.709037: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
[36malgo-1-g8yvq_1  |[0m 2021-09-01 03:39:18.709079: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (67620077354d): /proc/driver/nvidia/version does not exist
[36malgo-1-g8yvq_1  |[0m 2021-09-01 03:39:18.709449: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
[36malgo-1-g8yvq_1  |[0m To enable them in other operations, rebuild TensorF

If the Docker build fails, uncomment and run the build script below to debug it:

In [22]:
# !./src/build-docker.sh test

In [23]:
# Send the test inputs as raw bytes to the local endpoint and decode the reply.
local_reply = ez.predictor.predict(x_tst.tobytes())
out = local_reply.decode()
out

[36malgo-1-g8yvq_1  |[0m received input data
[36malgo-1-g8yvq_1  |[0m b'\x00\x00\xa0\xc1g\xb4\x9b\xc1\xceh\x97\xc15\x1d\x93\xc1\x9c\xd1\x8e\xc1\x03\x86\x8a\xc1k:\x86\xc1\xd2\xee\x81\xc1qF{\xc1@\xafr\xc1\x0e\x18j\xc1\xdc\x80a\xc1\xaa\xe9X\xc1xRP\xc1F\xbbG\xc1\x15$?\xc1\xe3\x8c6\xc1\xb1\xf5-\xc1\x7f^%\xc1M\xc7\x1c\xc1\x1b0\x14\xc1\xea\x98\x0b\xc1\xb8\x01\x03\xc1\x0c\xd5\xf4\xc0\xa8\xa6\xe3\xc0Ex\xd2\xc0\xe1I\xc1\xc0}\x1b\xb0\xc0\x1a\xed\x9e\xc0\xb6\xbe\x8d\xc0\xa5 y\xc0\xde\xc3V\xc0\x16g4\xc0O\n\x12\xc0\x0f[\xdf\xbf\x81\xa1\x9a\xbf\xe5\xcf+\xbf\x1ds\t\xbe\xac,\xce>s\x89p?H\xfe\xbc?\xeb\xdb\x00@\xb38#@z\x95E@A\xf2g@\x84\'\x85@\xe8U\x96@L\x84\xa7@\xaf\xb2\xb8@\x13\xe1\xc9@w\x0f\xdb@\xda=\xec@>l\xfd@QM\x07A\x83\xe4\x0fA\xb4{\x18A\xe6\x12!A\x18\xaa)AJA2A|\xd8:A\xaeoCA\xdf\x06LA\x11\x9eTAC5]Au\xcceA\xa7cnA\xd8\xfavA\n\x92\x7fA\x9e\x14\x84A7`\x88A\xd0\xab\x8cAi\xf7\x90A\x02C\x95A\x9b\x8e\x99A4\xda\x9dA\xcc%\xa2Aeq\xa6A\xfe\xbc\xaaA\x97\x08\xafA0T\xb3A\xc9\x9f\xb7Ab\xeb\xbbA\xfb6\xc0A\x94\x

'{"output": {"mean": [[2.7494661808013916, 2.8154118061065674, 2.881357431411743, 2.947303295135498, 3.013248920440674, 3.0791947841644287, 3.1451401710510254, 3.211085796356201, 3.277031660079956, 3.342977285385132, 3.4089231491088867, 3.4748687744140625, 3.5408143997192383, 3.606760025024414, 3.672705888748169, 3.7386512756347656, 3.8045971393585205, 3.8705430030822754, 3.936488628387451, 4.002434253692627, 4.068379878997803, 4.1343255043029785, 4.200271129608154, 4.266217231750488, 4.332162857055664, 4.39810848236084, 4.464054107666016, 4.529999732971191, 4.595945358276367, 4.661890983581543, 4.727836608886719, 4.793782711029053, 4.8597283363342285, 4.925673961639404, 4.99161958694458, 5.057565212249756, 5.123510837554932, 5.189456462860107, 5.255402565002441, 5.321348190307617, 5.387293815612793, 5.453239440917969, 5.5191850662231445, 5.58513069152832, 5.651076316833496, 5.717021942138672, 5.782968044281006, 5.848913669586182, 5.914859294891357, 5.980804920196533, 6.046750545501709

[36malgo-1-g8yvq_1  |[0m 172.20.0.1 - - [01/Sep/2021:03:39:19 +0000] "POST /invocations HTTP/1.1" 200 11698 "-" "python-urllib3/1.26.4"


## Great! Now deploy onto a SageMaker endpoint

In [24]:
# Packages installed inside the serving container
# (or pass in the path to requirements.txt instead of a list).
hosted_requirements = ['numpy','tensorflow>=2.5','tensorflow-probability','h5py']

ezonsm = ezsmdeploy.Deploy(
    model='reg1.h5',
    script='tfp_predict_script.py',
    requirements=hosted_requirements,
    # ... or let ezsmdeploy decide instance type based on model size
    instance_type='ml.m4.xlarge',
    wait=True,
)

[K0:00:00.003749 | compressed model(s)
[K0:00:00.158802 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.159965 | added requirements file
[K0:00:00.162085 | added source file
[K0:00:00.164027 | added Dockerfile
[K0:00:00.165969 | added model_handler and docker utils
[K0:00:00.166121 | building docker container
[K0:01:34.311422 | built docker container
[K2m∙∙∙[0m [K

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


[K0:01:34.436429 | created model(s). Now deploying on ml.m4.xlarge
[K0:05:37.171200 | deployed model
[K0:05:37.172185 | estimated cost is $0.31 per hour
[K[32m0:05:37.172339 | Done! ✔[0m 


In [25]:
# Send the same raw-bytes payload to the hosted endpoint and decode the reply.
hosted_reply = ezonsm.predictor.predict(x_tst.tobytes())
out = hosted_reply.decode()
out

'{"output": {"mean": [[2.7494661808013916, 2.8154118061065674, 2.881357431411743, 2.947303295135498, 3.013248920440674, 3.0791947841644287, 3.1451401710510254, 3.211085796356201, 3.277031660079956, 3.342977285385132, 3.4089231491088867, 3.4748687744140625, 3.5408143997192383, 3.606760025024414, 3.672705888748169, 3.7386512756347656, 3.8045971393585205, 3.8705430030822754, 3.936488628387451, 4.002434253692627, 4.068379878997803, 4.1343255043029785, 4.200271129608154, 4.266217231750488, 4.332162857055664, 4.39810848236084, 4.464054107666016, 4.529999732971191, 4.595945358276367, 4.661890983581543, 4.727836608886719, 4.793782711029053, 4.8597283363342285, 4.925673961639404, 4.99161958694458, 5.057565212249756, 5.123510837554932, 5.189456462860107, 5.255402565002441, 5.321348190307617, 5.387293815612793, 5.453239440917969, 5.5191850662231445, 5.58513069152832, 5.651076316833496, 5.717021942138672, 5.782968044281006, 5.848913669586182, 5.914859294891357, 5.980804920196533, 6.046750545501709

# Cleanup

To avoid incurring charges due to a stray endpoint, delete the Amazon SageMaker endpoint if you no longer need it:

In [26]:
# Delete the endpoint created through `ezonsm` so it stops incurring charges.
ezonsm.predictor.delete_endpoint()