In [None]:
import tensorflow as tf

In [None]:
import requests
# Example service that returns your IP address (server address for the Colab notebook)
# timeout keeps the cell from hanging forever if the service is unreachable;
# raise_for_status reports an HTTP error here instead of as a confusing JSON decode failure.
r = requests.get('https://api.ipify.org?format=json', timeout=10)
r.raise_for_status()
j = r.json()
print(j)
# For ML, our server is the same but more complex in that it returns predictions from an ML model

{'ip': '34.87.81.131'}


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess

from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout
from tensorflow.keras.models import Model

In [None]:
# Load Fashion-MNIST and rescale the pixel values by 1/255
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
print("x_train.shape:", x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
x_train.shape: (60000, 28, 28)


In [None]:
# The images are loaded as (N, 28, 28); append a trailing channel axis of size 1
# so they match the (28, 28, 1) input shape the CNN is built with.
# Guarded on ndim so that re-running this cell does not append a second axis.
if x_train.ndim == 3:
  x_train = np.expand_dims(x_train, -1)
if x_test.ndim == 3:
  x_test = np.expand_dims(x_test, -1)
print("x_train.shape:", x_train.shape)

x_train.shape: (60000, 28, 28, 1)


In [None]:
# Number of classes = number of distinct label values in the training set
K = len(np.unique(y_train))
print("number of classes:", K)

number of classes: 10


In [None]:
# Build CNN: three stride-2 3x3 convolutions with growing filter counts,
# followed by dropout and a dense classifier head with K softmax outputs
i = Input(shape=x_train[0].shape)
x = i
for n_filters in (32, 64, 128):
  x = Conv2D(n_filters, (3, 3), strides=2, activation='relu')(x)
x = Flatten()(x)
x = Dropout(0.2)(x)
x = Dense(512, activation='relu')(x)
x = Dense(K, activation='softmax')(x)

model = Model(inputs=i, outputs=x)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 13, 13, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 6, 6, 64)          18496     
                                                                 
 conv2d_2 (Conv2D)           (None, 2, 2, 128)         73856     
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 512)               262656

In [None]:
# Compile with Adam and sparse categorical cross-entropy, then train for 15 epochs
# with the test split passed as validation data
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
r = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Save the model to a temp directory
# Needs to be saved to a format called protocol buffer (a standardized way of (de)serialzing objects)
# If its saved this way, a model saved in 1 language could be loaded in used in another language
import tempfile

MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join(MODEL_DIR, str(version)) # folder: /tmp/1
print("export path:", export_path)

# Delete export path if it already exists
if os.path.isdir(export_path):
  print('\nAlready saved a model, cleaning up\n')
  !rm -r {export_path}

tf.saved_model.save(model, export_path)

print('\nSaved model:')
!ls -l {export_path}

# pb (file extension) stands for 'protocol buffer'

export path: /tmp/1
INFO:tensorflow:Assets written to: /tmp/1/assets


INFO:tensorflow:Assets written to: /tmp/1/assets



Saved model:
total 148
drwxr-xr-x 2 root root   4096 Jul 29 23:09 assets
-rw-r--r-- 1 root root 142737 Jul 29 23:09 saved_model.pb
drwxr-xr-x 2 root root   4096 Jul 29 23:09 variables


In [None]:
# Print out some info about our saved model: its signatures, with the name, dtype and shape of each input and output
!saved_model_cli show --dir {export_path} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_1'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 28, 28, 1)
        name: serving_default_input_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['dense_1'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: StatefulPartitionedCall:0
  Method name is: tensorflow/serving/predict

Concrete Functions:
  Function Name: '__call__'
    Option #1
      Callable with:
        Argument #1
          input_1: TensorSp

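Notice the input and output info above: the `serving_default` signature takes a float input of shape (-1, 28, 28, 1) and returns a float output of shape (-1, 10), which matches our model.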

In [None]:
# Since the notebook runs on a Linux machine, the usual Linux commands work
# Add the TensorFlow Serving distribution URI as a package source (one-time setup) for the APT package manager
# This is the same as you would do from your command line, but without the [arch=amd64], and no sudo
# On your own machine you would instead do:
# echo "deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | sudo tee /etc/apt/sources.list.d/tensorflow-serving.list && \
# curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | sudo apt-key add -

!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
# Refresh the package index so the newly added source is picked up (the install itself happens in the next cell)
!apt update

In [None]:
# Install TensorFlow ModelServer; -y answers the confirmation prompt, which a notebook cell cannot do interactively
!apt-get install -y tensorflow-model-server

In [None]:
# Export MODEL_DIR as an environment variable so the %%bash server cell can read it as ${MODEL_DIR}
os.environ["MODEL_DIR"] = MODEL_DIR

In [None]:
%%bash --bg
# Start up the server using the command tensorflow_model_server.
# (%%bash must be the first line of the cell, so these notes live below it.)
# nohup tells the program to ignore the "hang up" signal, so it continues running when we close the window/browser.
#   --rest_api_port    port the REST API listens on (8501)
#   --model_name       name the model is addressed by in request URLs (fashion_model)
#   --model_base_path  directory that contains the numbered version folders (MODEL_DIR)
# stdout and stderr of the server are written to server.log in the current working directory.
# Comments cannot follow a trailing backslash: they would end the line continuation.
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=fashion_model \
  --model_base_path="${MODEL_DIR}" >server.log 2>&1

Starting job # 0 in a separate thread.


In [None]:
# Show the last lines of the log file that the server cell redirects its output to
!tail server.log

In [None]:
# Now that the server is running, make some requests in python to this server
# In reality, client and server would be on different machines
# Class names, ordered so that labels[k] is the name of class index k
labels = [
  'T-shirt/top',
  'Trouser',
  'Pullover',
  'Dress',
  'Coat',
  'Sandal',
  'Shirt',
  'Sneaker',
  'Bag',
  'Ankle boot',
]

In [None]:
def show(idx, title):
  """Display test image number `idx` in grayscale with `title` above it.

  Reads the image from the notebook-level `x_test` array and draws it on a
  new matplotlib figure with the axes hidden.
  """
  image = x_test[idx].reshape(28, 28)  # drop the trailing channel axis for imshow
  plt.figure()
  plt.imshow(image, cmap='gray')
  plt.axis('off')
  plt.title(f'\n\n{title}', fontdict={'size': 16})

# Plot a random sample from our test set, titled with its true label
i = np.random.randint(len(x_test))
show(i, labels[y_test[i]])

In [None]:
# Format some data to pass to the server (how to use the API).
# The request body is a JSON object of the form:
#   {
#     "signature_name": "serving_default",
#     "instances": [an N x H x W x C list]
#   }
import json

# json can't represent a numpy array, so convert the first three test images to nested lists
instances = x_test[0:3].tolist()
data = json.dumps({  # dumps = object -> string
    "signature_name": "serving_default",
    "instances": instances,
})

In [None]:
# Make HTTP request. It's a POST, not a GET, since we're sending input data for predictions
headers = {"content-type": "application/json"}
# headers must be passed by keyword: a positional argument after data=... is a SyntaxError
r = requests.post('http://localhost:8501/v1/models/fashion_model:predict', data=data, headers=headers)
j = r.json()
# Printing the keys first is less overwhelming, especially if there are many keys
print(j.keys())
print(j)
# The json has 1 key which is 'predictions', with value being an array of predictions
# Not so obvious what these predictions mean

In [None]:
# Turn the list of predictions into a numpy array so we can inspect its shape
pred = np.asarray(j['predictions'])
print(pred.shape)
# This is the N x K output array from the model
# pred[n,k] is the probability that we believe the nth sample belongs to the kth class

In [None]:
# Predicted class of each sample = index of the largest value in its row
pred = np.argmax(pred, axis=1)

In [None]:
# The predictions are class indices; map each one back to its string label
pred = [labels[class_idx] for class_idx in pred]
print(pred)

In [None]:
# Look up the true label names of the first three test samples (the ones sent to the server)
actual = [labels[class_idx] for class_idx in y_test[:3]]
print(actual)

In [None]:
# Show each of the three test images with its true and predicted label
for i in range(3):
  caption = f"True: {actual[i]}, Predicted: {pred[i]}"
  show(i, caption)

In [None]:
# Another method of calling API: versioning
# Can have multiple models running simultaneously, e.g. during an A/B test
# The version is selected in the URL path: /versions/1
# headers is defined before the request that uses it, and passed by keyword
# (a positional argument after data=... is a SyntaxError)
headers = {"content-type": "application/json"}
r = requests.post('http://localhost:8501/v1/models/fashion_model/versions/1:predict', data=data, headers=headers)
j = r.json()
pred = np.array(j['predictions'])
pred = pred.argmax(axis=1)
pred = [labels[i] for i in pred]
for i in range(0,3):
  show(i, f"True: {actual[i]}, Predicted: {pred[i]}")

- If you trained a new model later in this script and saved it to /tmp/2 (as version 2), you would not need to restart the server, i.e. TensorFlow Serving would pick it up on its own
- If you sent a request for a model (servable) that doesn't exist, you'd get 'error': 'Servable not found for request'
- By default, TF Serving uses the most up-to-date/newly released version of the model if you don't specify the version in the request
- DevOps people would be responsible for getting this onto an actual production environment like EC2 or GCP, using Docker, Kubernetes, Nginx, Unicorn, etc. SWE people (backend) would do the requesting (like in this code), and the ML people would create, test, and adjust the ML model (also like in this code)
- GCP has special tooling for TensorFlow Serving