## Deployment

In [1]:
## Setup
# Packages you will need in the Deployment Stage

import lab_utils
import tensorflow as tf
import pandas as pd
import os
import shutil
import requests
import json
import sqlite3
import numpy as np
import matplotlib.pyplot as plt

Install the TensorFlow Serving model server on your system by following the instructions at the link below. <br>
[TensorFlow Serving installation](https://www.tensorflow.org/tfx/serving/setup)

In [2]:
# # Uncomment the lines below if you ran this section before and want to DELETE all models in the serving directory
# SERVING_DIR = f'{os.getcwd()}/serving'
# os.environ["SERVING_DIR"] = SERVING_DIR
# os.system('find $SERVING_DIR -maxdepth 1 -mindepth 1 -type d -exec rm -rf {} \;')

In [3]:
## Serving directory setup
# Directory (under the current working directory) that holds the servable model versions
SERVING_DIR = f'{os.getcwd()}/serving'

# Export the same path as an environment variable so shell cells can read it
os.environ['SERVING_DIR'] = SERVING_DIR

# Show both values so you can confirm they agree
print(
    f'SERVING_DIR: {SERVING_DIR}',
    f'os.environ["SERVING_DIR"]: {os.environ["SERVING_DIR"]}',
    sep='\n',
)

SERVING_DIR: /home/jovyan/work/serving
os.environ["SERVING_DIR"]: /home/jovyan/work/serving


In [4]:
# Copy the model in store2 into the serving directory under a folder named 1.
# copytree creates the destination folder (and any missing parents) itself, so no
# separate makedirs call is needed. Skipping it also avoids leaving an empty
# version folder behind when the source model directory does not exist.
# dirs_exist_ok=True lets this cell be re-run when the folder is already there.
shutil.copytree('./store2/model/', f'{SERVING_DIR}/1', dirs_exist_ok=True)

'/home/jovyan/work/serving/1'

In [None]:
## NOTE: To run the server with Docker instead, copy the lines below into a new cell and uncomment them.

# command = (
#     'docker run -p 8501:8501 --mount type=bind,source="${SERVING_DIR}",target=/models/newsapp_model '+
#     '-e MODEL_NAME=newsapp_model --name=tensorflow-serving -t tensorflow/serving &'
# )

# os.system(command)

In [None]:
%%bash --bg 
# Launch the TensorFlow Serving model server as a background process (--bg).
# It exposes the REST API on port 8501, registers the model under the name
# "newsapp_model", and uses $SERVING_DIR (exported by an earlier cell) as the
# model base path. stdout and stderr are both written to ./serving/server.log.
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=newsapp_model \
  --model_base_path="${SERVING_DIR}" > ./serving/server.log 2>&1

In [None]:
# Smoke-test the REST endpoint with curl by posting one instance of 20 token ids.
# The two string literals are joined by Python; a single space separates the
# payload from the -X flag, so no shell line-continuation backslash is needed.
command = (
    'curl -d \'{"instances": [[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]]}\' '
    '-X POST http://localhost:8501/v1/models/newsapp_model:predict'
)

# The cell output shows curl's response followed by the command's exit status
os.system(command)

You can also send requests programmatically using the code snippet below. This example demonstrates how to load the title preprocessor to transform raw strings into a format the model can process. While this approach works, it has a potential issue. Can you identify the problem?

In [None]:
# Used by the team on their prototype
MAX_LENGTH = 20
VOCAB_SIZE = 10000

# Instantiate a layer for text preprocessing
title_preprocessor = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE, output_sequence_length=MAX_LENGTH)

# Load the vocabulary file
title_preprocessor.load_assets('./E2/vocab')

# Sample input
sample_input = 'Sample title'

# Preprocess the string
sample_input_ds = title_preprocessor(sample_input)

# Add a batch dimension
sample_input_ds = tf.expand_dims(sample_input_ds, axis=0)

# Compose the data
data = json.dumps({"instances": sample_input_ds.numpy().tolist()})

# Define the headers
headers = {"content-type": "application/json"}

# Send the request. The timeout keeps the cell from hanging forever if the
# model server is not reachable.
json_response = requests.post(
    'http://localhost:8501/v1/models/newsapp_model:predict',
    data=data,
    headers=headers,
    timeout=10,
)

# Fail with the server's own message instead of a confusing KeyError on 'predictions'
if not json_response.ok:
    raise RuntimeError(
        f'Prediction request failed ({json_response.status_code}): {json_response.text}'
    )

# Get the predictions
predictions = json_response.json()['predictions']

print(predictions)

When serving models, it's crucial to ensure that data transformations are consistent with those used during training. The code above risks errors in loading the vocabulary, especially if you’ve edited it for multiple experiments. For example, you might accidentally load the wrong vocabulary file (e.g., from Experiment 3 instead of Experiment 2), leading to unreliable predictions because the titles are being preprocessed differently.
<br><br>
To prevent this, frameworks and tools often provide mechanisms to link preprocessing steps directly to the model. In TensorFlow, you can attach the title preprocessor to the model's input. This ensures the correct vocabulary is always associated with the model, allowing you to pass strings directly without worrying about mismatches. You'll implement this approach below.
<br>
Note that this is usually done after you've trained the model and are ready to deploy. If you attach it before that, it will likely slow down training, especially if you're using GPUs.

In [None]:
# Root folder of the experiment whose model is being deployed
BASE_DIR = './store2'

# Resolve the data, model and vocabulary subdirectories of that experiment
data_dir, model_dir, vocab_dir = lab_utils.set_experiment_dirs(BASE_DIR)

# Restore the trained model from the experiment's model directory
model = tf.keras.models.load_model(model_dir)

# Build a text preprocessing layer with the prototype's settings
title_preprocessor = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_sequence_length=MAX_LENGTH,
)

# Load the vocabulary saved alongside this experiment
title_preprocessor.load_assets(vocab_dir)

# Chain the preprocessing layer in front of the trained model
model_with_preprocessor = tf.keras.Sequential()
model_with_preprocessor.add(title_preprocessor)
model_with_preprocessor.add(model)

# A raw string can now be used as the input
sample_input = "Sample Title"

# Predict directly from the string, with no manual preprocessing step
model_with_preprocessor.predict([sample_input])

In [None]:
# You can now deploy this version of the model as well. Aside from model.save(),
# you can also use model.export(), which saves a lightweight version of the model
# dedicated for serving.
# Run this cell to save it in the serving directory under the 2 folder.
model_with_preprocessor.export(f'{SERVING_DIR}/2')

In [None]:
## TensorFlow Serving will detect this latest model, and you can see below that you can just send string inputs to the model.
## If you get an error, please wait one minute before running the cell again.

# Raw strings go straight into the request payload
data = json.dumps({"instances": ["sample title"]})

headers = {"content-type": "application/json"}

# The timeout keeps the cell from hanging forever if the model server is not reachable
json_response = requests.post(
    'http://localhost:8501/v1/models/newsapp_model:predict',
    data=data,
    headers=headers,
    timeout=10,
)

# Fail with the server's own message instead of a confusing KeyError on 'predictions'
if not json_response.ok:
    raise RuntimeError(
        f'Prediction request failed ({json_response.status_code}): {json_response.text}'
    )

predictions = json_response.json()['predictions']

print(predictions)

In [None]:
# # NOTE: Uncomment and run this cell if you're running on your own device and want to use Docker instead.
# !docker stop tensorflow-serving

In [None]:
## Kill model server
!kill $(ps aux | grep 'tensorflow_model_server' | awk '{print $2}')