# Option A: Local Serverless Simulation (FastAPI + Local Model Registry)
- Deploy a model as a REST API
- Submit inference requests asynchronously
- Serverless abstraction = “model service” independent of training
- Analogous to Vertex AI (covered in the book), which runs in a cloud environment
  
First, ensure you have a versioned model directory (e.g., model_registry/v_20260215_185521/) from the 07c notebook.

In [5]:
# FastAPI Server (put the following in a new file called server.py in the 07_training folder)
###### MAKE SURE YOU REPLACE THE FILE PATH FOR MODEL_DIR #######

from fastapi import FastAPI, UploadFile, File
import tensorflow as tf
import numpy as np
from PIL import Image
import io
import os
import json

# ASGI application object; uvicorn serves it as `server:app`.
app = FastAPI()

# Versioned model folder to serve -- replace with your saved model folder.
MODEL_DIR = "model_registry/v_20260215_185521"
model = tf.keras.models.load_model(MODEL_DIR)

# metadata.json is read from inside the model folder; it supplies the input
# image size and the class names used to label predictions.
metadata_path = os.path.join(MODEL_DIR, "metadata.json")
with open(metadata_path) as metadata_file:
    metadata = json.load(metadata_file)
IMG_SIZE = metadata["img_size"]
CLASS_NAMES = metadata["class_names"]

def preprocess_image(file_bytes):
    """Convert the raw bytes of an image file into a single-image batch.

    The image is decoded with Pillow, converted to RGB, resized to
    IMG_SIZE x IMG_SIZE, and its pixel values are divided by 255.0.

    Args:
        file_bytes: Contents of an encoded image file, as bytes.

    Returns:
        A NumPy float array with a leading batch axis of length 1
        (shape (1, IMG_SIZE, IMG_SIZE, 3) when IMG_SIZE is an int).
    """
    stream = io.BytesIO(file_bytes)
    rgb_image = Image.open(stream).convert("RGB")
    resized_image = rgb_image.resize((IMG_SIZE, IMG_SIZE))
    scaled_pixels = np.array(resized_image) / 255.0
    # Prepend the batch axis so the array looks like a batch of one image.
    return scaled_pixels[np.newaxis, ...]

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    """Classify an uploaded image and return the predicted class name.

    Args:
        file: The image, sent as the multipart form field named "file".

    Returns:
        A dict of the form {"prediction": <class name>}.

    Raises:
        HTTPException: With status 400 when the upload cannot be decoded
            as an image (for example an empty or non-image file).
    """
    # Imported here so the handler is self-contained.
    from fastapi import HTTPException

    bytes_data = await file.read()
    try:
        img_array = preprocess_image(bytes_data)
    except OSError as exc:
        # Pillow signals unreadable image data with OSError (including its
        # UnidentifiedImageError subclass). Report it as a client error
        # instead of letting it surface as an opaque 500 response.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image.",
        ) from exc

    # NOTE(review): model.predict is a synchronous call, so the event loop is
    # blocked while it runs. Acceptable for a local demo.
    preds = model.predict(img_array)
    # np.argmax works on the flattened array; with a single-image batch the
    # flat index is the class index (assumes one score per class -- confirm
    # against the model's output layer).
    pred_class = CLASS_NAMES[int(np.argmax(preds))]
    return {"prediction": pred_class}


In [None]:
# Run these commands in a new Anaconda PowerShell terminal to create a server with a REST API

# Activate the conda environment named "cv".
conda activate cv
# Change into the 07_training folder (where server.py is expected to live).
cd Documents\cv\07_training
# Serve the `app` object from server.py on port 8000; --reload restarts the server when the code changes.
uvicorn server:app --reload --port 8000


In [1]:
# Test Predictions (upload flower to server via the REST API, get result in JSON format)
# This can be executed directly from this notebook!

import requests

file_path = "test_flower.jpg"
with open(file_path, "rb") as f:
    # Without a timeout, requests waits indefinitely if the server is
    # unreachable or stuck, which would hang this cell.
    response = requests.post(
        "http://127.0.0.1:8000/predict/",
        files={"file": f},
        timeout=30,
    )

# Fail with a clear HTTP error on 4xx/5xx responses instead of a confusing
# JSON decoding error further down.
response.raise_for_status()
print(response.json())


{'prediction': 'daisy'}


# Option B: WSL + Container Simulation (Advanced)
- Simulate a cloud-like environment using Docker inside an Ubuntu Linux virtual machine running under WSL (Windows Subsystem for Linux)
  - We package the same FastAPI app and run it as a Docker container on WSL/Ubuntu — mimicking how Vertex AI jobs run
  - We must first install WSL/Ubuntu, then install Docker in Ubuntu
- The detailed steps and commands used to set all of this up and utilize the files below are in the class slide deck!


In [None]:
# Create Dockerfile (no file extension!) with the following contents in your 07_training folder:

# Base image: official Python 3.10, slim variant.
FROM python:3.10-slim

# Working directory inside the image; the relative paths below resolve against it.
WORKDIR /app

# Copy and install the dependencies before the application code, so this
# layer can be reused from cache when only server.py or the model changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code plus the model registry folder.
COPY server.py .
COPY model_registry/ ./model_registry/

# Documents the port the server listens on; it is published at run time with `docker run -p`.
EXPOSE 8000

# Bind to 0.0.0.0 so the server accepts connections from outside the container.
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]


In [None]:
# Create requirements.txt in your 07_training folder (note the locked versions match what we currently use):

# Web framework used by server.py.
fastapi
# ASGI server that runs the app (`uvicorn server:app`).
uvicorn
# Loads the saved model and runs predictions.
tensorflow==2.9
# NOTE(review): not imported by server.py; presumably kept to mirror the training environment -- confirm.
tensorflow_datasets==4.9
# NOTE(review): presumably pinned for compatibility with the TensorFlow version above -- confirm.
protobuf==3.20
# Provides the PIL package used to decode uploaded images.
pillow
numpy==1.26
# Needed by FastAPI to parse multipart/form-data file uploads (UploadFile).
python-multipart


In [3]:
# Build and run the container using the following commands in your 07_training folder:

# Build an image tagged local-serverless-ml from the current directory.
docker build -t local-serverless-ml .  # Don't forget the trailing period: it is the build context, so Docker looks for the Dockerfile in the current (.) directory
# Start a container named serverlessml in the background (-d), publishing container port 8000 on host port 8000 (-p).
docker run -d -p 8000:8000 --name serverlessml local-serverless-ml


In [4]:
# Client Requests (Same as Option A)

import requests

file_path = "test_flower.jpg"
with open(file_path, "rb") as f:
    # Without a timeout, requests waits indefinitely if the container is
    # not running or the server is stuck, which would hang this cell.
    response = requests.post(
        "http://127.0.0.1:8000/predict/",
        files={"file": f},
        timeout=30,
    )

# Fail with a clear HTTP error on 4xx/5xx responses instead of a confusing
# JSON decoding error further down.
response.raise_for_status()
print(response.json())


{'prediction': 'daisy'}
