# Serve model deploy to vertex AI

In [1]:
import os
import shutil

# Lấy đường dẫn tới thư mục cache của Hugging Face
cache_dir = os.path.expanduser("~/.cache/huggingface/hub")

# Xóa toàn bộ cache mô hình đã tải
if os.path.exists(cache_dir):
    shutil.rmtree(cache_dir)
    print(f"Đã xóa toàn bộ cache trong: {cache_dir}")
else:
    print(f"Thư mục cache không tồn tại: {cache_dir}")


Đã xóa toàn bộ cache trong: C:\Users\datkt/.cache/huggingface/hub


**Setup environment**

In [None]:
%%writefile requirements.txt

uvicorn[standard]==0.20.0
gunicorn==23.0.0
fastapi[all]==0.88.0
pydantic==1.10.18

In [3]:
!python -m venv .venv
!.venv\Scripts\activate
!conda deactivate
!python -m pip install --upgrade pip
!pip install --no-cache-dir -r ./requirements.txtm

'.venv' is not recognized as an internal or external command,
operable program or batch file.


Collecting uvicorn==0.20.0 (from uvicorn[standard]==0.20.0->-r ./requirements.txt (line 1))

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandantic 0.3.0 requires pydantic<3.0.0,>=2.0.0, but you have pydantic 1.10.18 which is incompatible.



  Downloading uvicorn-0.20.0-py3-none-any.whl.metadata (6.2 kB)
Collecting gunicorn==23.0.0 (from -r ./requirements.txt (line 2))
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting fastapi==0.88.0 (from fastapi[all]==0.88.0->-r ./requirements.txt (line 3))
  Downloading fastapi-0.88.0-py3-none-any.whl.metadata (24 kB)
Collecting pydantic (from -r ./requirements.txt (line 4))
  Downloading pydantic-1.10.18-cp311-cp311-win_amd64.whl.metadata (153 kB)
Collecting starlette==0.22.0 (from fastapi==0.88.0->fastapi[all]==0.88.0->-r ./requirements.txt (line 3))
  Downloading starlette-0.22.0-py3-none-any.whl.metadata (5.8 kB)
Collecting orjson>=3.2.1 (from fastapi[all]==0.88.0->-r ./requirements.txt (line 3))
  Downloading orjson-3.10.7-cp311-none-win_amd64.whl.metadata (51 kB)
Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi[all]==0.88.0->-r ./requirements.txt (line 3))
  Downloading ujson-5.10.0-cp311-cp311-win_amd64.whl.metadata 

**Build example model with FastAPI**

In [1]:
%%writefile main.py

import random
from typing import List, Optional

import uvicorn  # noqa: F401
from fastapi import FastAPI, Request
from pydantic import BaseModel

# Initialize FastAPI app
app = FastAPI(title="Sentiment Analysis API")


# Pydantic models for prediction results
class Prediction(BaseModel):
    sentiment: str
    confidence: Optional[float]


class Predictions(BaseModel):
    predictions: List[Prediction]


def get_prediction(instances):
    res = []
    for text in instances:
        if len(text) > 30:
            res.append(
                Prediction(
                    sentiment="Tích cực",
                    confidence=random.randrange(80, 100, step=2) / 10,
                )
            )
        elif len(text) > 10:
            res.append(
                Prediction(
                    sentiment="Tiêu cực",
                    confidence=random.randrange(80, 100, step=2) / 10,
                )
            )
        else:
            res.append(
                Prediction(
                    sentiment="Trung lập",
                    confidence=random.randrange(80, 100, step=2) / 10,
                )
            )
    return Predictions(predictions=res)


# Health check route
@app.get("/health", status_code=200)
async def health():
    return {"health": "ok"}


# Prediction route to handle batch requests
@app.post(
    "/predict",
    response_model=Predictions,
    response_model_exclude_unset=True,
)
async def predict(request: Request):
    # Extract the JSON body from the request
    body = await request.json()

    # Extract the instances (texts) from the request
    instances = [x["text"] for x in body["instances"]]

    output = get_prediction(instances)
    # Return the predictions
    return output


# Main function to run the FastAPI app
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8080)


Overwriting main.py


**Write dockerfile**

In [4]:
%%writefile Dockerfile

FROM tiangolo/uvicorn-gunicorn:python3.11-slim

WORKDIR /app

COPY main.py ./main.py
COPY requirements.txt ./requirements.txt

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r ./requirements.txt

EXPOSE 8080
CMD ["python", "main.py"]

Overwriting Dockerfile


**Build docker image**

In [None]:
!docker build -t asia-southeast1-docker.pkg.dev/ext-pinetree-dw/dev-aiml-model/sentiment-fast-api .

In [None]:
!gcloud ai models upload \
  --container-ports=80 \
  --container-predict-route="/predict" \
  --container-health-route="/health" \
  --region=asia-southeast1 \
  --display-name=sentiment-fast-api \
  --container-image-uri=gcr.io/sascha-playground-doit/sentiment-fast-api

In [None]:
!gcloud ai endpoints create \
  --project=ext-pinetree-dw \
  --region=us-central1 \
  --display-name=sentiment-fast-api-test

In [5]:
from google.cloud import aiplatform

project = 'ext-pinetree-dw'
location = 'asia-southeast1'

aiplatform.init(project=project,
                location=location)

In [8]:
instances = [
      {
         "text":"DoiT is a great company."
      },
      {
         "text":"The beach was nice but overall the hotel was very bad."
      }
   ]


endpoint = aiplatform.Endpoint("projects/723874410918/locations/asia-southeast1/endpoints/4126673847229349888")

prediction = endpoint.predict(instances=instances)
print(prediction)

Prediction(predictions=[{'sentiment': 'Tiêu cực', 'confidence': 8.0}, {'sentiment': 'Tích cực', 'confidence': 8.6}], deployed_model_id='8408136941417005056', metadata=None, model_version_id='1', model_resource_name='projects/723874410918/locations/asia-southeast1/models/sentiment-fast-api-test', explanations=None)


In [24]:
%%writefile cloudbuild.yaml
steps:
# Download the model to embed it into the image
# - name: 'gcr.io/cloud-builders/gsutil'
#   args: ['cp', '-r', 'gs://dev-joyas-recommendation/models/sentiment', '.']
#   id: 'download-model'

# Build the container image
- name: 'gcr.io/cloud-builders/docker'
  args: ['build', '-t', 'asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api', '.']
  # waitFor: ['download-model']

# Push the container image to Artifact Registry
- name: 'gcr.io/cloud-builders/docker'
  args: ['push', 'asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api']

images:
- asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api

Overwriting cloudbuild.yaml


In [19]:
!gcloud artifacts repositories create dev-aiml-model \
  --repository-format=docker \
  --location=asia-southeast1 \
  --description="My Docker repository"

Create request issued for: [dev-aiml-model]
Waiting for operation [projects/joyas-vietnam/locations/asia-southeast1/operations/c10a00b3-2f8b-45bf-b923-45e42675f358] to complete...
.................done.
Created repository [dev-aiml-model].


In [25]:
!gcloud builds submit --config cloudbuild.yaml .

^C


In [26]:
!docker build -t asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api .

#0 building with "desktop-linux" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 278B 0.0s done
#1 DONE 0.0s

#2 [internal] load metadata for docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim
#2 ...

#3 [auth] tiangolo/uvicorn-gunicorn-fastapi:pull token for registry-1.docker.io
#3 DONE 0.0s

#2 [internal] load metadata for docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim
#2 DONE 2.2s

#4 [internal] load .dockerignore
#4 transferring context: 2B done
#4 DONE 0.0s

#5 [1/5] FROM docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim@sha256:cce370ade672f3bfcac80d0c80314fc6b6530d3c623dab384af12da76cd2db6b
#5 DONE 0.0s

#6 [internal] load build context
#6 transferring context: 574B done
#6 DONE 0.0s

#7 [2/5] COPY main.py ./main.py
#7 CACHED

#8 [3/5] COPY requirements.txt ./requirements.txt
#8 CACHED

#9 [4/5] COPY models ./models
#9 DONE 5.7s

#10 [5/5] RUN pip install --no-cache-dir -r ./requirements.tx