In [1]:
import os
import shutil

# Lấy đường dẫn tới thư mục cache của Hugging Face
cache_dir = os.path.expanduser("~/.cache/huggingface/hub")

# Xóa toàn bộ cache mô hình đã tải
if os.path.exists(cache_dir):
    shutil.rmtree(cache_dir)
    print(f"Đã xóa toàn bộ cache trong: {cache_dir}")
else:
    print(f"Thư mục cache không tồn tại: {cache_dir}")


Đã xóa toàn bộ cache trong: C:\Users\datkt/.cache/huggingface/hub


In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer


checkpoint = "mr4/phobert-base-vi-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    clean_up_tokenization_spaces=True,
)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

storage_path = "models/"
tokenizer.save_pretrained(storage_path + "tokenizer/")
model.save_pretrained(storage_path + "model/")

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 308/308 [00:00<00:00, 308kB/s]
Downloading: 100%|██████████| 874k/874k [00:00<00:00, 3.57MB/s]
Downloading: 100%|██████████| 1.08M/1.08M [00:00<00:00, 2.65MB/s]
Downloading: 100%|██████████| 22.0/22.0 [00:00<?, ?B/s]
Downloading: 100%|██████████| 167/167 [00:00<00:00, 292kB/s]
Downloading: 100%|██████████| 993/993 [00:00<?, ?B/s] 
Downloading: 100%|██████████| 515M/515M [01:36<00:00, 5.60MB/s]    


In [6]:
# test prediction
import numpy as np

from typing import List, Optional
from pydantic import BaseModel
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Pydantic models for prediction results
class Prediction(BaseModel):
    sentiment: str
    confidence: Optional[float]


class Predictions(BaseModel):
    predictions: List[Prediction]
    
# # Load tokenizer và model
storage_path = "models/"
tokenizer = AutoTokenizer.from_pretrained(storage_path + "tokenizer/")
model = AutoModelForSequenceClassification.from_pretrained(
    storage_path + "model/"
)
    
def predict(body):
    # Extract the instances (texts) from the request
    instances = [x["text"] for x in body["instances"]]

    # Tokenize văn bản cho mô hình
    tf_batch = tokenizer(
        instances,
        # max_length=128,
        padding=True,
        truncation=True,
        return_tensors="pt",  # Chuyển thành tensor Pytorch
    )

    # Lấy kết quả dự đoán từ mô hình
    with torch.no_grad():
        tf_outputs = model(**tf_batch)

    # Áp dụng hàm softmax để lấy xác suất (điểm tự tin)
    softmax = torch.nn.functional.softmax(tf_outputs.logits, dim=-1).numpy()

    # Tìm chỉ số của xác suất cao nhất (dự đoán cảm xúc)
    indices = np.argmax(softmax, axis=-1)

    # Lấy giá trị confidence cao nhất cho mỗi dự đoán
    confidences = np.max(softmax, axis=-1)

    # Prepare the output
    outputs = []
    for index, confidence in zip(indices, confidences):
        sentiment = model.config.id2label[index]
        outputs.append(
            Prediction(sentiment=sentiment, confidence=float(confidence))
        )

    # Return the predictions
    return Predictions(predictions=outputs)

body = {
    "instances":[
        {"text": "Trời hôm nay đẹp quá!"},
        {"text": "món ăn này không ngon"}
    ]
}

predict(body)

Predictions(predictions=[Prediction(sentiment='Tích cực', confidence=0.9900698065757751), Prediction(sentiment='Tiêu cực', confidence=0.9821959733963013)])

In [None]:
%% writefile main.py
import os
from typing import List, Optional

import numpy as np
import torch
import uvicorn  # noqa: F401
from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# # Load tokenizer và model
storage_path = "models/"
tokenizer = AutoTokenizer.from_pretrained(storage_path + "tokenizer/")
model = AutoModelForSequenceClassification.from_pretrained(
    storage_path + "model/"
)

# Initialize FastAPI app
app = FastAPI(title="Sentiment Analysis API")

# Define health and prediction routes based on environment variables or defaults
AIP_HEALTH_ROUTE = os.environ.get("AIP_HEALTH_ROUTE", "/health")
AIP_PREDICT_ROUTE = os.environ.get("AIP_PREDICT_ROUTE", "/predict")


# Pydantic models for prediction results
class Prediction(BaseModel):
    sentiment: str
    confidence: Optional[float]


class Predictions(BaseModel):
    predictions: List[Prediction]


# Health check route
@app.get(AIP_HEALTH_ROUTE, status_code=200)
async def health():
    return {"health": "ok"}


# Prediction route to handle batch requests
@app.post(
    AIP_PREDICT_ROUTE,
    response_model=Predictions,
    response_model_exclude_unset=True,
)
async def predict(request: Request):
    # Extract the JSON body from the request
    body = await request.json()

    # Extract the instances (texts) from the request
    instances = [x["text"] for x in body["instances"]]

    # Tokenize văn bản cho mô hình
    tf_batch = tokenizer(
        instances,
        # max_length=128,
        padding=True,
        truncation=True,
        return_tensors="pt",  # Chuyển thành tensor Pytorch
    )

    # Lấy kết quả dự đoán từ mô hình
    with torch.no_grad():
        tf_outputs = model(**tf_batch)

    # Áp dụng hàm softmax để lấy xác suất (điểm tự tin)
    softmax = torch.nn.functional.softmax(tf_outputs.logits, dim=-1).numpy()

    # Tìm chỉ số của xác suất cao nhất (dự đoán cảm xúc)
    indices = np.argmax(softmax, axis=-1)

    # Lấy giá trị confidence cao nhất cho mỗi dự đoán
    confidences = np.max(softmax, axis=-1)

    # Prepare the output
    outputs = []
    for index, confidence in zip(indices, confidences):
        sentiment = model.config.id2label[index]
        outputs.append(
            Prediction(sentiment=sentiment, confidence=float(confidence))
        )

    # Return the predictions
    return Predictions(predictions=outputs)


# Main function to run the FastAPI app
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8080)

In [9]:
%%writefile Dockerfile
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim

WORKDIR /app

COPY main.py ./main.py
COPY requirements.txt ./requirements.txt
COPY models ./models

RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r ./requirements.txt

EXPOSE 8080
CMD ["python", "main.py"]

Overwriting Dockerfile


In [None]:
LOCATION = "asia-southeast1"
PROJECT = "joyas-vietnam"
REPOSITORY_NAME = "dev-aiml-model"

In [24]:
%%writefile cloudbuild.yaml
steps:
# Download the model to embed it into the image
# - name: 'gcr.io/cloud-builders/gsutil'
#   args: ['cp', '-r', 'gs://dev-joyas-recommendation/models/sentiment', '.']
#   id: 'download-model'

# Build the container image
- name: 'gcr.io/cloud-builders/docker'
  args: ['build', '-t', 'asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api', '.']
  # waitFor: ['download-model']

# Push the container image to Artifact Registry
- name: 'gcr.io/cloud-builders/docker'
  args: ['push', 'asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api']

images:
- asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api

Overwriting cloudbuild.yaml


In [20]:
!gcloud auth login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=1T3Qk9s5eHVpuz53ibnkXfQ9kFSCGP&access_type=offline&code_challenge=_KVBAaKJGkilFzXi78d8TNjFFEIBKf8G2EPS9xr6l3o&code_challenge_method=S256


You are now logged in as [datkt.joyas@gmail.com].
Your current project is [ext-pinetree-dw].  You can change this setting by running:
  $ gcloud config set project PROJECT_ID


In [21]:
!gcloud config set project joyas-vietnam


To update your Application Default Credentials quota project, use the `gcloud auth application-default set-quota-project` command.
Updated property [core/project].


In [16]:
!gcloud services enable artifactregistry.googleapis.com

In [19]:
!gcloud artifacts repositories create dev-aiml-model \
  --repository-format=docker \
  --location=asia-southeast1 \
  --description="My Docker repository"

Create request issued for: [dev-aiml-model]
Waiting for operation [projects/joyas-vietnam/locations/asia-southeast1/operations/c10a00b3-2f8b-45bf-b923-45e42675f358] to complete...
.................done.
Created repository [dev-aiml-model].


In [25]:
!gcloud builds submit --config cloudbuild.yaml .

^C


In [26]:
!docker build -t asia-southeast1-docker.pkg.dev/joyas-vietnam/dev-aiml-model/sentiment-fast-api .

#0 building with "desktop-linux" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 278B 0.0s done
#1 DONE 0.0s

#2 [internal] load metadata for docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim
#2 ...

#3 [auth] tiangolo/uvicorn-gunicorn-fastapi:pull token for registry-1.docker.io
#3 DONE 0.0s

#2 [internal] load metadata for docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim
#2 DONE 2.2s

#4 [internal] load .dockerignore
#4 transferring context: 2B done
#4 DONE 0.0s

#5 [1/5] FROM docker.io/tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim@sha256:cce370ade672f3bfcac80d0c80314fc6b6530d3c623dab384af12da76cd2db6b
#5 DONE 0.0s

#6 [internal] load build context
#6 transferring context: 574B done
#6 DONE 0.0s

#7 [2/5] COPY main.py ./main.py
#7 CACHED

#8 [3/5] COPY requirements.txt ./requirements.txt
#8 CACHED

#9 [4/5] COPY models ./models
#9 DONE 5.7s

#10 [5/5] RUN pip install --no-cache-dir -r ./requirements.tx