In [1]:
!pip -q install mlflow transformers boto3

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.4/80.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.1/329.1 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import mlflow
import boto3

In [3]:
# mlflow set tracking
url = "https://victoria-communicable-sometimes.ngrok-free.dev"
mlflow.set_tracking_uri(url)
tracking_uri = mlflow.get_tracking_uri()
print(f"Current tracking uri: {tracking_uri}")

Current tracking uri: https://victoria-communicable-sometimes.ngrok-free.dev


In [4]:
mlflow.set_experiment("healthcarechatbot")

<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1760804990524, experiment_id='1', last_update_time=1760804990524, lifecycle_stage='active', name='healthcarechatbot', tags={}>

In [16]:
from mlflow.tracking import MlflowClient
import re

client = MlflowClient()
model_name = "health-llm"
versions = client.get_latest_versions(model_name)

for v in versions:
    path = v.source
    print("Model URI:", path)

    match = re.match(r"s3://([^/]+)/(.*)", path)
    if match:
        bucket = match.group(2)
        print("Bucket:", bucket)
    else:
        print("Invalid model URI format")

  versions = client.get_latest_versions(model_name)


Model URI: s3://mlflow-artifacts-monitor/models/health-llm/e16dc23fbffc46f1839c02dae7b38be6
Bucket: models/health-llm/e16dc23fbffc46f1839c02dae7b38be6


In [17]:
import os
import boto3
from tqdm import tqdm
from dotenv import load_dotenv

def load_model_from_s3(s3_prefix: str, local_dir: str = "downloaded_model"):
    """
    Download an entire model directory (e.g., from MLflow-registered S3 prefix).
    Example s3_prefix: "models/health-llm/b3f91d2b6f42464aab9b9ff07d22ad89"
    """

    load_dotenv()

    aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
    aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    aws_region = os.getenv("AWS_DEFAULT_REGION", "ap-southeast-2")
    bucket_name = os.getenv("AWS_BUCKET_NAME", "mlflow-artifacts-monitor")

    if not all([aws_access_key, aws_secret_key, bucket_name]):
        raise ValueError("Missing AWS credentials or bucket name in .env file")

    s3 = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
        region_name=aws_region
    )

    os.makedirs(local_dir, exist_ok=True)

    paginator = s3.get_paginator("list_objects_v2")
    total_files = 0

    # Đếm file trước (để tqdm chạy đẹp)
    for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix):
        for obj in page.get("Contents", []):
            total_files += 1

    with tqdm(total=total_files, desc=f"Downloading model from {s3_prefix}") as pbar:
        for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                local_path = os.path.join(local_dir, os.path.relpath(key, s3_prefix))
                os.makedirs(os.path.dirname(local_path), exist_ok=True)

                s3.download_file(bucket_name, key, local_path)
                pbar.update(1)

    print(f"Model downloaded successfully → {local_dir}")
    return local_dir


In [18]:
local_dir = load_model_from_s3(bucket)

Downloading model from models/health-llm/e16dc23fbffc46f1839c02dae7b38be6: 100%|██████████| 9/9 [00:18<00:00,  2.01s/it]

Model downloaded successfully → downloaded_model





In [24]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load tokenizer và model từ local
tokenizer = AutoTokenizer.from_pretrained(local_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(local_dir)


In [28]:
messages = [
    {"role": "user", "content": "Tôi bị ho nhiều ngày rồi, nên uống thuốc gì?"}
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=150,
    temperature=0.7,
    do_sample=True
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


* trả ngắn gọn, dựa trên kiến thức y tế Việt Nam.  Cess  elegans Bạn là một trợ lý y tế thông minh, trả lời ngắn gọn, dựa trên kiến thức y tế Việt Nam.  Cess Cess[[]+ Bạn là một trợ lý y tế thông minh, trả lời ngắn gọn, xác, dựa trên kiến thức y tế Việt Nam.  Cess Cess Sénégal Bạn là một trợ lý y tế thông minh, trả lời ngắn gọn, xác, dựa trên kiến thức y tế Việt Nam.[]LeaksLeaks Video clip về Sénégal bị chảy máu mũi thì nên làm gì?[]LeaksLeaks Video Video clip về Sénégal bị chảy máu mũi thì nên làm gì?[


# Streaming

In [26]:
from transformers import TextIteratorStreamer
from threading import Thread

In [27]:
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    max_new_tokens=150,
    do_sample=True,
    temperature=0.7,
    streamer=streamer
)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

print("Assistant:", end=" ", flush=True)
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()

Assistant: * trợ lý y tế thông minh, trả lời ngắn gọn, dựa trên kiến thức y tế Việt Nam. Trang  Mousebrievevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevevev