# Fetch a model from the Hugging Face Hub, tag it, and store it in the MLflow Unity Catalog (UC) registry

In [0]:
# Install/upgrade einops, mlflow-skinny, timm, torch and transformers to their latest
# available versions (no version pins), then restart the Python process.
# NOTE(review): einops and timm are presumably required by the Mini-InternVL model code — confirm.
%pip install --upgrade einops mlflow-skinny timm torch transformers


%restart_python

In [0]:
# Install/upgrade accelerate, mlflow-skinny, torch, torchvision and transformers to their
# latest available versions (no version pins), then restart the Python process.
%pip install --upgrade accelerate mlflow-skinny torch torchvision transformers


%restart_python

In [0]:
# Disabled alternative install set (same as the accelerate/torchvision set plus optree); kept for reference.
# NOTE: if this is re-enabled, quote the version specifier ('optree>=0.13.0'); an unquoted
# `>` on a shell command line is parsed as output redirection.
# %pip install --upgrade accelerate mlflow-skinny optree>=0.13.0 torch torchvision transformers


# %restart_python

In [0]:
PIP_REQUIREMENTS = (
    "openai vllm>=0.7.2 httpx==0.27.2 "
    # "transformers==4.46.3 accelerate==1.0.0 "
    # "git+https://github.com/huggingface/transformers accelerate "
    "git+https://github.com/huggingface/transformers.git@336dc69d63d56f232a183a3e7f52790429b871ef "
    "mlflow==2.19.0 "
    "git+https://github.com/stikkireddy/mlflow-extensions.git@v0.17.0 "
    "qwen-vl-utils[decord] "
    "torch "
    "torchvision "
    "optree>=0.13.0 "
)
%pip install --upgrade accelerate {PIP_REQUIREMENTS}


%restart_python

In [0]:
import mlflow
from mlflow import MlflowClient

# Select the "databricks-uc" model registry first, then build the client that the
# alias-tagging cell uses later on.
mlflow.set_registry_uri(uri="databricks-uc")
client = MlflowClient()

In [0]:
# Catalog / schema / volume names used to build registered-model names and /Volumes paths.
CATALOG = "amine_elhelou"  # change this: must point to an existing catalog
SCHEMA = "ray_gtm_examples"  # must point to an existing schema
VOLUME = "fashion-images"  # volume segment of the /Volumes/... paths

## Mini-InternVL

In [0]:
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec, TensorSpec
import numpy as np


# Sample input path on the configured volume.
# NOTE(review): the variable name says "video" but the path points at a .jpg image — confirm intent.
input_video_example_path = f"/Volumes/{CATALOG}/{SCHEMA}/{VOLUME}/data/Apparel/Boys/Images/images_with_product_ids/10054.jpg"

MODEL_NAME = "qwen2_5_vl-7b"

# Define the input schema: one column per request field.
input_schema = Schema([
    ColSpec("string", "text_input"),  # User query
    ColSpec("string", "video_path"),  # Path to the input video file (URI)
    ColSpec("integer", "max_pixels"),  # Max pixel resolution for processing
    ColSpec("float", "fps")  # Frames per second for processing
])

# Define the output schema (generated text response)
output_schema = Schema([
    ColSpec("string", "generated_text")
])

# Create the model signature
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# The `mlflow.transformers.log_model(...)` call that used to sit here has been removed:
# it referenced `qwen_pipe` and `QWEN_MODEL_NAME`, which are only created in later cells,
# so running the notebook top-to-bottom on a fresh kernel failed with NameError.
# The same pipeline is logged and registered (same run name, artifact path and registered
# model name) in the logging cell that follows the pipeline construction.

## Qwen2.5 VL 7B Model

In [0]:
# Identifier of the checkpoint passed to `from_pretrained` for both the model and the processor.
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

In [0]:
import torch

# Ask PyTorch to release its cached, currently unused GPU memory before the model is loaded.
torch.cuda.empty_cache()

In [0]:
from transformers import AutoProcessor, pipeline



In [0]:
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor, pipeline
# from qwen_vl_utils import process_vision_info


# Device / dtype hints. `device` is no longer used to move the model (placement is
# delegated to `device_map="auto"` below); it is kept as a notebook-level variable.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float32 #.float16 for model size reduction

# Load the checkpoint; `device_map="auto"` places the weights on the available
# device(s) at load time.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
    use_safetensors=True
)

# Deliberately no `model.to(device)` here: the model has already been placed through
# `device_map="auto"`. Moving a dispatched model is redundant when it sits on a single
# device, and triggers a warning or an error when it is sharded across devices or offloaded.

# TO-DO: need to create a custom pipeline https://huggingface.co/docs/transformers/en/add_new_pipeline

# The processor supplies both the tokenizer and the image processor handed to the pipeline.
processor = AutoProcessor.from_pretrained(model_id)
# model_kwargs = {
# "max_model_len":10000,
# "max_num_seqs":5,
# "min_pixels" : 28 * 28,
# "max_pixels" : 1280 * 28 * 28,
# "fps" : 1
# }

# NOTE(review): the task is "object-detection" although the model is a conditional-generation
# vision-language model — see the TO-DO above and confirm the task before relying on inference.
# NOTE(review): the model was loaded with torch_dtype="auto" while float32 is passed here —
# confirm whether this hint has any effect for an already-instantiated model.
qwen_pipe = pipeline(
    "object-detection",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.image_processor,
    torch_dtype=torch_dtype,
    # device=device,
    # model_kwargs=model_kwargs
)

In [0]:
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec, TensorSpec
import numpy as np

QWEN_MODEL_NAME = "qwen2_5_vl-7b"
input_video_example_path = f"/Volumes/{CATALOG}/{SCHEMA}/{VOLUME}/download.mp4"

# (column name, column type) pairs describing one request row.
_INPUT_COLUMNS = (
    ("text_input", "string"),  # user query
    ("video_path", "string"),  # path to the input video file (URI)
    ("max_pixels", "integer"),  # max pixel resolution for processing
    ("fps", "float"),  # frames per second for processing
)
input_schema = Schema([ColSpec(col_type, col_name) for col_name, col_type in _INPUT_COLUMNS])

# The response is a single string column holding the generated text.
output_schema = Schema([ColSpec("string", "generated_text")])

# Signature attached to the logged model.
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# Log the pipeline inside a dedicated run and register it under CATALOG.SCHEMA.QWEN_MODEL_NAME.
with mlflow.start_run(run_name="qwen-video-log-pipeline"):
    model_info = mlflow.transformers.log_model(
        transformers_model=qwen_pipe,
        artifact_path="qwen_pipeline",
        signature=signature,
        registered_model_name=f"{CATALOG}.{SCHEMA}.{QWEN_MODEL_NAME}",
        # input_example=input_video_example_path,
    )

In [0]:
# Attach the "production" alias to the model version registered by the logging cell.
client.set_registered_model_alias(
    name=f"{CATALOG}.{SCHEMA}.{QWEN_MODEL_NAME}",
    alias="production",
    version=model_info.registered_model_version,
)