In [1]:
import mlflow
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from mlflow.models import infer_signature

# Configuration: the Hugging Face checkpoint to fetch, the MLflow tracking
# server to report to (adjust the URI if MLflow runs elsewhere), and the local
# directory that receives the exported files.
HF_MODEL_NAME = "HuggingFaceTB/SmolLM2-360M-Instruct"
MLFLOW_TRACKING_URI = "http://localhost:5000"
MODEL_DIR = "smollm2_model"

# Point the MLflow Client at the Tracking Server
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Fetch the Tokenizer and the Causal-LM Model for the Checkpoint
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(HF_MODEL_NAME)

# Export Both into MODEL_DIR so the Directory Can Be Logged to MLflow Later.
# The tokenizer export is deliberately the final expression: its return value
# (the tuple of written file paths) is what the cell displays.
model.save_pretrained(save_directory=MODEL_DIR)
tokenizer.save_pretrained(save_directory=MODEL_DIR)

('smollm2_model/tokenizer_config.json',
 'smollm2_model/special_tokens_map.json',
 'smollm2_model/vocab.json',
 'smollm2_model/merges.txt',
 'smollm2_model/added_tokens.json',
 'smollm2_model/tokenizer.json')

In [5]:
import os

# Export the MLflow-related settings in a single update; environment values
# must be strings, hence the str() around each computed byte count.
# NOTE(review): MLFLOW_HTTP_REQUEST_TIMEOUT is a documented MLflow setting;
# confirm the other names are actually read by the installed MLflow version.
os.environ.update(
    {
        "MLFLOW_HTTP_REQUEST_MAX_SIZE": str(256 * 1024 ** 2),  # 256MB
        "MLFLOW_UPLOAD_BUFFER_SIZE": str(128 * 1024 ** 2),  # 128MB
        "MLFLOW_MAX_ARTIFACT_SIZE": str(1024 ** 3),  # 1GB
        "MLFLOW_HTTP_REQUEST_TIMEOUT": "600",  # seconds, i.e. 10 minutes
        "MLFLOW_TCP_KEEPALIVE": "1",
    }
)

In [7]:
# Names under which this upload is filed in MLflow; the experiment name
# mirrors the Hugging Face model name.
EXPERIMENT_NAME = "SmolLM2-360M-Instruct"
RUN_NAME = "HuggingFaceTB-SmolLM2"

# Select the Experiment (MLflow creates it when it does not exist yet)
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)


with mlflow.start_run(run_name=RUN_NAME) as run:
    # Upload the contents of MODEL_DIR to the run, under "smollm2_artifacts"
    mlflow.log_artifacts(local_dir=MODEL_DIR, artifact_path="smollm2_artifacts")

    # Report what was logged and where, one message per line. Kept inside the
    # run context so the summary is printed while the run is still active.
    print(
        f"✅ Model '{HF_MODEL_NAME}' successfully saved and logged as artifacts in MLflow!",
        f"🧪 Experiment: {EXPERIMENT_NAME}",
        f"📌 Run Name: {RUN_NAME}",
        f"🔗 Run ID: {run.info.run_id}",
        sep="\n",
    )

2025/01/22 00:40:46 INFO mlflow.tracking.fluent: Experiment with name 'SmolLM2-360M-Instruct' does not exist. Creating a new experiment.


✅ Model 'HuggingFaceTB/SmolLM2-360M-Instruct' successfully saved and logged as artifacts in MLflow!
🧪 Experiment: SmolLM2-360M-Instruct
📌 Run Name: HuggingFaceTB-SmolLM2
🔗 Run ID: cd70efaaea824cf280ad266a88475579
🏃 View run HuggingFaceTB-SmolLM2 at: http://localhost:5000/#/experiments/1/runs/cd70efaaea824cf280ad266a88475579
🧪 View experiment at: http://localhost:5000/#/experiments/1
