In [1]:
import os
import json
import random
import requests
from openai import OpenAI
from nemo_microservices import NeMoMicroservices

In [2]:
# Deployment settings are star-imported from the local `config` module; names used
# below without a definition in this notebook (NEMO_URL, NIM_URL, ...) come from there.
from config import *

# SDK client: base_url targets NEMO_URL, inference_base_url targets NIM_URL
nemo_client = NeMoMicroservices(base_url=NEMO_URL, inference_base_url=NIM_URL)

In [3]:
# Echo the resolved settings so a wrong endpoint, namespace or base model is visible up front
print(
    f"Data Store endpoint: {NDS_URL}",
    f"Entity Store, Customizer, Evaluator endpoint: {NEMO_URL}",
    f"NIM endpoint: {NIM_URL}",
    f"Namespace: {NMS_NAMESPACE}",
    f"Base Model for Customization: {BASE_MODEL}@{BASE_MODEL_VERSION}",
    sep="\n",
)

Data Store endpoint: http://data-store.test
Entity Store, Customizer, Evaluator endpoint: http://nemo.test
NIM endpoint: http://nim.test
Namespace: xlam-tutorial-ns
Base Model for Customization: meta/llama-3.2-1b-instruct@v1.0.0+A100


In [4]:
# Directory where the data preparation notebook saved the fine-tuning and evaluation files
DATA_ROOT = os.path.join(os.getcwd(), "data")

# Fail fast if any of the three split files is missing
train_fp = f"{DATA_ROOT}/train.jsonl"
assert os.path.exists(train_fp), (
    f"The training data at '{train_fp}' does not exist. "
    "Please ensure that the data was prepared successfully."
)

val_fp = f"{DATA_ROOT}/val.jsonl"
assert os.path.exists(val_fp), (
    f"The validation data at '{val_fp}' does not exist. "
    "Please ensure that the data was prepared successfully."
)

test_fp = f"{DATA_ROOT}/test.jsonl"
assert os.path.exists(test_fp), (
    f"The test data at '{test_fp}' does not exist. "
    "Please ensure that the data was prepared successfully."
)

In [5]:
def create_namespaces(nemo_client, ds_host, namespace, timeout=30):
    """Create `namespace` in the Entity Store and in the Data Store.

    An already-existing namespace is tolerated in both stores, so the call can be repeated.

    Args:
        nemo_client: SDK client; `namespaces.create(id=...)` is called on it.
        ds_host: Base URL of the Data Store; `/v1/datastore/namespaces` is appended.
        namespace: Namespace identifier to create.
        timeout: Seconds to wait for the Data Store HTTP request before `requests`
            raises, so an unreachable host cannot hang the notebook.

    Raises:
        Exception: Whatever the SDK raised, unless it signals HTTP 409/422.
        AssertionError: If the Data Store answers with a status other than
            200, 201, 409 or 422.
    """
    # Create namespace in Entity Store
    try:
        namespace_obj = nemo_client.namespaces.create(id=namespace)
        print(f"Created namespace in Entity Store: {namespace_obj.id}")
    except Exception as e:
        # Prefer the structured HTTP status when the exception exposes one: substring
        # matching on the message also hits unrelated errors whose text happens to
        # contain "409"/"422" (ports, request ids, namespace names).
        status_code = getattr(e, "status_code", None)
        if status_code is None:
            # No structured status available; keep the message-based check as a fallback.
            already_exists = "409" in str(e) or "422" in str(e)
        else:
            already_exists = status_code in (409, 422)

        if not already_exists:
            # Bare raise keeps the original traceback intact
            raise
        print(f"Namespace {namespace} already exists in Entity Store")

    # Create namespace in Data Store (still using requests as SDK doesn't cover Data Store)
    nds_url = f"{ds_host}/v1/datastore/namespaces"
    resp = requests.post(nds_url, data={"namespace": namespace}, timeout=timeout)
    assert resp.status_code in (200, 201, 409, 422), \
        f"Unexpected response from Data Store during namespace creation: {resp.status_code}"
    print(f"Data Store namespace creation response: {resp}")

In [6]:
# Create the tutorial namespace in both the Entity Store and the Data Store
create_namespaces(nemo_client, NDS_URL, NMS_NAMESPACE)

Created namespace in Entity Store: xlam-tutorial-ns
Data Store namespace creation response: <Response [201]>


In [7]:
 # Data Store check goes over plain HTTP because the SDK doesn't cover the Data Store
response = requests.get(
    f"{NDS_URL}/v1/datastore/namespaces/{NMS_NAMESPACE}"
)
print(
    f"Data Store - Status Code: {response.status_code}\n"
    f"Response JSON: {response.json()}"
)

# Entity Store check goes through the SDK
namespace_obj = nemo_client.namespaces.retrieve(namespace_id=NMS_NAMESPACE)
print(
    f"\nEntity Store - Namespace: {namespace_obj.id}",
    f"Created at: {namespace_obj.created_at}",
    f"Description: {namespace_obj.description}",
    f"Project: {namespace_obj.project}",
    sep="\n",
)

Data Store - Status Code: 201
Response JSON: {'namespace': 'xlam-tutorial-ns', 'created_at': '2025-08-11T18:37:54Z', 'updated_at': '2025-08-11T18:37:54Z'}

Entity Store - Namespace: xlam-tutorial-ns
Created at: 2025-08-11 18:37:54.260547
Description: None
Project: None


In [8]:
# Repository id in "<namespace>/<dataset name>" form, reused by the upload and dataset cells
repo_id = "{}/{}".format(NMS_NAMESPACE, DATASET_NAME)

In [9]:
from huggingface_hub import HfApi

# HfApi is pointed at the Data Store's /v1/hf endpoint; the token is passed as an empty string
hf_api = HfApi(endpoint=f"{NDS_URL}/v1/hf", token="")

# Create repo.
# exist_ok=True keeps this cell re-runnable: without it, running the notebook a second
# time fails here because the repository already exists, even though the namespace
# creation cell above tolerates an existing namespace.
hf_api.create_repo(
    repo_id=repo_id,
    repo_type='dataset',
    exist_ok=True,
)

  from .autonotebook import tqdm as notebook_tqdm


RepoUrl('datasets/xlam-tutorial-ns/xlam-ft-dataset', endpoint='http://data-store.test/v1/hf', repo_type='dataset', repo_id='xlam-tutorial-ns/xlam-ft-dataset')

In [10]:
# Upload each split into its own folder of the dataset repository
hf_api.upload_file(
    repo_id=repo_id,
    repo_type="dataset",
    path_or_fileobj=train_fp,
    path_in_repo="training/train.jsonl",
)

hf_api.upload_file(
    repo_id=repo_id,
    repo_type="dataset",
    path_or_fileobj=val_fp,
    path_in_repo="validation/val.jsonl",
)

# Last expression of the cell: its return value is what the notebook displays
hf_api.upload_file(
    repo_id=repo_id,
    repo_type="dataset",
    path_or_fileobj=test_fp,
    path_in_repo="testing/test.jsonl",
)

train.jsonl:   0%|          | 0.00/61.8M [00:00<?, ?B/s]

train.jsonl: 100%|██████████| 61.8M/61.8M [00:00<00:00, 422MB/s]
val.jsonl: 100%|██████████| 15.4M/15.4M [00:00<00:00, 363MB/s]
test.jsonl: 100%|██████████| 19.3M/19.3M [00:00<00:00, 363MB/s]


CommitInfo(commit_url='', commit_message='Upload testing/test.jsonl with huggingface_hub', commit_description='', oid='fc6742ef802be72e0ab014568addf6bfada0eec7', pr_url=None, repo_url=RepoUrl('', endpoint='https://huggingface.co', repo_type='model', repo_id=''), pr_revision=None, pr_num=None)

In [11]:
 # Create a dataset entry whose files_url points at the repository populated above
dataset = nemo_client.datasets.create(
    namespace=NMS_NAMESPACE,
    name=DATASET_NAME,
    project="tool_calling",
    description="Tool calling xLAM dataset in OpenAI ChatCompletions format",
    files_url=f"hf://datasets/{NMS_NAMESPACE}/{DATASET_NAME}",
)
print(f"Created dataset: {dataset.namespace}/{dataset.name}")

# Bare expression so the notebook shows the full returned object
dataset

Created dataset: xlam-tutorial-ns/xlam-ft-dataset


Dataset(files_url='hf://datasets/xlam-tutorial-ns/xlam-ft-dataset', id='dataset-3nVot8VEHmjivMRoC9hg6Q', created_at=datetime.datetime(2025, 8, 11, 18, 38, 12, 94319), custom_fields={}, description='Tool calling xLAM dataset in OpenAI ChatCompletions format', format=None, hf_endpoint=None, limit=None, name='xlam-ft-dataset', namespace='xlam-tutorial-ns', project='tool_calling', split=None, updated_at=datetime.datetime(2025, 8, 11, 18, 38, 12, 94321))

In [12]:
# Read the dataset back and confirm its files_url matches the repository id built earlier
dataset_obj = nemo_client.datasets.retrieve(
    dataset_name=DATASET_NAME,
    namespace=NMS_NAMESPACE,
)

print("Files URL:", dataset_obj.files_url)
assert dataset_obj.files_url == f"hf://datasets/{repo_id}"

Files URL: hf://datasets/xlam-tutorial-ns/xlam-ft-dataset


In [4]:
# Create customization job
# When WANDB_API_KEY is set it is sent as a request header, which reports the training
# metrics to Weights & Biases (WandB); otherwise the plain client is used unchanged.
client_with_wandb = (
    nemo_client.with_options(default_headers={"wandb-api-key": WANDB_API_KEY})
    if WANDB_API_KEY
    else nemo_client
)

customization = client_with_wandb.customization.jobs.create(
    name="llama-3.2-1b-xlam-ft",
    output_model=CUSTOM_MODEL,
    config=f"{BASE_MODEL}@{BASE_MODEL_VERSION}",
    dataset=dict(name=DATASET_NAME, namespace=NMS_NAMESPACE),
    hyperparameters=dict(
        training_type="sft",
        finetuning_type="lora",
        epochs=2,
        batch_size=16,
        learning_rate=0.0001,
        lora=dict(adapter_dim=32, adapter_dropout=0.1),
    ),
)
print(f"Created customization job: {customization.id}")

# Bare expression so the notebook shows the full job object
customization

Created customization job: cust-AbFtZ5ABe9SoWa7wAxr1Re




In [6]:
# Keep the job id for the status and polling cells below
JOB_ID = customization.id

# Re-fetch the job by id
customization = nemo_client.customization.jobs.retrieve(JOB_ID)

# Name of the model that inference queries will be sent to
CUSTOMIZED_MODEL = customization.output_model

In [21]:
 # One-off snapshot of the customization job's status
job_status = nemo_client.customization.jobs.status(job_id=JOB_ID)

print("Percentage done:", job_status.percentage_done)
# default=str covers values json can't encode natively
print(
    "Job Status:",
    json.dumps(job_status.model_dump(), default=str, indent=2),
)

Percentage done: 0.0
Job Status: {
  "created_at": "2025-08-11 18:05:51.648719",
  "status": "running",
  "updated_at": "2025-08-11 18:05:51.648719",
  "best_epoch": null,
  "elapsed_time": 0.0,
  "epochs_completed": 0,
  "metrics": null,
  "percentage_done": 0.0,
  "status_logs": [
    {
      "updated_at": "2025-08-11 18:05:51",
      "detail": null,
      "message": "PVCCreated"
    },
    {
      "updated_at": "2025-08-11 18:05:51",
      "detail": null,
      "message": "EntityHandler_0_Created"
    },
    {
      "updated_at": "2025-08-11 18:05:51.648719",
      "detail": null,
      "message": "created"
    },
    {
      "updated_at": "2025-08-11 18:06:01",
      "detail": null,
      "message": "EntityHandler_0_Pending"
    },
    {
      "updated_at": "2025-08-11 18:06:01",
      "detail": null,
      "message": "EntityHandler_0_Completed"
    },
    {
      "updated_at": "2025-08-11 18:06:01",
      "detail": null,
      "message": "TrainingJobCreated"
    },
    {
      "up

In [7]:
 # Add wait job function to wait for the customization job to complete

from time import sleep, time

def wait_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Poll a customization job until it leaves the pending/created/running states.

    Args:
        nemo_client: SDK client; `customization.jobs.retrieve(job_id=...)` is called on it.
        job_id: Identifier of the customization job to poll.
        polling_interval: Seconds to sleep between two polls.
        timeout: Maximum number of seconds to keep polling, measured from the call.

    Returns:
        The job object from the last `retrieve` call. Its status is whatever the
        service reported once the job stopped being active, which is not
        necessarily "completed"; callers should inspect `job.status`.

    Raises:
        RuntimeError: If the job is still active after `timeout` seconds.
    """
    # Elapsed time is measured on the monotonic clock so that system clock
    # adjustments can neither trigger nor suppress the timeout.
    from time import monotonic

    active_statuses = ("pending", "created", "running")

    start_time = monotonic()
    job = nemo_client.customization.jobs.retrieve(job_id=job_id)
    status = job.status

    while status in active_statuses:
        # Check for timeout before sleeping again
        if monotonic() - start_time > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        # Sleep before polling again
        sleep(polling_interval)

        # Fetch updated status and progress
        job = nemo_client.customization.jobs.retrieve(job_id=job_id)
        status = job.status
        progress = 0.0
        if status == "running" and job.status_details:
            # percentage_done may be None; report it as 0.0
            progress = job.status_details.percentage_done or 0.0
        elif status == "completed":
            progress = 100

        print(f"Job status: {status} after {monotonic() - start_time:.2f} seconds. Progress: {progress}%")

    return job

# Block until the customization job is no longer active (poll every 5 s, give up after 40 min)
job = wait_job(nemo_client, job_id=JOB_ID, polling_interval=5, timeout=2400)

# Extra two-minute wait: sometimes the job is finished but the finetuned model is not ready in NIM yet
sleep(2 * 60)

Job status: running after 5.11 seconds. Progress: 0.0%
Job status: running after 10.13 seconds. Progress: 0.0%
Job status: running after 15.27 seconds. Progress: 0.0%
Job status: running after 20.28 seconds. Progress: 0.0%
Job status: running after 25.30 seconds. Progress: 0.0%
Job status: running after 30.56 seconds. Progress: 0.0%
Job status: running after 35.58 seconds. Progress: 0.0%
Job status: running after 40.60 seconds. Progress: 0.0%
Job status: running after 45.74 seconds. Progress: 0.0%
Job status: running after 50.76 seconds. Progress: 0.0%
Job status: running after 55.78 seconds. Progress: 0.0%
Job status: running after 60.92 seconds. Progress: 0.0%
Job status: running after 65.94 seconds. Progress: 0.0%
Job status: running after 70.96 seconds. Progress: 0.0%
Job status: running after 76.13 seconds. Progress: 0.0%
Job status: running after 81.15 seconds. Progress: 0.0%
Job status: running after 86.16 seconds. Progress: 0.0%
Job status: running after 91.31 seconds. Progress