In [22]:
import json
from time import sleep, time
from nemo_microservices import NeMoMicroservices

In [2]:
from config import *

# SDK client for the NeMo Microservices platform: base_url is set to NEMO_URL
# and inference_base_url to NIM_URL (both come from config).
nemo_client = NeMoMicroservices(inference_base_url=NIM_URL, base_url=NEMO_URL)

In [3]:
# Echo the configuration in use so a misconfigured endpoint is spotted early.
print(
    f"Data Store endpoint: {NDS_URL}",
    f"Entity Store, Customizer, Evaluator endpoint: {NEMO_URL}",
    f"NIM endpoint: {NIM_URL}",
    f"Namespace: {NMS_NAMESPACE}",
    f"Base Model: {BASE_MODEL}",
    sep="\n",
)

Data Store endpoint: http://data-store.test
Entity Store, Customizer, Evaluator endpoint: http://nemo.test
NIM endpoint: http://nim.test
Namespace: lora-tutorial-ns
Base Model: meta/llama-3.2-1b-instruct


In [6]:
# Identifier of the customized (LoRA) model to evaluate. Paste the value
# produced by the previous notebook.
# NOTE(review): looks like "<namespace>/<model-name>@<version>" - confirm.
CUSTOMIZED_MODEL = "lora-tutorial-ns/llama-3.2-1b-xlam-run1@v4"

In [7]:
 # Check if the custom LoRA model is hosted by NVIDIA NIM
models = nemo_client.inference.models.list()
# Collect the ids reported by the inference endpoint.
model_names = [entry.id for entry in models.data]

# Stop here if the customized model is not being served.
assert CUSTOMIZED_MODEL in model_names, f"Model {CUSTOMIZED_MODEL} not found"

In [8]:
# Sanity check: look the dataset up by namespace and name, then show where
# its files are stored.
dataset = nemo_client.datasets.retrieve(
    namespace=NMS_NAMESPACE,
    dataset_name=DATASET_NAME,
)
print("Files URL:", dataset.files_url)

Files URL: hf://datasets/lora-tutorial-ns/news-lora-dataset


In [45]:
# Custom evaluation config with a single chat-completion task ("qa").
# Each dataset record's `prompt` field is sent as the user message and the
# reply is scored with three metrics (BLEU, ROUGE, exact string match).
#
# All three metrics compare against the record's `completion` field.
# The string-check metric previously referenced `item.reference_answer`,
# a field no other template in this config uses.
# NOTE(review): confirm that test.jsonl records carry `prompt` and
# `completion` keys.
simple_label_eval_config = {
    "type": "custom",
    "params": {
        "parallelism": 8
    },
    "tasks": {
        "qa": {
            "type": "chat-completion",
            "params": {
                "template": {
                    "messages": [
                        {"role": "user", "content": "{{item.prompt}}"},
                    ],
                    "max_tokens": 20,
                    "temperature": 0.7,
                    "top_p": 0.9
                }
            },
            "metrics": {
                "bleu": {
                    "type": "bleu",
                    "params": {
                        "references": ["{{item.completion | trim}}"]
                    }
                },
                "rouge": {
                    "type": "rouge",
                    "params": {
                        "ground_truth": "{{item.completion | trim}}"
                    }
                },
                "string-check": {
                    "type": "string-check",
                    "params": {
                        # [left operand, operation, right operand]
                        "check": [
                            "{{item.completion | trim}}",
                            "equals",
                            "{{output_text | trim}}"
                        ]
                    }
                }
            },
            "dataset": {
                "files_url": f"hf://datasets/{NMS_NAMESPACE}/{DATASET_NAME}/testing/test.jsonl",
                # Only the first 50 records are requested for this run.
                # NOTE(review): confirm how the service applies `limit`.
                "limit": 50
            }
        }
    }
}

In [46]:
# Submit an evaluation job that targets the base model, keeping its id for
# the status and polling cells.
eval_job = nemo_client.evaluation.jobs.create(
    target={"type": "model", "model": BASE_MODEL},
    config=simple_label_eval_config,
)
base_eval_job_id = eval_job.id

print(f"Created evaluation job: {base_eval_job_id}")
eval_job

Created evaluation job: eval-EpzvifaVZbE6p4G4grYiUi


EvaluationJob(config=EvaluationConfig(type='custom', id='eval-config-VtRHws4tXKCKFSdJBzmDGA', created_at=datetime.datetime(2025, 8, 13, 18, 58, 46, 728839), custom_fields={}, description=None, groups=None, name='eval-config-VtRHws4tXKCKFSdJBzmDGA', namespace='default', ownership=None, params=EvaluationParams(extra=None, limit_samples=None, max_retries=None, max_tokens=None, parallelism=8, request_timeout=None, stop=None, temperature=None, top_p=None), project=None, schema_version='1.0', tasks={'qa': TaskConfig(type='chat-completion', dataset=DatasetEv(files_url='hf://datasets/lora-tutorial-ns/news-lora-dataset/testing/test.jsonl', id='dataset-St5jA5XNNKcbj6Hy5SEQVw', created_at=datetime.datetime(2025, 8, 13, 18, 58, 46, 728909), custom_fields={}, description=None, format=None, hf_endpoint=None, limit=50, name='dataset-St5jA5XNNKcbj6Hy5SEQVw', namespace='default', ownership=None, project=None, schema_version='1.0', split=None, type_prefix=None, updated_at=datetime.datetime(2025, 8, 13, 

In [47]:
# Fetch the job record and pretty-print it as JSON. default=str stringifies
# values json cannot encode natively (e.g. the datetime fields).
job_status = nemo_client.evaluation.jobs.retrieve(job_id=base_eval_job_id)
print(
    "Job Status:",
    json.dumps(job_status.model_dump(), default=str, indent=2),
)

Job Status: {
  "config": {
    "type": "custom",
    "id": "eval-config-VtRHws4tXKCKFSdJBzmDGA",
    "created_at": "2025-08-13 18:58:46.728839",
    "custom_fields": {},
    "description": null,
    "groups": null,
    "name": "eval-config-VtRHws4tXKCKFSdJBzmDGA",
    "namespace": "default",
    "ownership": null,
    "params": {
      "extra": null,
      "limit_samples": null,
      "max_retries": null,
      "max_tokens": null,
      "parallelism": 8,
      "request_timeout": null,
      "stop": null,
      "temperature": null,
      "top_p": null
    },
    "project": null,
    "schema_version": "1.0",
    "tasks": {
      "qa": {
        "type": "chat-completion",
        "dataset": {
          "files_url": "hf://datasets/lora-tutorial-ns/news-lora-dataset/testing/test.jsonl",
          "id": "dataset-St5jA5XNNKcbj6Hy5SEQVw",
          "created_at": "2025-08-13 18:58:46.728909",
          "custom_fields": {},
          "description": null,
          "format": null,
          "hf_

In [48]:
def wait_eval_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Poll an evaluation job until it is no longer pending, created or running.

    The current status is printed once up front, then one progress line is
    printed after every poll.

    Args:
        nemo_client: Client exposing ``evaluation.jobs.retrieve(job_id=...)``.
        job_id: Identifier of the evaluation job to watch.
        polling_interval: Seconds to sleep between consecutive polls.
        timeout: Seconds of waiting after which polling is abandoned. The
            check runs at the top of each iteration, before sleeping.

    Returns:
        The most recently retrieved job object. Its status may be anything
        other than the three in-flight values, including a failure status.

    Raises:
        RuntimeError: If the job is still in flight once ``timeout`` seconds
            have elapsed.
    """
    in_flight = ("pending", "created", "running")
    began = time()

    job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
    state = job.status
    print(state)

    while state in in_flight:
        # Give up once the time budget has been used.
        if time() - began > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        sleep(polling_interval)

        # Refresh the job record.
        job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
        state = job.status

        # Progress is read from status_details only while the job is running.
        if state == "completed":
            percent = 100
        elif state == "running" and job.status_details:
            percent = job.status_details.progress or 0
        else:
            percent = 0

        print(f"Job status: {state} after {time() - began:.2f} seconds. Progress: {percent}%")

    return job

In [49]:
# Wait for the base-model evaluation job: poll every 5 seconds and give up
# after 600 seconds.
job = wait_eval_job(
    nemo_client,
    base_eval_job_id,
    polling_interval=5,
    timeout=600,
)

failed
