In [44]:
import json
from time import sleep, time
from nemo_microservices import NeMoMicroservices

In [45]:
from config import *

# Build the NeMo Microservices SDK client: platform services are reached
# through NEMO_URL, inference requests through NIM_URL.
nemo_client = NeMoMicroservices(inference_base_url=NIM_URL, base_url=NEMO_URL)

In [46]:
# Echo the values loaded from config so a wrong endpoint or namespace is
# visible before any service call is made.
print(
    f"Data Store endpoint: {NDS_URL}",
    f"Entity Store, Customizer, Evaluator endpoint: {NEMO_URL}",
    f"NIM endpoint: {NIM_URL}",
    f"Namespace: {NMS_NAMESPACE}",
    f"Base Model: {BASE_MODEL}",
    sep="\n",
)

Data Store endpoint: http://data-store.test
Entity Store, Customizer, Evaluator endpoint: http://nemo.test
NIM endpoint: http://nim.test
Namespace: lora-tutorial-ns
Base Model: meta/llama-3.2-1b-instruct


In [47]:
# Both values below are to be pasted from the previous notebook.
# Alternative model identifier, left commented out:
# CUSTOMIZED_MODEL = "lora-tutorial-ns/llama-3.2-1b-xlam-run1@v4"
CUSTOMIZED_MODEL = "lora-tutorial-ns/llama-3.2-1b-custom@v4"
DATASET_NAME = "news-lora-dataset-train-50k"

In [48]:
# Check that the custom LoRA model is being served by NVIDIA NIM
models = nemo_client.inference.models.list()
# Keep just the ID of every model the inference endpoint reports.
model_names = [entry.id for entry in models.data]

# Fail fast: the evaluation below targets CUSTOMIZED_MODEL by this exact ID.
assert CUSTOMIZED_MODEL in model_names, f"Model {CUSTOMIZED_MODEL} not found"

In [49]:
# Display every model ID reported by the inference endpoint.
model_names

['meta/llama-3.2-1b-instruct',
 'lora-tutorial-ns/llama-3.2-1b-custom@v1',
 'lora-tutorial-ns/llama-3.2-1b-custom@v2',
 'lora-tutorial-ns/llama-3.2-1b-custom@v3',
 'lora-tutorial-ns/llama-3.2-1b-custom@v4',
 'meta/llama-3.1-8b-instruct']

In [50]:
# Sanity check to validate dataset
dataset = nemo_client.datasets.retrieve(namespace=NMS_NAMESPACE, dataset_name=f"{DATASET_NAME}")
print("Files URL:", dataset.files_url)

Files URL: hf://datasets/lora-tutorial-ns/news-lora-dataset-train-50k


In [51]:
# Evaluation config: one custom chat-completion task ("qa") with two metrics,
# an F1 score and a string-equality check.
config = {
    "type": "custom",
    "params": {"parallelism": 8},
    "tasks": {
        "qa": {
            "type": "chat-completion",
            "params": {
                # Request template: the item's `prompt` field becomes the
                # content of the single user message.
                "template": {
                    "messages": [
                        {"role": "user", "content": "{{item.prompt}}"},
                    ],
                    "max_tokens": 20,
                    "temperature": 0.7,
                    "top_p": 0.9,
                },
            },
            "metrics": {
                # Both metrics use the item's trimmed `completion` as reference.
                "f1": {
                    "type": "f1",
                    "params": {"ground_truth": "{{item.completion | trim}}"},
                },
                "string-check": {
                    "type": "string-check",
                    "params": {
                        "check": [
                            "{{item.completion | trim}}",
                            "equals",
                            "{{output_text | trim}}",
                        ],
                    },
                },
            },
            # The dataset is part of the "qa" task: it sits next to "params"
            # and "metrics", not next to "qa" itself.
            "dataset": {
                "files_url": f"hf://datasets/{NMS_NAMESPACE}/{DATASET_NAME}/testing",
            },
        },
    },
}

In [52]:
# Create evaluation job for the base model
eval_job = nemo_client.evaluation.jobs.create(
    config=config,
    target={"type": "model", "model": CUSTOMIZED_MODEL}
)

base_eval_job_id = eval_job.id
print(f"Created evaluation job: {base_eval_job_id}")
eval_job

Created evaluation job: eval-Tjh13MZuxUKNQRTMTgDuTF


EvaluationJob(config=EvaluationConfig(type='custom', id='eval-config-2bvBXfWse6DrL7k4ntVrXM', created_at=datetime.datetime(2025, 9, 3, 17, 18, 32, 153435), custom_fields={}, description=None, groups=None, name='eval-config-2bvBXfWse6DrL7k4ntVrXM', namespace='default', ownership=None, params=EvaluationParams(extra=None, limit_samples=None, max_retries=None, max_tokens=None, parallelism=8, request_timeout=None, stop=None, temperature=None, top_p=None), project=None, schema_version='1.0', tasks={'qa': TaskConfig(type='chat-completion', dataset=DatasetEv(files_url='hf://datasets/lora-tutorial-ns/news-lora-dataset-train-50k/testing', id='dataset-6YBPLjUxxrpj14MscD9et5', created_at=datetime.datetime(2025, 9, 3, 17, 18, 32, 153482), custom_fields={}, description=None, format=None, hf_endpoint=None, limit=None, name='dataset-6YBPLjUxxrpj14MscD9et5', namespace='default', ownership=None, project=None, schema_version='1.0', split=None, type_prefix=None, updated_at=datetime.datetime(2025, 9, 3, 17

In [53]:
# Fetch the job as the service currently sees it and pretty-print it.
job_status = nemo_client.evaluation.jobs.retrieve(job_id=base_eval_job_id)
print(
    "Job Status:",
    # default=str stringifies values json cannot encode (e.g. the datetimes).
    json.dumps(job_status.model_dump(), default=str, indent=2),
)

Job Status: {
  "config": {
    "type": "custom",
    "id": "eval-config-2bvBXfWse6DrL7k4ntVrXM",
    "created_at": "2025-09-03 17:18:32.153435",
    "custom_fields": {},
    "description": null,
    "groups": null,
    "name": "eval-config-2bvBXfWse6DrL7k4ntVrXM",
    "namespace": "default",
    "ownership": null,
    "params": {
      "extra": null,
      "limit_samples": null,
      "max_retries": null,
      "max_tokens": null,
      "parallelism": 8,
      "request_timeout": null,
      "stop": null,
      "temperature": null,
      "top_p": null
    },
    "project": null,
    "schema_version": "1.0",
    "tasks": {
      "qa": {
        "type": "chat-completion",
        "dataset": {
          "files_url": "hf://datasets/lora-tutorial-ns/news-lora-dataset-train-50k/testing",
          "id": "dataset-6YBPLjUxxrpj14MscD9et5",
          "created_at": "2025-09-03 17:18:32.153482",
          "custom_fields": {},
          "description": null,
          "format": null,
          "hf_e

In [54]:
def wait_eval_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Poll an evaluation job until it is no longer pending, created or running.

    The initial status is printed on its own line; every later poll prints the
    status, the elapsed time and a progress percentage.

    Args:
        nemo_client: Client exposing ``evaluation.jobs.retrieve(job_id=...)``.
        job_id: Identifier of the evaluation job to watch.
        polling_interval: Seconds to wait between two polls.
        timeout: Maximum number of seconds to keep waiting on an active job.

    Returns:
        The most recently retrieved job object. Its status is whatever
        non-active value ended the loop; this helper does not distinguish
        success from failure, so callers should inspect ``job.status``.

    Raises:
        RuntimeError: If the job is still active after ``timeout`` seconds.
    """
    active_statuses = ("pending", "created", "running")

    start_time = time()
    job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
    status = job.status
    print(status)

    while status in active_statuses:
        elapsed = time() - start_time
        if elapsed > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        # Cap the sleep at the remaining budget so the deadline is not
        # overshot by up to one full polling interval.
        sleep(min(polling_interval, timeout - elapsed))

        # Fetch the updated job and its status.
        job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
        status = job.status

        # Progress is read from status_details only while running; a
        # completed job counts as 100, every other status as 0.
        progress = 0
        if status == "running" and job.status_details:
            progress = job.status_details.progress or 0
        elif status == "completed":
            progress = 100

        # Two decimals: the raw value carries float noise such as
        # 1.0040000000000007.
        print(f"Job status: {status} after {time() - start_time:.2f} seconds. Progress: {progress:.2f}%")

    return job

In [55]:
# Block until the evaluation job leaves its active states: poll every 25 s,
# give up after 6000 s.
job = wait_eval_job(
    nemo_client,
    base_eval_job_id,
    polling_interval=25,
    timeout=6000,
)

running
Job status: running after 25.92 seconds. Progress: 1.0040000000000007%
Job status: running after 51.03 seconds. Progress: 2.1259999999999875%
Job status: running after 76.17 seconds. Progress: 3.1619999999998734%
Job status: running after 101.29 seconds. Progress: 4.249999999999754%
Job status: running after 126.35 seconds. Progress: 5.28599999999964%
Job status: running after 152.79 seconds. Progress: 6.355999999999522%
Job status: running after 177.86 seconds. Progress: 7.475999999999399%
Job status: running after 202.96 seconds. Progress: 8.483999999999503%
Job status: running after 228.04 seconds. Progress: 9.597999999999875%
Job status: running after 254.07 seconds. Progress: 10.646000000000225%
Job status: running after 279.43 seconds. Progress: 11.766000000000599%
Job status: running after 306.63 seconds. Progress: 12.88000000000097%
Job status: running after 331.76 seconds. Progress: 13.95400000000133%
Job status: running after 357.28 seconds. Progress: 14.6480000000015

In [56]:
# Retrieve the job again and pretty-print its current state.
job_status = nemo_client.evaluation.jobs.retrieve(job_id=base_eval_job_id)
print(
    "Job Status:",
    # default=str stringifies values json cannot encode (e.g. the datetimes).
    json.dumps(job_status.model_dump(), default=str, indent=2),
)

Job Status: {
  "config": {
    "type": "custom",
    "id": "eval-config-2bvBXfWse6DrL7k4ntVrXM",
    "created_at": "2025-09-03 17:18:32.153435",
    "custom_fields": {},
    "description": null,
    "groups": null,
    "name": "eval-config-2bvBXfWse6DrL7k4ntVrXM",
    "namespace": "default",
    "ownership": null,
    "params": {
      "extra": null,
      "limit_samples": null,
      "max_retries": null,
      "max_tokens": null,
      "parallelism": 8,
      "request_timeout": null,
      "stop": null,
      "temperature": null,
      "top_p": null
    },
    "project": null,
    "schema_version": "1.0",
    "tasks": {
      "qa": {
        "type": "chat-completion",
        "dataset": {
          "files_url": "hf://datasets/lora-tutorial-ns/news-lora-dataset-train-50k/testing",
          "id": "dataset-6YBPLjUxxrpj14MscD9et5",
          "created_at": "2025-09-03 17:18:32.153482",
          "custom_fields": {},
          "description": null,
          "format": null,
          "hf_e

## Get the JSON results

In [57]:
# Fetch the evaluation results for the job and print their top-level fields.
results = nemo_client.evaluation.jobs.results(base_eval_job_id)

print(
    f"Result ID: {results.id}",
    f"Job ID: {results.job}",
    f"Tasks: {results.tasks}",
    f"Groups: {results.groups}",
    sep="\n",
)

Result ID: evaluation_result-AMTr7P5BBo3qkz5PqXq2Gh
Job ID: eval-Tjh13MZuxUKNQRTMTgDuTF
Tasks: {'qa': TaskResult(metrics={'f1': MetricResult(scores={'f1_score': Score(value=0.9237920000000003, stats=ScoreStats(count=50000, max=None, mean=0.9237920000000003, min=None, stddev=None, stderr=None, sum=46189.60000000001, sum_squared=None, variance=None))}), 'string-check': MetricResult(scores={'string-check': Score(value=0.92068, stats=ScoreStats(count=50000, max=None, mean=0.92068, min=None, stddev=None, stderr=None, sum=46034.0, sum_squared=None, variance=None))})})}
Groups: {}


## Get the zip results

In [16]:
# Fetch the job's downloadable results from the service ...
results_zip = nemo_client.evaluation.jobs.download_results(base_eval_job_id)

# ... and write them to result.zip in the current working directory.
results_zip.write_to_file("result.zip")

In [17]:
!unzip result.zip -d result

Archive:  result.zip
 extracting: result/job.json         
 extracting: result/eval-xvey9bteywybybc2eptqq8.log  


 extracting: result/results.json     
 extracting: result/evaluation_results.json  
 extracting: result/.gitattributes   
 extracting: result/.cache/huggingface/.gitignore  
 extracting: result/.cache/huggingface/download/job.json.lock  
 extracting: result/.cache/huggingface/download/.gitattributes.metadata  
 extracting: result/.cache/huggingface/download/eval-xvey9bteywybybc2eptqq8.log.metadata  
 extracting: result/.cache/huggingface/download/evaluation_results.json.metadata  
 extracting: result/.cache/huggingface/download/job.json.metadata  
 extracting: result/.cache/huggingface/download/evaluation_results.json.lock  
 extracting: result/.cache/huggingface/download/results.json.lock  
 extracting: result/.cache/huggingface/download/.gitattributes.lock  
 extracting: result/.cache/huggingface/download/eval-xvey9bteywybybc2eptqq8.log.lock  
 extracting: result/.cache/huggingface/download/results.json.metadata  
