In [60]:
import json
from time import sleep, time
from nemo_microservices import NeMoMicroservices

In [61]:
from config import *

# Build the NeMo Microservices SDK client.
# base_url points at the Entity Store / Customizer / Evaluator endpoint,
# inference_base_url at the NIM endpoint.
nemo_client = NeMoMicroservices(base_url=NEMO_URL, inference_base_url=NIM_URL)

In [62]:
# Echo the endpoints, namespace and base model this notebook will use.
print(
    f"Data Store endpoint: {NDS_URL}",
    f"Entity Store, Customizer, Evaluator endpoint: {NEMO_URL}",
    f"NIM endpoint: {NIM_URL}",
    f"Namespace: {NMS_NAMESPACE}",
    f"Base Model: {BASE_MODEL}",
    sep="\n",
)

Data Store endpoint: http://data-store.test
Entity Store, Customizer, Evaluator endpoint: http://nemo.test
NIM endpoint: http://nim.test
Namespace: lora-tutorial-ns
Base Model: meta/llama-3.2-1b-instruct


In [63]:
# Name of the model to evaluate.
# To evaluate a fine-tuned LoRA model, paste its name from the previous notebook, e.g.:
# CUSTOMIZED_MODEL = "lora-tutorial-ns/llama-3.2-1b-xlam-run1@v4"
# NOTE(review): the value below does not look like a customized model name - confirm it is intended.
CUSTOMIZED_MODEL = "meta/llama-3.1-8b-instruct"

In [64]:
 # Check if the custom LoRA model is hosted by NVIDIA NIM
# Collect the ids of every model the inference endpoint lists, then fail fast
# if the model we intend to evaluate is not among them.
models = nemo_client.inference.models.list()
model_names = [entry.id for entry in models.data]

assert CUSTOMIZED_MODEL in model_names, f"Model {CUSTOMIZED_MODEL} not found"

In [65]:
# Sanity check: look the dataset up by namespace and name, and show where its files live.
dataset = nemo_client.datasets.retrieve(
    namespace=NMS_NAMESPACE,
    dataset_name=DATASET_NAME,
)
print("Files URL:", dataset.files_url)

Files URL: hf://datasets/lora-tutorial-ns/news-lora-dataset


In [85]:
# Evaluation configuration with a single chat-completion task named "qa".
config = {
    "type": "custom",
    "params": {
        "parallelism": 8,
    },
    "tasks": {
        "qa": {
            "type": "chat-completion",
            "params": {
                # Request template: each dataset item's `prompt` field is sent
                # as one user message, together with the sampling settings below.
                "template": {
                    "messages": [
                        {"role": "user", "content": "{{item.prompt}}"},
                    ],
                    "max_tokens": 20,
                    "temperature": 0.7,
                    "top_p": 0.9,
                },
            },
            # F1 metric whose ground truth is the item's `completion` field, trimmed.
            "metrics": {
                "f1": {
                    "type": "f1",
                    "params": {"ground_truth": "{{item.completion | trim}}"},
                },
            },
            # Test split of the dataset; `limit` presumably caps the number of
            # records evaluated - confirm against the Evaluator documentation.
            "dataset": {
                "files_url": f"hf://datasets/{NMS_NAMESPACE}/{DATASET_NAME}/testing/test.jsonl",
                "limit": 50,
            },
        },
    },
}

In [86]:
# Submit an evaluation job that runs `config` against the model named by CUSTOMIZED_MODEL.
eval_job = nemo_client.evaluation.jobs.create(
    target={"type": "model", "model": CUSTOMIZED_MODEL},
    config=config,
)

# Keep the job id around; later cells poll the job and fetch its results with it.
base_eval_job_id = eval_job.id
print(f"Created evaluation job: {base_eval_job_id}")
eval_job

Created evaluation job: eval-G3mMMWZP6f5HabQifaGR1s


EvaluationJob(config=EvaluationConfig(type='custom', id='eval-config-3S3FXro3ipFW7aDQuDs3M4', created_at=datetime.datetime(2025, 8, 14, 16, 3, 49, 697435), custom_fields={}, description=None, groups=None, name='eval-config-3S3FXro3ipFW7aDQuDs3M4', namespace='default', ownership=None, params=EvaluationParams(extra=None, limit_samples=None, max_retries=None, max_tokens=None, parallelism=8, request_timeout=None, stop=None, temperature=None, top_p=None), project=None, schema_version='1.0', tasks={'qa': TaskConfig(type='chat-completion', dataset=DatasetEv(files_url='hf://datasets/lora-tutorial-ns/news-lora-dataset/testing/test.jsonl', id='dataset-VZbt3AQpUMpQ22Fb9w9jk8', created_at=datetime.datetime(2025, 8, 14, 16, 3, 49, 697487), custom_fields={}, description=None, format=None, hf_endpoint=None, limit=50, name='dataset-VZbt3AQpUMpQ22Fb9w9jk8', namespace='default', ownership=None, project=None, schema_version='1.0', split=None, type_prefix=None, updated_at=datetime.datetime(2025, 8, 14, 16

In [87]:
# Fetch the job right after submission and pretty-print its full record as JSON.
# default=str stringifies values json cannot serialize natively (e.g. datetimes).
job_status = nemo_client.evaluation.jobs.retrieve(job_id=base_eval_job_id)
print(
    "Job Status:",
    json.dumps(job_status.model_dump(), indent=2, default=str),
)

Job Status: {
  "config": {
    "type": "custom",
    "id": "eval-config-3S3FXro3ipFW7aDQuDs3M4",
    "created_at": "2025-08-14 16:03:49.697435",
    "custom_fields": {},
    "description": null,
    "groups": null,
    "name": "eval-config-3S3FXro3ipFW7aDQuDs3M4",
    "namespace": "default",
    "ownership": null,
    "params": {
      "extra": null,
      "limit_samples": null,
      "max_retries": null,
      "max_tokens": null,
      "parallelism": 8,
      "request_timeout": null,
      "stop": null,
      "temperature": null,
      "top_p": null
    },
    "project": null,
    "schema_version": "1.0",
    "tasks": {
      "qa": {
        "type": "chat-completion",
        "dataset": {
          "files_url": "hf://datasets/lora-tutorial-ns/news-lora-dataset/testing/test.jsonl",
          "id": "dataset-VZbt3AQpUMpQ22Fb9w9jk8",
          "created_at": "2025-08-14 16:03:49.697487",
          "custom_fields": {},
          "description": null,
          "format": null,
          "hf_

In [88]:
def wait_eval_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Poll an evaluation job until it is no longer pending, created, or running.

    Args:
        nemo_client: Client exposing ``evaluation.jobs.retrieve(job_id=...)``.
        job_id: Identifier of the evaluation job to watch.
        polling_interval: Seconds to sleep between two consecutive polls.
        timeout: Elapsed-seconds budget, checked before each sleep.

    Returns:
        The job object returned by the most recent ``retrieve`` call.

    Raises:
        RuntimeError: If the job is still in flight once more than ``timeout``
            seconds have elapsed since this function was called.
    """
    in_flight_states = ("pending", "created", "running")
    began_at = time()

    job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
    current = job.status
    print(current)

    while current in in_flight_states:
        # Give up once the time budget is exhausted.
        elapsed = time() - began_at
        if elapsed > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        # Wait, then refresh the job record.
        sleep(polling_interval)
        job = nemo_client.evaluation.jobs.retrieve(job_id=job_id)
        current = job.status

        # Progress is read from the job only while it is running; a completed
        # job is reported as 100 and every other state as 0.
        if current == "completed":
            progress = 100
        elif current == "running" and job.status_details:
            progress = job.status_details.progress or 0
        else:
            progress = 0

        print(f"Job status: {current} after {time() - began_at:.2f} seconds. Progress: {progress}%")

    return job

In [89]:
# Block until the evaluation job finishes: poll every 5 seconds, give up after 600 seconds.
job = wait_eval_job(
    nemo_client=nemo_client,
    job_id=base_eval_job_id,
    polling_interval=5,
    timeout=600,
)

completed


In [90]:
# Re-fetch the job after polling has finished and pretty-print its full record as JSON.
# default=str stringifies values json cannot serialize natively (e.g. datetimes).
job_status = nemo_client.evaluation.jobs.retrieve(job_id=base_eval_job_id)
print(
    "Job Status:",
    json.dumps(job_status.model_dump(), indent=2, default=str),
)

Job Status: {
  "config": {
    "type": "custom",
    "id": "eval-config-3S3FXro3ipFW7aDQuDs3M4",
    "created_at": "2025-08-14 16:03:49.697435",
    "custom_fields": {},
    "description": null,
    "groups": null,
    "name": "eval-config-3S3FXro3ipFW7aDQuDs3M4",
    "namespace": "default",
    "ownership": null,
    "params": {
      "extra": null,
      "limit_samples": null,
      "max_retries": null,
      "max_tokens": null,
      "parallelism": 8,
      "request_timeout": null,
      "stop": null,
      "temperature": null,
      "top_p": null
    },
    "project": null,
    "schema_version": "1.0",
    "tasks": {
      "qa": {
        "type": "chat-completion",
        "dataset": {
          "files_url": "hf://datasets/lora-tutorial-ns/news-lora-dataset/testing/test.jsonl",
          "id": "dataset-VZbt3AQpUMpQ22Fb9w9jk8",
          "created_at": "2025-08-14 16:03:49.697487",
          "custom_fields": {},
          "description": null,
          "format": null,
          "hf_

## Get the JSON results

In [91]:
# Fetch the structured results of the evaluation job.
results = nemo_client.evaluation.jobs.results(base_eval_job_id)

# Show the result id, the owning job, and the per-task / per-group scores.
print(
    f"Result ID: {results.id}",
    f"Job ID: {results.job}",
    f"Tasks: {results.tasks}",
    f"Groups: {results.groups}",
    sep="\n",
)

Result ID: evaluation_result-Qu6sLy1DmAhqBxbQJvqj1X
Job ID: eval-G3mMMWZP6f5HabQifaGR1s
Tasks: {'qa': TaskResult(metrics={'string-check': MetricResult(scores={'string-check': Score(value=0.26, stats=ScoreStats(count=50, max=None, mean=0.26, min=None, stddev=None, stderr=None, sum=13.0, sum_squared=None, variance=None))}), 'f1': MetricResult(scores={'f1_score': Score(value=0.4581904761904763, stats=ScoreStats(count=50, max=None, mean=0.4581904761904763, min=None, stddev=None, stderr=None, sum=22.909523809523815, sum_squared=None, variance=None))})})}
Groups: {}


## Get the zip results

In [92]:
# Fetch the evaluation results archive for this job ...
results_zip = nemo_client.evaluation.jobs.download_results(base_eval_job_id)

# ... and store it in the working directory as result.zip.
results_zip.write_to_file("result.zip")

In [93]:
# Extract the downloaded archive into ./result.
# -o overwrites previously extracted files without prompting, so re-running this
# cell does not stall on unzip's interactive "replace?" question.
!unzip -o result.zip -d result

Archive:  result.zip
 extracting: result/job.json         
 extracting: result/results.json     
 extracting: result/evaluation_results.json  
 extracting: result/.gitattributes   
 extracting: result/eval-g3mmmwzp6f5habqifagr1s.log  
 extracting: result/.cache/huggingface/.gitignore  
 extracting: result/.cache/huggingface/download/eval-g3mmmwzp6f5habqifagr1s.log.metadata  
 extracting: result/.cache/huggingface/download/job.json.lock  
 extracting: result/.cache/huggingface/download/.gitattributes.metadata  
 extracting: result/.cache/huggingface/download/eval-g3mmmwzp6f5habqifagr1s.log.lock  
 extracting: result/.cache/huggingface/download/evaluation_results.json.metadata  
 extracting: result/.cache/huggingface/download/job.json.metadata  
 extracting: result/.cache/huggingface/download/evaluation_results.json.lock  
 extracting: result/.cache/huggingface/download/results.json.lock  
 extracting: result/.cache/huggingface/download/.gitattributes.lock  
 extracting: result/.cache/hu