In [1]:
import os
import json
import random
import requests
from openai import OpenAI
from nemo_microservices import NeMoMicroservices

In [7]:
# NOTE(review): the star-import hides where names come from. This notebook uses
# NEMO_URL, NIM_URL, NMS_NAMESPACE, DATASET_NAME, WANDB_API_KEY, BASE_MODEL,
# BASE_MODEL_VERSION and test_fp without defining them, so they are presumably
# all provided by config.py -- TODO confirm and consider importing them explicitly.
from config import *

# Initialize NeMo Microservices SDK client (shared by the cells below).
# Both endpoint URLs are read from the config values NEMO_URL and NIM_URL.
nemo_client = NeMoMicroservices(
    base_url=NEMO_URL,
    inference_base_url=NIM_URL,
)
# "<namespace>/<dataset name>" identifier built from the config values.
repo_id = f"{NMS_NAMESPACE}/{DATASET_NAME}"

In [45]:
# Look up the enabled SFT + LoRA customization configs: first page, at most 10
# entries, sorted by "-created_at" (presumably newest first).
config_filter = {
    "training_type": "sft",
    "finetuning_type": "lora",
    "enabled": True,
}
configs = nemo_client.customization.configs.list(
    filter=config_filter,
    sort="-created_at",
    page=1,
    page_size=10,
)

# Report how many configs matched, then one summary line per config.
print(f"Found {len(configs.data)} configs")
for config in configs.data:
    print(f"Namespace: {config.namespace} Config:{config.name} - {config.description}")

Found 2 configs
Namespace: meta Config:llama-3.2-1b-instruct@v1.0.0+L40 - None
Namespace: meta Config:llama-3.2-1b-instruct@v1.0.0+A100 - None


In [135]:
# Output model name for this fine-tuning run, in "<namespace>/<name>@<version>" form.
# The namespace is taken from NMS_NAMESPACE (instead of being typed out again) so it
# cannot drift from the namespace that the model listing/lookup cells query.
CUSTOM_MODEL = f"{NMS_NAMESPACE}/llama-3.2-1b-xlam-run1@v5"

In [None]:
# Optional cleanup: uncomment to delete an older version of the custom model.
# nemo_client.models.delete(namespace=NMS_NAMESPACE, model_name='llama-3.2-1b-xlam-run1@v2')

DeleteResponse(id='model-YUsHWp4sVSgpEBKzmW64JJ', deleted_at=None, message='Resource deleted successfully.')

In [136]:
# Create the customization (fine-tuning) job.
# A truthy WANDB_API_KEY is sent in the "wandb-api-key" request header, which will
# report the training metrics to Weights & Biases (WandB); otherwise the plain
# client is used unchanged.
client_with_wandb = (
    nemo_client.with_options(default_headers={"wandb-api-key": WANDB_API_KEY})
    if WANDB_API_KEY
    else nemo_client
)

# Training settings submitted with the job request (SFT with a LoRA adapter).
job_hyperparameters = {
    "sequence_packing_enabled": True,
    "training_type": "sft",
    "finetuning_type": "lora",
    "epochs": 2,
    "batch_size": 16,
    "learning_rate": 0.0001,
    "lora": {
        "adapter_dim": 16,
        "adapter_dropout": 0.1,
    },
}

customization = client_with_wandb.customization.jobs.create(
    name="llama-3.2-1b-xlam-ft-seq-packed",
    output_model=CUSTOM_MODEL,
    config=f"{BASE_MODEL}@{BASE_MODEL_VERSION}",
    dataset={"name": DATASET_NAME, "namespace": NMS_NAMESPACE},
    hyperparameters=job_hyperparameters,
)
print(f"Created customization job: {customization.id}")

# Bare expression: shows the created job object as the cell output.
customization

Created customization job: cust-VJ7FRPk9FLuH6RyMLYDyMb




In [137]:
# To track status
# Keep the job id in a constant so the later status / wait / cancel cells can refer to it.
JOB_ID = customization.id

# Re-fetch the job record by id; this rebinds `customization` to the retrieved object.
customization = nemo_client.customization.jobs.retrieve(JOB_ID)

# This will be the name of the model that will be used to send inference queries to
CUSTOMIZED_MODEL = customization.output_model

In [74]:
# Optional: uncomment to cancel the customization job identified by JOB_ID.
# nemo_client.customization.jobs.cancel(job_id=JOB_ID)

In [138]:
 # Get job status
job_status = nemo_client.customization.jobs.status(job_id=JOB_ID)

print("Percentage done:", job_status.percentage_done)

# Pretty-print the full status payload; default=str stringifies any value
# that json cannot serialize natively.
status_json = json.dumps(job_status.model_dump(), indent=2, default=str)
print("Job Status:", status_json)

Percentage done: 0.0
Job Status: {
  "created_at": "2025-08-13 18:17:19.419953",
  "status": "created",
  "updated_at": "2025-08-13 18:17:19.419953",
  "best_epoch": null,
  "elapsed_time": 0.0,
  "epochs_completed": 0,
  "metrics": null,
  "percentage_done": 0.0,
  "status_logs": [
    {
      "updated_at": "2025-08-13 18:17:19.419953",
      "detail": null,
      "message": "created"
    }
  ],
  "steps_completed": 0,
  "steps_per_epoch": null,
  "train_loss": null,
  "val_loss": null
}


In [141]:
 # Add wait job function to wait for the customization job to complete

from time import sleep, time

def wait_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Poll a customization job until it leaves the pending/created/running states.

    Args:
        nemo_client: Client exposing ``customization.jobs.retrieve(job_id=...)``.
        job_id: Identifier of the customization job to wait for.
        polling_interval: Seconds to sleep between two status polls.
        timeout: Maximum number of seconds to keep waiting.

    Returns:
        The last job object retrieved, i.e. the first one whose status is no
        longer "pending", "created" or "running". The status is NOT checked for
        success, so callers should inspect ``job.status`` themselves.

    Raises:
        RuntimeError: If the job is still in progress after ``timeout`` seconds.
    """
    # A monotonic clock keeps the elapsed-time / timeout arithmetic correct even
    # if the system wall clock is adjusted while we are waiting.
    from time import monotonic

    in_progress_states = ("pending", "created", "running")

    start_time = monotonic()
    job = nemo_client.customization.jobs.retrieve(job_id=job_id)

    while job.status in in_progress_states:
        # Check for timeout before committing to another sleep/poll round.
        if monotonic() - start_time > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        # Sleep before polling again
        sleep(polling_interval)

        # Fetch updated status and progress
        job = nemo_client.customization.jobs.retrieve(job_id=job_id)
        status = job.status

        # Progress is only read from the job while it is running; a completed
        # job is reported as 100 and every other state as 0.0.
        progress = 0.0
        if status == "running" and job.status_details:
            progress = job.status_details.percentage_done or 0.0
        elif status == "completed":
            progress = 100

        print(f"Job status: {status} after {monotonic() - start_time:.2f} seconds. Progress: {progress}%")

    return job

# Block until the job leaves its in-progress states (poll every 5 s, give up after 2400 s).
job = wait_job(nemo_client, JOB_ID, polling_interval=5, timeout=2400)

# wait_job returns for ANY final status. Fail fast here: without a completed job there is
# no fine-tuned model, and the later cells would only fail with a less helpful
# "model not found" assertion after the grace period below.
if job.status != "completed":
    raise RuntimeError(
        f"Customization job {JOB_ID} ended with status '{job.status}' instead of 'completed'."
    )

# Wait for 2 minutes, because sometimes the job is finished, but the fine-tuned model is not ready in NIM yet.
MODEL_READY_GRACE_SECONDS = 120
sleep(MODEL_READY_GRACE_SECONDS)

Job status: running after 5.49 seconds. Progress: 100.0%
Job status: completed after 10.55 seconds. Progress: 100%


In [143]:
 # List models with filters
models_page = nemo_client.models.list(
    sort="-created_at",
    filter={"namespace": NMS_NAMESPACE},
)

# One header line with the match count, then a short fact sheet per model.
print(f"Found {len(models_page.data)} models in namespace {NMS_NAMESPACE}:")
for model in models_page.data:
    print(f"\nModel: {model.name}")
    print(f"  Namespace: {model.namespace}")
    print(f"  Base Model: {model.base_model}")
    print(f"  Created: {model.created_at}")
    if not model.peft:
        continue  # no PEFT details on this model, nothing more to print
    print(f"  Fine-tuning Type: {model.peft.finetuning_type}")

Found 3 models in namespace lora-tutorial-ns:

Model: llama-3.2-1b-xlam-run1@v5
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.2-1b-instruct
  Created: 2025-08-13 18:17:19.448554
  Fine-tuning Type: lora

Model: llama-3.2-1b-xlam-run1@v4
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.2-1b-instruct
  Created: 2025-08-13 17:02:36.340413
  Fine-tuning Type: lora

Model: llama-3.2-1b-xlam-run1@v1
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.2-1b-instruct
  Created: 2025-08-13 14:33:29.640105
  Fine-tuning Type: lora


In [144]:
# CUSTOMIZED_MODEL is constructed as `namespace/model_name`. Split it exactly once, on the
# first "/", and use both halves for the lookup so the namespace and the model name always
# come from the same string (a missing "/" fails right here with an unpacking error).
model_namespace, model_name = CUSTOMIZED_MODEL.split("/", 1)
model = nemo_client.models.retrieve(namespace=model_namespace, model_name=model_name)

print(f"Model: {model.namespace}/{model.name}")
print(f"Base Model: {model.base_model}")
print(f"Status: {model.artifact.status}")

Model: lora-tutorial-ns/llama-3.2-1b-xlam-run1@v5
Base Model: meta/llama-3.2-1b-instruct
Status: upload_completed


In [145]:
 # Check if the custom LoRA model is hosted by NVIDIA NIM
# Gather the ids of all models returned by the inference endpoint.
models = nemo_client.inference.models.list()
model_names = [served_model.id for served_model in models.data]

# Stop the notebook here if the fine-tuned model is not among them.
assert CUSTOMIZED_MODEL in model_names, f"Model {CUSTOMIZED_MODEL} not found"

In [146]:
def read_jsonl(file_path):
    """Lazily parse a JSON Lines file, yielding one decoded object per non-blank line.

    The file is opened as UTF-8 text. Blank (whitespace-only) lines are skipped.
    A line that is not valid JSON is reported on stdout and skipped, so a single
    bad record does not abort the iteration.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            payload = raw_line.strip()
            if payload:
                try:
                    record = json.loads(payload)
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON: {e}")
                else:
                    yield record


# Materialize the whole test split in memory (read_jsonl itself is a lazy generator).
# NOTE(review): test_fp is not defined anywhere in this notebook; presumably it is
# provided by `from config import *` -- confirm.
test_data = [record for record in read_jsonl(test_fp)]

print(f"There are {len(test_data)} examples in the test set")

There are 7676 examples in the test set


In [147]:
def create_message_batches(data_list, batch_size=100):
    """
    Turn prompt records into chat messages, grouped into consecutive batches.

    Args:
        data_list: List of dictionaries, each with a 'prompt' key
        batch_size: Maximum number of messages per batch (default: 100)

    Returns:
        List of batches in input order. Each batch is a list of
        {"role": "user", "content": <prompt>} dictionaries; only the last
        batch may hold fewer than batch_size messages.
    """
    return [
        [
            {"role": "user", "content": sample['prompt']}
            for sample in data_list[offset:offset + batch_size]
        ]
        for offset in range(0, len(data_list), batch_size)
    ]

# Usage example: batch the whole test set, then sanity-check the result.
message_batches = create_message_batches(test_data, batch_size=100)

print(f"Created {len(message_batches)} batches")

first_batch = message_batches[0]
print(f"First batch has {len(first_batch)} messages")
print(f"Sample message from first batch: {first_batch[0]}")

Created 77 batches
First batch has 100 messages
Sample message from first batch: {'role': 'user', 'content': "Given the following headline:\n### START HEADLINE ###\n\nHearing Endo Int'l Held Talks to Sell Paladin Labs to Knight Therapeutics\n\n### END HEADLINE ###\n\nWhat event type best classifies it? Choose from the following list:\n\n-analyst rating\n-price targets\n-earnings\n-labour related\n-mergers and acquisitions\n-dividends\n-regulatory\n-stock price movement\n-credit ratings\n-products-services\n-product approval\n-guidance\n-other\n\nProvide only the event type putting it inside double square brackets and in a new line like:\n[[label]]\n\n### START EVENT OUTPUT ###\n\n"}


In [159]:
# Query the fine-tuned model once per test prompt and collect the raw text answers.
# `responses` must stay index-aligned with `test_data`: the evaluation cells compare
# it element by element with the gold labels taken from `test_data`.
responses = []

# The batches are only a grouping; flatten them so one running counter covers all messages.
all_messages = (message for batch in message_batches for message in batch)

for num_processed, message in enumerate(all_messages, start=1):
    completion = nemo_client.chat.completions.create(
        model=CUSTOMIZED_MODEL,
        messages=[message],
        temperature=0.1,
        top_p=0.7,
        max_tokens=512,
        stream=False,
    )
    # The message content may come back as None (OpenAI-style responses allow it).
    # Store "" instead so the string clean-up cells do not crash on it and the
    # list keeps exactly one entry per prompt.
    responses.append(completion.choices[0].message.content or "")

    # Lightweight progress report every 500 requests.
    if num_processed % 500 == 0:
        print(f"Processed {num_processed} messages")

Processed 500 messages
Processed 1000 messages
Processed 1500 messages
Processed 2000 messages
Processed 2500 messages
Processed 3000 messages
Processed 3500 messages
Processed 4000 messages
Processed 4500 messages
Processed 5000 messages
Processed 5500 messages
Processed 6000 messages
Processed 6500 messages
Processed 7000 messages
Processed 7500 messages


In [160]:
# Normalise each raw model answer to a bare label: drop the "[[...]]" wrapper, trim
# surrounding whitespace, and remove a leading "-" bullet (the prompt lists the labels
# as "-label"). Stripping only at the start of the answer -- instead of replacing
# "-p" / "-e" anywhere in the text -- also handles bulleted labels that begin with
# other letters and leaves hyphens inside a label untouched. A missing (None) answer
# becomes "".
responses = [
    (raw or "").replace('[', '').replace(']', '').strip().lstrip('-').strip()
    for raw in responses
]

In [164]:
# Compare case-insensitively: fold every answer to lower case.
responses = [answer.lower() for answer in responses]

In [156]:
# Gold labels, in the same order as the test examples (one 'completion' per example).
true_labels = [example['completion'] for example in test_data]

In [165]:
# Distinct gold labels in the test set.
# NOTE(review): in the saved output these include 'labour issues' and 'no event', while
# the sample prompt printed earlier offers 'labour related' and has no 'no event'
# option -- confirm whether the prompt's label list and the gold labels are meant to match.
set(true_labels)

{'analyst rating',
 'credit ratings',
 'dividends',
 'earnings',
 'guidance',
 'labour issues',
 'mergers and acquisitions',
 'no event',
 'other',
 'price targets',
 'product approval',
 'products-services',
 'regulatory',
 'stock price movement'}

In [166]:
# Distinct model answers. Any value here that is not also a gold label can never be
# scored as correct; in the saved output that is e.g. 'dividers', 'growth', 'label',
# 'labor issues' and 'labour related'.
set(responses)

{'analyst rating',
 'credit ratings',
 'dividends',
 'dividers',
 'earnings',
 'growth',
 'guidance',
 'label',
 'labor issues',
 'labour issues',
 'labour related',
 'mergers and acquisitions',
 'other',
 'price targets',
 'product approval',
 'products-services',
 'regulatory',
 'stock price movement'}

In [167]:
from sklearn.metrics import classification_report

In [168]:
# Per-label precision / recall / F1 as a nested dict (output_dict=True).
# zero_division=0 is passed so undefined ratios are reported as 0.
classification_report(
    y_true=true_labels,
    y_pred=responses,
    output_dict=True,
    zero_division=0,
)

{'analyst rating': {'precision': 0.1497019715726731,
  'recall': 0.7709563164108618,
  'f1-score': 0.250719907851795,
  'support': 847.0},
 'credit ratings': {'precision': 0.6363636363636364,
  'recall': 0.175,
  'f1-score': 0.27450980392156865,
  'support': 40.0},
 'dividends': {'precision': 0.9591836734693877,
  'recall': 0.8623853211009175,
  'f1-score': 0.9082125603864735,
  'support': 109.0},
 'dividers': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'earnings': {'precision': 0.7695852534562212,
  'recall': 0.334,
  'f1-score': 0.46582984658298465,
  'support': 1000.0},
 'growth': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0},
 'guidance': {'precision': 0.15777777777777777,
  'recall': 0.2862903225806452,
  'f1-score': 0.2034383954154728,
  'support': 496.0},
 'label': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 0.0},
 'labor issues': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'labo

In [169]:
# Final reminder of the model identifier to use for inference requests.
print(f"Name of your custom model is: {CUSTOMIZED_MODEL}")

Name of your custom model is: lora-tutorial-ns/llama-3.2-1b-xlam-run1@v5
