In [1]:
import os
import json
import random
import requests
from openai import OpenAI
from nemo_microservices import NeMoMicroservices

In [2]:
from config import *

# Name of the training dataset; it is passed (together with NMS_NAMESPACE) to the
# customization job created further down.
DATASET_NAME = "news-lora-dataset-train-100k"

# Initialize NeMo Microservices SDK client.
# NEMO_URL, NIM_URL and NMS_NAMESPACE are not defined in this notebook; they are
# expected to come from the wildcard `from config import *` at the top of this cell.
nemo_client = NeMoMicroservices(
    base_url=NEMO_URL,
    inference_base_url=NIM_URL,
)
# "<namespace>/<dataset name>" identifier of the dataset.
# NOTE(review): repo_id is not referenced by any later cell visible here — confirm it is still needed.
repo_id = f"{NMS_NAMESPACE}/{DATASET_NAME}"

In [201]:
# List customization configs with filters.
# Only the first page (at most 10 entries) is requested, restricted to enabled
# SFT + LoRA configs.
# NOTE(review): sort="-created_at" presumably means newest first — confirm against the API docs.
configs = nemo_client.customization.configs.list(
    page=1,
    page_size=10,
    sort="-created_at",
    filter={
        "training_type": "sft",
        "finetuning_type": "lora",
        "enabled": True
    }
)

# len(configs.data) counts the entries of the fetched page only, so the number
# printed here can never exceed page_size even if more configs match the filter.
print(f"Found {len(configs.data)} configs")
for config in configs.data:
    print(f"Namespace: {config.namespace} Config:{config.name} - {config.description}")

Found 10 configs
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-bb - None
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-batch - None
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-bigbatch - None
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-packing - None
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-optimal - None
Namespace: lora-tutorial-ns Config:llama-3.1-8b-instruct@v1.0.0+A100-new - None
Namespace: meta Config:llama-3.2-1b-instruct@v1.0.0+L40 - None
Namespace: meta Config:llama-3.2-1b-instruct@v1.0.0+A100 - None
Namespace: meta Config:llama-3.1-8b-instruct@v1.0.0+L40 - None
Namespace: meta Config:llama-3.1-8b-instruct@v1.0.0+A100 - None


In [202]:
# Define either here or on config.py
# Base model and config version of the stock customization config.
# NOTE(review): in the cells visible here these two names are only used by a
# commented-out `config=` argument of the job-creation cell; the job actually uses
# the custom config named by CONFIG_NAME.
BASE_MODEL = "meta/llama-3.1-8b-instruct"
BASE_MODEL_VERSION = "v1.0.0+A100"

In [203]:
# nemo_client.models.delete(namespace= NMS_NAMESPACE,model_name='llama-3.2-1b-xlam-run1@v2')

# Override default NeMo Microservices config
This lets you change the actual micro batch size (`micro_batch_size`) and the maximum sequence length (`max_seq_length`) used for training.

In [52]:
# Register a custom customization config so that micro batch size and maximum
# sequence length can be set explicitly.
# CONFIG_NAME is also read by the job-creation cell, so this cell has to run before it.
# NOTE(review): re-running this cell with a name that already exists presumably
# fails, since the name has to be unique in the namespace — confirm.
CONFIG_NAME = "llama-3.1-8b-instruct@v1.0.0+A100-bb"  # any unique name in your NS

cfg = nemo_client.customization.configs.create(
    name=CONFIG_NAME,
    namespace=NMS_NAMESPACE,
    target="meta/llama-3.1-8b-instruct@2.0",                          # customization target the config is based on
    training_options=[{
        "training_type": "sft",
        "finetuning_type": "lora",
        "num_gpus": 1,
        "micro_batch_size": 32,  # per-step batch; the job's batch_size must be divisible by this
        "tensor_parallel_size": 1,
        "pipeline_parallel_size": 1,
        "use_sequence_parallel": False
    }],
    training_precision="bf16-mixed",
    # Short limit chosen for the short texts of this dataset; the job below runs with
    # sequence packing disabled.
    max_seq_length=256,
    # Alternative for runs WITH sequence packing, where each sequence is filled with many small texts:
    # max_seq_length=8096,
    # Template that joins the dataset's "prompt" and "completion" fields with a single space.
    prompt_template="{prompt} {completion}"
)

## Set the name of your customised model -- must not exist already

In [177]:
# Name under which the fine-tuned model will be stored; passed as `output_model`
# to the customization job below. Written as "<namespace>/<model name>".
# NOTE(review): the namespace is hard-coded here instead of being built from
# NMS_NAMESPACE — keep the two in sync.
# Previous run:
# CUSTOM_MODEL = 'lora-tutorial-ns/llama-3.1-8b-xlam-run1@v5'

CUSTOM_MODEL = 'lora-tutorial-ns/llama-3.1-8b-xlam-run1@train_100k'

## Start customisation run

When to use sequence packing?

If your texts are uniformly short, fastest is usually:
  - sequence_packing_enabled=False
  - max_seq_length ≈ p95 of your lengths (e.g., 128–256)
  - crank micro_batch_size until HBM/SMs saturate (watch tokens/s, not step/s)

Use sequence packing when the input lengths vary widely. That is not the case for this dataset, so sequence packing is left disabled.

In [178]:
# Create customization job
# If WANDB_API_KEY is set, we send it in the request header, which will report the training metrics to Weights & Biases (WandB).
# WANDB_API_KEY is not defined in this notebook; it is expected to come from config.py.
if WANDB_API_KEY:
    client_with_wandb = nemo_client.with_options(default_headers={"wandb-api-key": WANDB_API_KEY})
else:
    client_with_wandb = nemo_client

# NOTE(review): the job name says "seq-packed", but sequence packing is disabled in
# the hyperparameters below — consider renaming to avoid confusion when comparing runs.
customization = client_with_wandb.customization.jobs.create(
    name="llama-3.1-8b-xlam-ft-seq-packed",
    output_model=CUSTOM_MODEL,
    # Stock config alternative (uses BASE_MODEL / BASE_MODEL_VERSION):
    # config=f"{BASE_MODEL}@{BASE_MODEL_VERSION}",
    # Custom config registered earlier under CONFIG_NAME in our namespace.
    config=f"{NMS_NAMESPACE}/{CONFIG_NAME}",
    dataset={"name": DATASET_NAME, "namespace": NMS_NAMESPACE},
    # "max_seq_length": 4096 -> setting this inside hyperparameters has no effect; it has to be set on the config instead
    hyperparameters={
        # If you use sequence packing, this overrides batch size choices
        "sequence_packing_enabled": False,
        "training_type": "sft",
        "finetuning_type": "lora",
        "epochs": 2,
        "batch_size": 32, # batch_size / micro_batch_size == gradient accumulation steps (batch size must be divisible by micro batch size)
        "learning_rate": 0.0001,
        # "data_parallel_size": 1,
        "lora": {
            "adapter_dim": 16,
            "adapter_dropout": 0.1
        }
    }
)
print(f"Created customization job: {customization.id}")
# Bare expression: shows the created job object as the cell output.
customization

Created customization job: cust-ULUNroNp6FVYqRa4UnDgiD




In [179]:
# To track status
# Keep the id of the job created above so later cells can poll or cancel it.
JOB_ID = customization.id

# Re-fetch the job from the service; note that this rebinds `customization` to the
# retrieved object.
customization = nemo_client.customization.jobs.retrieve(JOB_ID)

# This will be the name of the model that will be used to send inference queries to
# (CUSTOMIZED_MODEL is the name reported back by the service, CUSTOM_MODEL the one we requested).
CUSTOMIZED_MODEL = customization.output_model

In [180]:
JOB_ID

'cust-ULUNroNp6FVYqRa4UnDgiD'

## Uncomment this cell if you want to cancel a currently running job
Cancelling the job directly with kubectl won't work; use the SDK call below instead.

In [181]:
# nemo_client.customization.jobs.cancel(job_id=JOB_ID)

## Get job Status

In [5]:
 # Get job status
# Requires JOB_ID from the job-creation / tracking cells above.
job_status = nemo_client.customization.jobs.status(job_id=JOB_ID)

print("Percentage done:", job_status.percentage_done)
# default=str makes json.dumps fall back to str() for values it cannot serialise
# natively (presumably the timestamp fields — TODO confirm).
print("Job Status:", json.dumps(job_status.model_dump(), indent=2, default=str))

Percentage done: 100.0
Job Status: {
  "created_at": "2025-09-03 15:07:24.390376",
  "status": "completed",
  "updated_at": "2025-09-03 17:14:23.063404",
  "best_epoch": 0,
  "elapsed_time": 0.0,
  "epochs_completed": 1,
  "metrics": {
    "keys": [
      "train_loss",
      "val_loss"
    ],
    "metrics": {
      "train_loss": [
        {
          "step": 9,
          "timestamp": "2025-09-03T15:40:27.256077",
          "value": 2.6148929595947266
        },
        {
          "step": 19,
          "timestamp": "2025-09-03T15:40:43.841692",
          "value": 0.6593790054321289
        },
        {
          "step": 29,
          "timestamp": "2025-09-03T15:41:02.129653",
          "value": 0.2678964138031006
        },
        {
          "step": 39,
          "timestamp": "2025-09-03T15:41:18.568311",
          "value": 0.15813495218753815
        },
        {
          "step": 49,
          "timestamp": "2025-09-03T15:41:34.259095",
          "value": 0.18235889077186584
       

In [186]:
 # Add wait job function to wait for the customization job to complete

from time import sleep, time

def wait_job(nemo_client, job_id: str, polling_interval: int = 10, timeout: int = 6000):
    """Block until a customization job leaves its active states.

    The job is fetched once up front; while its status is "pending", "created"
    or "running" it is re-fetched every ``polling_interval`` seconds and a
    progress line is printed after each poll.

    Args:
        nemo_client: Client exposing ``customization.jobs.retrieve(job_id=...)``.
        job_id: Identifier of the customization job to watch.
        polling_interval: Seconds to sleep between two polls.
        timeout: Maximum number of seconds to keep polling. The limit is checked
            before each sleep, so the total wait can exceed it by up to one
            polling interval.

    Returns:
        The last job object retrieved, whatever its final status is (the caller
        has to check whether it completed, failed or was cancelled).

    Raises:
        RuntimeError: If the job is still active after ``timeout`` seconds.
    """
    active_states = ("pending", "created", "running")
    began = time()

    job = nemo_client.customization.jobs.retrieve(job_id=job_id)
    state = job.status

    while state in active_states:
        # Give up once the time budget is exhausted.
        if time() - began > timeout:
            raise RuntimeError(f"Took more than {timeout} seconds.")

        sleep(polling_interval)

        # Refresh the job and derive a progress figure for the log line.
        job = nemo_client.customization.jobs.retrieve(job_id=job_id)
        state = job.status
        if state == "completed":
            progress = 100
        elif state == "running" and job.status_details:
            progress = job.status_details.percentage_done or 0.0
        else:
            progress = 0.0

        print(f"Job status: {state} after {time() - began:.2f} seconds. Progress: {progress}%")

    return job

# job = wait_job(nemo_client, JOB_ID, polling_interval=30, timeout=24000)

# Wait for 2 minutes, because sometimes, the job is finished, but the finetuned model is not ready in NIM yet.
# sleep(120)

In [187]:
 # List models with filters
# Fetch the models registered in our namespace, sorted by "-created_at".
# NOTE(review): no page/page_size is passed, so this presumably returns only the
# service's default first page — confirm if more models than shown are expected.
models_page = nemo_client.models.list(
    filter={"namespace": NMS_NAMESPACE},
    sort="-created_at"
)

# Print models information
print(f"Found {len(models_page.data)} models in namespace {NMS_NAMESPACE}:")
for model in models_page.data:
    print(f"\nModel: {model.name}")
    print(f"  Namespace: {model.namespace}")
    print(f"  Base Model: {model.base_model}")
    print(f"  Created: {model.created_at}")
    # The fine-tuning type is only printed for models that carry PEFT metadata.
    if model.peft:
        print(f"  Fine-tuning Type: {model.peft.finetuning_type}")

Found 9 models in namespace lora-tutorial-ns:

Model: llama-3.1-8b-xlam-run1@train_50k
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8b-instruct
  Created: 2025-09-01 17:35:44.759716
  Fine-tuning Type: lora

Model: llama-3.1-8b-xlam-run1@train_20k
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8b-instruct
  Created: 2025-09-01 16:29:22.508023
  Fine-tuning Type: lora

Model: llama-3.1-8b-xlam-run1@train_5k
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8b-instruct
  Created: 2025-09-01 14:42:01.843657
  Fine-tuning Type: lora

Model: llama-3.1-8b-xlam-run1@v5
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8b-instruct
  Created: 2025-09-01 12:40:54.220835
  Fine-tuning Type: lora

Model: llama-3.1-8b-xlam-run1@v4
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8b-instruct
  Created: 2025-09-01 12:31:01.079960
  Fine-tuning Type: lora

Model: llama-3.1-8b-xlam-run1@v3
  Namespace: lora-tutorial-ns
  Base Model: meta/llama-3.1-8

In [188]:
# CUSTOMIZED_MODEL is constructed as `namespace/model_name`, so we need to extract the model name.
# split("/")[1] takes the part after the first "/" and raises IndexError if the value
# contains no "/" at all. The lookup uses NMS_NAMESPACE rather than the namespace
# embedded in CUSTOMIZED_MODEL, so the two are assumed to be identical.
model = nemo_client.models.retrieve(namespace=NMS_NAMESPACE, model_name=CUSTOMIZED_MODEL.split("/")[1])

print(f"Model: {model.namespace}/{model.name}")
print(f"Base Model: {model.base_model}")
print(f"Status: {model.artifact.status}")

Model: lora-tutorial-ns/llama-3.1-8b-xlam-run1@train_50k
Base Model: meta/llama-3.1-8b-instruct
Status: upload_completed


In [189]:
 # Check if the custom LoRA model is hosted by NVIDIA NIM
# Ask the inference endpoint which model ids it currently serves.
models = nemo_client.inference.models.list()
model_names = [model.id for model in models.data]

# Stop here if the fine-tuned model is not served yet; as noted next to wait_job,
# the model can lag behind the job's completion, so retry this cell after a short wait.
assert CUSTOMIZED_MODEL in model_names, \
    f"Model {CUSTOMIZED_MODEL} not found" 

In [190]:
def read_jsonl(file_path):
    """Lazily parse a JSON Lines file.

    Opens ``file_path`` as UTF-8 and yields one parsed object per non-blank line.
    A line that is not valid JSON is reported on stdout and skipped, so a single
    malformed record does not abort the whole read.
    """
    with open(file_path, encoding='utf-8') as handle:
        for raw in handle:
            text = raw.strip()  # drop the newline and surrounding whitespace
            if text:  # blank lines carry no record
                try:
                    record = json.loads(text)
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON: {e}")
                else:
                    yield record


# Path of the evaluation set, relative to the notebook's working directory.
test_fp = "data/test.jsonl"
# If using an untrained model, use the more complete prompt
# test_fp = "baseline_data/test.jsonl"
# Materialise the generator: the records are iterated more than once below
# (to build the prompts and, later, to extract the gold labels).
test_data = list(read_jsonl(test_fp))

print(f"There are {len(test_data)} examples in the test set")

There are 15352 examples in the test set


In [191]:
def create_message_batches(data_list, batch_size=100):
    """
    Split samples into consecutive batches of single-turn chat messages.

    Args:
        data_list: List of dictionaries, each with a 'prompt' key
        batch_size: Maximum number of messages per batch (default: 100);
            the last batch may be shorter

    Returns:
        List of batches; every batch is a list of
        {"role": "user", "content": <prompt>} dictionaries in input order
    """
    return [
        [
            {"role": "user", "content": sample['prompt']}
            for sample in data_list[start:start + batch_size]
        ]
        for start in range(0, len(data_list), batch_size)
    ]

# Build the chat messages for the whole test set, 100 prompts per batch.
message_batches = create_message_batches(test_data, batch_size=100)

# Sanity output; indexing [0] assumes the test set is not empty.
print(f"Created {len(message_batches)} batches")
print(f"First batch has {len(message_batches[0])} messages")
print(f"Sample message from first batch: {message_batches[0][0]}")

Created 154 batches
First batch has 100 messages
Sample message from first batch: {'role': 'user', 'content': 'Given the following headline:\n### START HEADLINE ###\n\nTejon Ranch Reports Q4 EPS $0.01 From $(0.01) YoY, Sales $11.68M From $12.70M YoY\n\n### END HEADLINE ###\n\nWhat event type best classifies it? Choose from the following list:\n\n-analyst rating\n-price targets\n-earnings\n-labour related\n-mergers and acquisitions\n-dividends\n-regulatory\n-stock price movement\n-credit ratings\n-products-services\n-product approval\n-guidance\n-other\n\nProvide only the event type putting it inside double square brackets and in a new line like:\n[[label]]\n\n### START EVENT OUTPUT ###\n\n'}


In [192]:
# Query the customized model once per test prompt and collect the raw text replies.
# `responses` keeps the order of the prompts, which matters later because the
# predictions are compared with the gold labels position by position.
responses = []
c = 0  # number of prompts processed so far
for batch in message_batches:
    for message in batch:
        completion = nemo_client.chat.completions.create(
            model=CUSTOMIZED_MODEL,
            # model="meta/llama-3.1-8b-instruct",  # base model, for the baseline runs
            messages=[message],
            temperature=0.1,
            top_p=0.7,
            max_tokens=16,  # the answer is a single short "[[label]]" line
            stream=False,
        )
        resp = completion.choices[0].message.content
        # OpenAI-compatible responses declare `content` as optional. Store an empty
        # string instead of None so that the regex-based label extraction further
        # down cannot fail with a TypeError after the whole (long) run has finished.
        responses.append(resp if resp is not None else "")
        c += 1
        if c % 500 == 0:
            print(f"Processed {c} messages")

Processed 500 messages
Processed 1000 messages
Processed 1500 messages
Processed 2000 messages
Processed 2500 messages
Processed 3000 messages
Processed 3500 messages
Processed 4000 messages
Processed 4500 messages
Processed 5000 messages
Processed 5500 messages
Processed 6000 messages
Processed 6500 messages
Processed 7000 messages
Processed 7500 messages
Processed 8000 messages
Processed 8500 messages
Processed 9000 messages
Processed 9500 messages
Processed 10000 messages
Processed 10500 messages
Processed 11000 messages
Processed 11500 messages
Processed 12000 messages
Processed 12500 messages
Processed 13000 messages
Processed 13500 messages
Processed 14000 messages
Processed 14500 messages
Processed 15000 messages


In [193]:
# Clean responses using lambda to remove square brackets
# responses = list(map(lambda x: x.replace('[', '').replace(']', '').replace('-p','p').replace('-e','e'), responses))

In [194]:
import re

def extract_inside_brackets(s: str) -> str:
    """Return the text of the first ``[[...]]`` group found in ``s``.

    The match is non-greedy and does not span line breaks. The inner text is
    returned unchanged (no trimming or case folding). When ``s`` contains no
    such group, the fallback label "other" is returned.
    """
    found = re.search(r"\[\[(.*?)\]\]", s)
    if found is None:
        return "other"
    return found.group(1)

# Predicted label per response: text inside [[...]], lower-cased and trimmed.
predictions = [extract_inside_brackets(reply).lower().strip() for reply in responses]

In [195]:
# Gold label per test sample, taken from the [[...]] group of its 'completion' field.
# Normalised the same way as the predictions (lower-cased, surrounding whitespace
# removed) so that a casing or spacing difference in the data is not scored as a
# misclassification.
true_labels = [extract_inside_brackets(sample['completion']).lower().strip() for sample in test_data]

In [196]:
set(true_labels)

{'analyst rating',
 'credit ratings',
 'dividends',
 'earnings',
 'guidance',
 'labour issues',
 'mergers and acquisitions',
 'no event',
 'other',
 'price targets',
 'product approval',
 'products-services',
 'regulatory',
 'stock price movement'}

In [197]:
set(predictions)

{'analyst rating',
 'credit ratings',
 'dividends',
 'earnings',
 'guidance',
 'labour issues',
 'mergers and acquisitions',
 'no event',
 'other',
 'price targets',
 'product approval',
 'products-services',
 'regulatory',
 'stock price movement'}

In [198]:
from sklearn.metrics import classification_report

In [199]:
# Per-class precision / recall / F1 of the predictions against the gold labels.
# The two lists are compared position by position, so they must have the same
# length and order (one entry per test sample).
# zero_division=0 reports 0 for undefined scores; output_dict=True returns a nested
# dict instead of a formatted string.
classification_report(y_true=true_labels,
    y_pred=predictions,
    zero_division=0,
    output_dict=True
)

{'analyst rating': {'precision': 0.9556088782243551,
  'recall': 0.9398230088495575,
  'f1-score': 0.9476502082093992,
  'support': 1695.0},
 'credit ratings': {'precision': 0.7659574468085106,
  'recall': 0.9,
  'f1-score': 0.8275862068965517,
  'support': 80.0},
 'dividends': {'precision': 0.9517543859649122,
  'recall': 0.9908675799086758,
  'f1-score': 0.970917225950783,
  'support': 219.0},
 'earnings': {'precision': 0.9693004529441369,
  'recall': 0.9634817408704353,
  'f1-score': 0.9663823381836427,
  'support': 1999.0},
 'guidance': {'precision': 0.9185700099304865,
  'recall': 0.9324596774193549,
  'f1-score': 0.9254627313656828,
  'support': 992.0},
 'labour issues': {'precision': 0.9220430107526881,
  'recall': 0.9423076923076923,
  'f1-score': 0.9320652173913043,
  'support': 364.0},
 'mergers and acquisitions': {'precision': 0.9021852237252862,
  'recall': 0.9332615715823466,
  'f1-score': 0.9174603174603174,
  'support': 929.0},
 'no event': {'precision': 0.93324250681198

In [169]:
print(f"Name of your custom model is: {CUSTOMIZED_MODEL}") 

Name of your custom model is: lora-tutorial-ns/llama-3.2-1b-xlam-run1@v5


## Results:

All training data:

{'[[analyst rating]]': {'precision': 0.9598562013181546,
  'recall': 0.9451327433628318,
  'f1-score': 0.9524375743162902,
  'support': 1695.0},
 '[[credit ratings]]': {'precision': 0.8505747126436781,
  'recall': 0.925,
  'f1-score': 0.8862275449101796,
  'support': 80.0},
 '[[dividends]]': {'precision': 0.9728506787330317,
  'recall': 0.9817351598173516,
  'f1-score': 0.9772727272727273,
  'support': 219.0},
 '[[earnings]]': {'precision': 0.969261279127417,
  'recall': 0.9779889944972486,
  'f1-score': 0.973605577689243,
  'support': 1999.0},
 '[[guidance]]': {'precision': 0.9330669330669331,
  'recall': 0.9415322580645161,
  'f1-score': 0.9372804816859006,
  'support': 992.0},
 '[[labour issues]]': {'precision': 0.9295392953929539,
  'recall': 0.9423076923076923,
  'f1-score': 0.9358799454297408,
  'support': 364.0},
 '[[mergers and acquisitions]]': {'precision': 0.919661733615222,
  'recall': 0.9364908503767492,
  'f1-score': 0.928,
  'support': 929.0},
 '[[no event]]': {'precision': 0.9079903147699758,
  'recall': 0.8571428571428571,
  'f1-score': 0.8818342151675485,
  'support': 875.0},
 '[[other]]': {'precision': 0.8228498074454429,
  'recall': 0.8067967275015733,
  'f1-score': 0.8147442008261837,
  'support': 1589.0},
 '[[price targets]]': {'precision': 0.9924736578023081,
  'recall': 0.9964735516372796,
  'f1-score': 0.9944695827048768,
  'support': 1985.0},
 '[[product approval]]': {'precision': 0.9388888888888889,
  'recall': 0.9441340782122905,
  'f1-score': 0.9415041782729805,
  'support': 179.0},
 '[[products-services]]': {'precision': 0.9273008507347255,
  'recall': 0.9294573643410853,
  'f1-score': 0.9283778552071235,
  'support': 1290.0},
 '[[regulatory]]': {'precision': 0.8688888888888889,
  'recall': 0.8856172140430351,
  'f1-score': 0.8771733034212003,
  'support': 883.0},
 '[[stock price movement]]': {'precision': 0.9293193717277487,
  'recall': 0.9370875494940607,
  'f1-score': 0.9331872946330778,
  'support': 2273.0},
 'accuracy': 0.9303673788431475,
 'macro avg': {'precision': 0.9230373295825265,
  'recall': 0.9290640743427552,
  'f1-score': 0.9258567486812195,
  'support': 15352.0},
 'weighted avg': {'precision': 0.9301616333438684,
  'recall': 0.9303673788431475,
  'f1-score': 0.9301827599599295,
  'support': 15352.0}}

50K examples

{'analyst rating': {'precision': 0.9556088782243551,
  'recall': 0.9398230088495575,
  'f1-score': 0.9476502082093992,
  'support': 1695.0},
 'credit ratings': {'precision': 0.7659574468085106,
  'recall': 0.9,
  'f1-score': 0.8275862068965517,
  'support': 80.0},
 'dividends': {'precision': 0.9517543859649122,
  'recall': 0.9908675799086758,
  'f1-score': 0.970917225950783,
  'support': 219.0},
 'earnings': {'precision': 0.9693004529441369,
  'recall': 0.9634817408704353,
  'f1-score': 0.9663823381836427,
  'support': 1999.0},
 'guidance': {'precision': 0.9185700099304865,
  'recall': 0.9324596774193549,
  'f1-score': 0.9254627313656828,
  'support': 992.0},
 'labour issues': {'precision': 0.9220430107526881,
  'recall': 0.9423076923076923,
  'f1-score': 0.9320652173913043,
  'support': 364.0},
 'mergers and acquisitions': {'precision': 0.9021852237252862,
  'recall': 0.9332615715823466,
  'f1-score': 0.9174603174603174,
  'support': 929.0},
 'no event': {'precision': 0.9332425068119891,
  'recall': 0.7828571428571428,
  'f1-score': 0.8514605344934743,
  'support': 875.0},
 'other': {'precision': 0.7918263090676884,
  'recall': 0.7803650094398993,
  'f1-score': 0.786053882725832,
  'support': 1589.0},
 'price targets': {'precision': 0.9914829659318637,
  'recall': 0.996977329974811,
  'f1-score': 0.9942225571464456,
  'support': 1985.0},
 'product approval': {'precision': 0.9175824175824175,
  'recall': 0.9329608938547486,
  'f1-score': 0.925207756232687,
  'support': 179.0},
 'products-services': {'precision': 0.9207843137254902,
  'recall': 0.9100775193798449,
  'f1-score': 0.9153996101364522,
  'support': 1290.0},
 'regulatory': {'precision': 0.8675115207373272,
  'recall': 0.8527746319365799,
  'f1-score': 0.8600799543118218,
  'support': 883.0},
 'stock price movement': {'precision': 0.8869565217391304,
  'recall': 0.9423669159700836,
  'f1-score': 0.9138225255972696,
  'support': 2273.0},
 'accuracy': 0.9173397602918186,
 'macro avg': {'precision': 0.9067718545675916,
  'recall': 0.9143271938822266,
  'f1-score': 0.9095550761501189,
  'support': 15352.0},
 'weighted avg': {'precision': 0.9177365417550454,
  'recall': 0.9173397602918186,
  'f1-score': 0.916962012675563,
  'support': 15352.0}}
  

20K examples

{'analyst rating': {'precision': 0.9537815126050421,
  'recall': 0.9374631268436578,
  'f1-score': 0.9455519190717049,
  'support': 1695.0},
 'credit ratings': {'precision': 0.776595744680851,
  'recall': 0.9125,
  'f1-score': 0.8390804597701149,
  'support': 80.0},
 'dividends': {'precision': 0.9473684210526315,
  'recall': 0.9863013698630136,
  'f1-score': 0.9664429530201343,
  'support': 219.0},
 'earnings': {'precision': 0.9564356435643564,
  'recall': 0.9664832416208105,
  'f1-score': 0.9614331923364021,
  'support': 1999.0},
 'guidance': {'precision': 0.9183266932270916,
  'recall': 0.9294354838709677,
  'f1-score': 0.9238476953907816,
  'support': 992.0},
 'labour issues': {'precision': 0.9327731092436975,
  'recall': 0.9148351648351648,
  'f1-score': 0.9237170596393898,
  'support': 364.0},
 'mergers and acquisitions': {'precision': 0.8589743589743589,
  'recall': 0.93756727664155,
  'f1-score': 0.896551724137931,
  'support': 929.0},
 'no event': {'precision': 0.9151079136690647,
  'recall': 0.7268571428571429,
  'f1-score': 0.8101910828025478,
  'support': 875.0},
 'other': {'precision': 0.7663612565445026,
  'recall': 0.736941472624292,
  'f1-score': 0.7513634905357716,
  'support': 1589.0},
 'price targets': {'precision': 0.9939668174962293,
  'recall': 0.9959697732997481,
  'f1-score': 0.9949672873678913,
  'support': 1985.0},
 'product approval': {'precision': 0.9171270718232044,
  'recall': 0.9273743016759777,
  'f1-score': 0.9222222222222223,
  'support': 179.0},
 'products-services': {'precision': 0.9346567411083541,
  'recall': 0.875968992248062,
  'f1-score': 0.9043617446978791,
  'support': 1290.0},
 'regulatory': {'precision': 0.8484162895927602,
  'recall': 0.8493771234428086,
  'f1-score': 0.8488964346349746,
  'support': 883.0},
 'stock price movement': {'precision': 0.8670962545308095,
  'recall': 0.9472063352397713,
  'f1-score': 0.9053826745164003,
  'support': 2273.0},
 'accuracy': 0.9066571130797291,
 'macro avg': {'precision': 0.8990705591509253,
  'recall': 0.9031629146473547,
  'f1-score': 0.8995721385817247,
  'support': 15352.0},
 'weighted avg': {'precision': 0.9072497678513273,
  'recall': 0.9066571130797291,
  'f1-score': 0.9058127945411604,
  'support': 15352.0}}

5K examples

{'[[analyst rating]]': {'precision': 0.944647201946472,
  'recall': 0.9162241887905604,
  'f1-score': 0.9302186283318359,
  'support': 1695.0},
 '[[credit ratings]]': {'precision': 0.6513761467889908,
  'recall': 0.8875,
  'f1-score': 0.7513227513227513,
  'support': 80.0},
 '[[dividends]]': {'precision': 0.9596412556053812,
  'recall': 0.9771689497716894,
  'f1-score': 0.9683257918552036,
  'support': 219.0},
 '[[earnings]]': {'precision': 0.9323091694671147,
  'recall': 0.9714857428714357,
  'f1-score': 0.9514943655071043,
  'support': 1999.0},
 '[[guidance]]': {'precision': 0.8805220883534136,
  'recall': 0.8840725806451613,
  'f1-score': 0.8822937625754527,
  'support': 992.0},
 '[[labour issues]]': {'precision': 0.890625,
  'recall': 0.9395604395604396,
  'f1-score': 0.9144385026737968,
  'support': 364.0},
 '[[mergers and acquisitions]]': {'precision': 0.8180961357210179,
  'recall': 0.9343379978471474,
  'f1-score': 0.8723618090452261,
  'support': 929.0},
 '[[no event]]': {'precision': 0.8435114503816794,
  'recall': 0.7577142857142857,
  'f1-score': 0.798314268512944,
  'support': 875.0},
 '[[other]]': {'precision': 0.7716468590831919,
  'recall': 0.5720578980490875,
  'f1-score': 0.6570292735814962,
  'support': 1589.0},
 '[[price targets]]': {'precision': 0.9934607645875252,
  'recall': 0.9949622166246851,
  'f1-score': 0.9942109237352127,
  'support': 1985.0},
 '[[product approval]]': {'precision': 0.8870056497175142,
  'recall': 0.8770949720670391,
  'f1-score': 0.8820224719101124,
  'support': 179.0},
 '[[products-services]]': {'precision': 0.91852487135506,
  'recall': 0.8302325581395349,
  'f1-score': 0.8721498371335505,
  'support': 1290.0},
 '[[regulatory]]': {'precision': 0.7417475728155339,
  'recall': 0.8652321630804077,
  'f1-score': 0.7987454260324098,
  'support': 883.0},
 '[[stock price movement]]': {'precision': 0.8397309062129007,
  'recall': 0.9335679718433788,
  'f1-score': 0.8841666666666667,
  'support': 2273.0},
 'accuracy': 0.8811881188118812,
 'macro avg': {'precision': 0.8623460765739855,
  'recall': 0.8815151403574896,
  'f1-score': 0.8683638913488403,
  'support': 15352.0},
 'weighted avg': {'precision': 0.8812137072273998,
  'recall': 0.8811881188118812,
  'f1-score': 0.8782864557987727,
  'support': 15352.0}}

Base model (Prompt, no training data)

{'analyst rating': {'precision': 0.6200836820083682,
  'recall': 0.8743362831858407,
  'f1-score': 0.7255813953488373,
  'support': 1695.0},
 'credit ratings': {'precision': 0.7368421052631579,
  'recall': 0.7,
  'f1-score': 0.717948717948718,
  'support': 80.0},
 'dividends': {'precision': 0.9393939393939394,
  'recall': 0.8493150684931506,
  'f1-score': 0.8920863309352518,
  'support': 219.0},
 'earnings': {'precision': 0.859713890170743,
  'recall': 0.9319659829914958,
  'f1-score': 0.8943831012962074,
  'support': 1999.0},
 'guidance': {'precision': 0.9744408945686901,
  'recall': 0.6149193548387096,
  'f1-score': 0.754017305315204,
  'support': 992.0},
 'labour issues': {'precision': 0.7308533916849015,
  'recall': 0.9175824175824175,
  'f1-score': 0.8136419001218027,
  'support': 364.0},
 'mergers and acquisitions': {'precision': 0.7180616740088106,
  'recall': 0.8772874058127018,
  'f1-score': 0.7897286821705426,
  'support': 929.0},
 'no event': {'precision': 0.4473684210526316,
  'recall': 0.11657142857142858,
  'f1-score': 0.18495013599274707,
  'support': 875.0},
 'other': {'precision': 0.41391155002146846,
  'recall': 0.6066708621774701,
  'f1-score': 0.4920877998979071,
  'support': 1589.0},
 'price targets': {'precision': 0.9958847736625515,
  'recall': 0.6095717884130982,
  'f1-score': 0.75625,
  'support': 1985.0},
 'product approval': {'precision': 0.5622895622895623,
  'recall': 0.9329608938547486,
  'f1-score': 0.7016806722689075,
  'support': 179.0},
 'products-services': {'precision': 0.9443069306930693,
  'recall': 0.5914728682170542,
  'f1-score': 0.7273593898951383,
  'support': 1290.0},
 'regulatory': {'precision': 0.708029197080292,
  'recall': 0.768969422423556,
  'f1-score': 0.7372421281216069,
  'support': 883.0},
 'stock price movement': {'precision': 0.7600324280502635,
  'recall': 0.8249010118785746,
  'f1-score': 0.7911392405063291,
  'support': 2273.0},
 'accuracy': 0.7234236581552892,
 'macro avg': {'precision': 0.7436580314248891,
  'recall': 0.7297517706028749,
  'f1-score': 0.7127211999870857,
  'support': 15352.0},
 'weighted avg': {'precision': 0.7576584943390323,
  'recall': 0.7234236581552892,
  'f1-score': 0.7169357563774181,
  'support': 15352.0}}


Base model - no prompt

{'analyst rating': {'precision': 0.47901591895803186,
  'recall': 0.7811209439528024,
  'f1-score': 0.5938551244673693,
  'support': 1695.0},
 'analyst/investor events': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'board election': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'corporate event': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'corporate summary': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'credit ratings': {'precision': 0.36923076923076925,
  'recall': 0.9,
  'f1-score': 0.5236363636363637,
  'support': 80.0},
 'dividends': {'precision': 0.9173913043478261,
  'recall': 0.9634703196347032,
  'f1-score': 0.9398663697104677,
  'support': 219.0},
 'earnings': {'precision': 0.6946788990825689,
  'recall': 0.9469734867433717,
  'f1-score': 0.8014394580863675,
  'support': 1999.0},
 'economic indicator': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'guidance': {'precision': 0.8426666666666667,
  'recall': 0.3185483870967742,
  'f1-score': 0.46232626188734455,
  'support': 992.0},
 'investment ideas': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'labour issues': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 364.0},
 'labour related': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'mergers and acquisitions': {'precision': 0.5541284403669725,
  'recall': 0.9752421959095802,
  'f1-score': 0.7067082683307332,
  'support': 929.0},
 'news roundup': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'no event': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 875.0},
 'options brief': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'other': {'precision': 0.14285714285714285,
  'recall': 0.0031466331025802393,
  'f1-score': 0.006157635467980296,
  'support': 1589.0},
 'price targets': {'precision': 0.9657980456026058,
  'recall': 0.2987405541561713,
  'f1-score': 0.4563293574451712,
  'support': 1985.0},
 'product approval': {'precision': 0.33101851851851855,
  'recall': 0.7988826815642458,
  'f1-score': 0.46808510638297873,
  'support': 179.0},
 'products-services': {'precision': 0.530327868852459,
  'recall': 0.5015503875968992,
  'f1-score': 0.5155378486055777,
  'support': 1290.0},
 'regulatory': {'precision': 0.500733137829912,
  'recall': 0.7734994337485843,
  'f1-score': 0.6079216733422341,
  'support': 883.0},
 'sector update': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'shareholder meeting': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'stock performance': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 0.0},
 'stock price movement': {'precision': 0.6063829787234043,
  'recall': 0.902771667399912,
  'f1-score': 0.7254728654764009,
  'support': 2273.0},
 'accuracy': 0.5761464304325169,
 'macro avg': {'precision': 0.26670114196295686,
  'recall': 0.31399794965021627,
  'f1-score': 0.26182062818611496,
  'support': 15352.0},
 'weighted avg': {'precision': 0.5530024212495165,
  'recall': 0.5761464304325169,
  'f1-score': 0.509494995221204,
  'support': 15352.0}}