In [5]:
# Import Libraries
import os
import json
import yaml
import logging
from tqdm import tqdm
from azure.ai.ml import MLClient, Input
from azure.identity import DefaultAzureCredential
from azure.ai.ml.constants import AssetTypes

In [6]:
# Keep notebook output readable: the root logger only reports warnings and above
logging.getLogger().setLevel(logging.WARNING)

# The Azure SDK loggers are restricted further, to errors only
for noisy_logger in (
    "azure",
    "azure.identity",
    "azure.core.pipeline.policies.http_logging_policy",
):
    logging.getLogger(noisy_logger).setLevel(logging.ERROR)

In [19]:
# Read the Azure ML workspace configuration from config.yml.
# The encoding is given explicitly, matching the other file reads in this notebook.
with open("config.yml", "r", encoding='utf-8') as f:
    # An empty YAML file parses to None; fall back to an empty mapping so the
    # key check below reports what is missing instead of raising a TypeError.
    config = yaml.safe_load(f) or {}

# Fail early with one message listing every missing setting, rather than
# stopping at the first missing key.
required_keys = (
    "subscription_id",
    "resource_group",
    "workspace_name",
    "batch_endpoint_name",
    "batch_deployment_name",
)
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(f"config.yml is missing required keys: {missing_keys}")

# Azure ML workspace configuration
subscription_id = config["subscription_id"]
resource_group = config["resource_group"]
workspace_name = config["workspace_name"]

# Finetuned model batch endpoint configuration
batch_endpoint_name = config["batch_endpoint_name"]
batch_deployment_name = config["batch_deployment_name"]

In [8]:
# Uncomment the following line to log in to Azure
#!az login

In [9]:
# Create the workspace client from the values read from config.yml,
# authenticating with DefaultAzureCredential
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [10]:
# Load the test set: one JSON object per line (JSONL).
# Blank lines (e.g. a trailing newline at the end of the file) are skipped,
# because json.loads raises JSONDecodeError on an empty string.
with open("./data/test.jsonl", "r", encoding='utf-8') as f:
    test_data = [json.loads(line) for line in f if line.strip()]

In [11]:
# Build one chat-style scoring request per test item and write them out as JSONL
output_file = "batch_input.jsonl"
max_items = len(test_data)  # Use all items in the test set

# Instruction sent as the system message of every request
system_prompt = "You are a medical expert. Read the following USMLE question and choose the best answer. Give me the answer as A/B/C/D/E."

batch_data = []
for idx in range(max_items):
    sample = test_data[idx]
    question_text = sample["question"]
    choices = sample["options"]
    correct_key = sample["answer_idx"]

    # One "<key>. <text>" line per option, ordered by option key
    option_lines = [f"{letter}. {text}" for letter, text in sorted(choices.items())]
    options_block = "\n".join(option_lines)

    system_message = {"role": "system", "content": system_prompt}
    user_message = {
        "role": "user",
        "content": f"Question:\n{question_text}\n\nOptions:\n{options_block}",
    }

    batch_data.append(
        {
            "id": f"item_{idx}",  # Unique identifier for tracking
            "messages": [system_message, user_message],
            "max_tokens": 10,
            "temperature": 0.1,
            "ground_truth": correct_key,  # For evaluation purposes
        }
    )

# Serialize every request as a single JSON line
with open(output_file, "w", encoding='utf-8') as f:
    f.writelines(json.dumps(entry) + "\n" for entry in batch_data)

print(f"Created batch input file '{output_file}' with {len(batch_data)} items")

Created batch input file 'batch_input.jsonl' with 1273 items


In [None]:
# Submit batch scoring job
input_file = "batch_input.jsonl"
try:
    # Describe the local JSONL file as a URI_FILE input for the endpoint
    input_data = Input(
        type=AssetTypes.URI_FILE,
        path=f"./{input_file}"
    )

    print("Invoking batch endpoint...")
    job = ml_client.batch_endpoints.invoke(
        endpoint_name=batch_endpoint_name,
        input=input_data,
        deployment_name=batch_deployment_name
    )

    print("Batch job submitted successfully!")
    print(f"Job name: {job.name}")

except Exception as e:
    print(f"Batch scoring job failed: {e}")
    # Re-raise instead of swallowing the error: if submission fails, `job` is
    # either undefined or still bound to a job from an earlier run, and the
    # download/evaluation cells must not continue with it.
    raise

Invoking batch endpoint...
Batch job submitted successfully!
Job name: batchjob-746f237c-7c5b-4550-b608-62494432be65
Batch job submitted successfully!
Job name: batchjob-746f237c-7c5b-4550-b608-62494432be65


In [27]:
# Uncomment the following lines to resume a batch scoring job that was interrupted.
# Note: set `resume_from` below to the job name printed by the earlier submission (e.g. "batchjob-...").
# input_file = "batch_input.jsonl"
# try:
#     # Use direct file path approach
#     input_data = Input(
#         type=AssetTypes.URI_FILE,
#         path=f"./{input_file}"
#     )
    
#     print(f"Invoking batch endpoint...")
#     job = ml_client.batch_endpoints.invoke(
#         endpoint_name=batch_endpoint_name,
#         input=input_data,
#         deployment_name=batch_deployment_name,
#         resume_from="batchjob-29ca7dc5-aa57-4289-935e-f03b0428891c"
#     )
    
#     print(f"Batch job resubmitted successfully!")
#     print(f"Job name: {job.name}")


# except Exception as e:
#     print(f"Batch scoring job failed: {e}")
#     print(f"Error details: {str(e)}")

In [25]:
# Wait for the batch job to finish before downloading its outputs.
# batch_endpoints.invoke only submits the job, so on a top-to-bottom run the
# results would not exist yet; jobs.stream follows the job's logs until it ends.
ml_client.jobs.stream(job.name)

# Download the results
ml_client.jobs.download(job.name, download_path="./batch_results")
print("Results downloaded to './batch_results'")

Downloading artifact azureml://datastores/workspaceblobstore/paths/azureml/f5f75641-b175-494f-87f6-9c5519cde1fc/score/ to batch_results


Results downloaded to './batch_results'


In [26]:
# Score the fine-tuned model's batch predictions against the recorded ground truth
with open("./batch_results/batch_input_results.json", "r", encoding='utf-8') as f:
    batch_results = json.load(f)

print(f"Total number of predictions: {len(batch_results)}")

# A prediction counts as correct only when it equals the ground truth exactly
total_predictions = len(batch_results)
correct_predictions = sum(
    1 for entry in batch_results if entry["prediction"] == entry["ground_truth"]
)

# Avoid dividing by zero when the results file is empty
if total_predictions > 0:
    accuracy = correct_predictions / total_predictions
else:
    accuracy = 0

print("\nBatch Inference Results:")
print(f"Correct predictions: {correct_predictions}")
print(f"Total predictions: {total_predictions}")
print(f"Accuracy: {accuracy:.2%}")

Total number of predictions: 1273

Batch Inference Results:
Correct predictions: 592
Total predictions: 1273
Accuracy: 46.50%
