## Imports and Setup
Imports the required libraries and configures the SageMaker session, the default S3 bucket, and the S3 key prefix used by this notebook.

In [None]:
import boto3
import json
import numpy as np
import os
from sklearn.metrics import classification_report, confusion_matrix
from sagemaker.tuner import HyperparameterTuner
import sagemaker

# Configuration
# S3 key prefix under which this notebook reads its metadata and images.
prefix = "cbis-ddsm-classification"

# SageMaker session and the default bucket associated with it.
sess = sagemaker.Session()
bucket = sess.default_bucket()

## Retrieve and Evaluate the Best Model
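Automatically discovers the latest completed tuning job, deploys its best model to a temporary endpoint, and evaluates it against the validation list.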

In [None]:
# Auto-discover the latest completed tuning job
# NOTE: the emoji in the status messages were previously stored as mojibake
# (UTF-8 bytes decoded as a single-byte encoding); they are restored here.
print("\n🔎 Searching for the latest completed Hyperparameter Tuning Job...")

sm_client = boto3.client('sagemaker')

# Request a single summary, newest first, restricted to 'Completed' jobs, so
# the only entry returned (if any) is the most recent successful job.
response = sm_client.list_hyperparameter_tuning_jobs(
    SortBy='CreationTime',
    SortOrder='Descending',
    MaxResults=1,
    StatusEquals='Completed',
)

job_summaries = response['HyperparameterTuningJobSummaries']
if not job_summaries:
    # Nothing to evaluate: report the problem and stop the notebook here.
    error_msg = "No completed Tuning Job found! Please run notebook 02 first."
    print(error_msg)
    raise ValueError(error_msg)

# Name of the most recent completed tuning job; used by the steps below.
tuning_job_name = job_summaries[0]['HyperparameterTuningJobName']
print(f"✅ Latest Tuning Job found: {tuning_job_name}")

# 1. Retrieve the Best Model
print(f"\nAttaching to job '{tuning_job_name}' to find the best model...")
tuner = HyperparameterTuner.attach(tuning_job_name)

# Report which training job of the tuning run performed best.
best_training_job = tuner.best_training_job()
print(f"✅ The winning model was training job: {best_training_job}")

print("Retrieving the best model...")
# Reuse the tuner attached above; attaching a second time only repeats the
# same lookup and yields an equivalent object.
# NOTE: this creates a live endpoint with a fixed name. It is torn down by
# the final cell (predictor.delete_endpoint()), so make sure that cell runs
# even if the evaluation below fails.
predictor = tuner.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    endpoint_name='cbis-test-endpoint-eval',
)

# 2. Prepare Test Data (Ground Truth)
# Accumulators filled by the inference loop: the labels read from the list
# file and the classes predicted by the endpoint, in the same order.
y_true, y_pred = [], []

s3 = boto3.client('s3')
print("Downloading validation list to use as ground truth...")
# Fetch the validation list into the working directory as eval_list.lst.
eval_list_key = f"{prefix}/metadata/validation.lst"
s3.download_file(Bucket=bucket, Key=eval_list_key, Filename="eval_list.lst")

print("Starting evaluation (this may take a while depending on dataset size)...")

# 3. Inference Loop
# Sends every image named in eval_list.lst to the endpoint and records the
# true label and the predicted class side by side in y_true / y_pred.
# Each non-blank line of the list file is: INDEX \t LABEL \t PATH
try:
    with open("eval_list.lst", "r") as f:
        for line_no, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            parts = stripped.split('\t')
            if len(parts) < 3:
                raise ValueError(
                    f"eval_list.lst line {line_no}: expected "
                    f"INDEX<TAB>LABEL<TAB>PATH, got {stripped!r}"
                )

            # A. Ground truth. The label may be written as a float ("1.0"),
            #    hence the float -> int conversion. Expected values: 0 or 1.
            label = int(float(parts[1]))
            img_s3_path = parts[2]

            # B. Read the image bytes straight from S3 into memory; no
            #    temporary file is written to the working directory.
            s3_object = s3.get_object(
                Bucket=bucket,
                Key=f"{prefix}/images/{img_s3_path}",
            )
            payload = s3_object['Body'].read()

            # C. Prediction
            raw_result = predictor.predict(
                payload,
                initial_args={'ContentType': 'application/x-image'},
            )

            # D. The response is JSON: [prob_0, prob_1]. The predicted class
            #    is the index of the larger probability (1 = Malignant).
            probs = json.loads(raw_result)
            prediction = int(np.argmax(probs))

            # Record both values only after the prediction succeeded, so the
            # two lists always stay the same length.
            y_true.append(label)
            y_pred.append(prediction)

            # (Optional) Progress indicator
            print(".", end="", flush=True)
finally:
    # Cleanup: remove the downloaded list even if the loop failed part-way.
    if os.path.exists("eval_list.lst"):
        os.remove("eval_list.lst")

# 4. Metrics
# Class 0 is reported as 'Benign' and class 1 as 'Malignant'.
target_names = ['Benign', 'Malignant']
# Pass the label set explicitly: without it, classification_report raises a
# ValueError when only one class occurs in y_true/y_pred (the number of
# classes found would not match target_names), and the confusion matrix
# would change shape. With it, the output is always for both classes.
class_labels = [0, 1]

print("\n\n--- Classification Report ---")
print(
    classification_report(
        y_true,
        y_pred,
        labels=class_labels,
        target_names=target_names,
    )
)

print("\n--- Confusion Matrix ---")
print(confusion_matrix(y_true, y_pred, labels=class_labels))

In [None]:
# 5. IMPORTANT: Delete Endpoint
# Tear down the endpoint behind `predictor` so the evaluation endpoint does
# not stay up after this notebook finishes. Run this cell even if the
# evaluation cell above failed part-way.
predictor.delete_endpoint()
print("\nTest endpoint deleted.")