## 1. TEST DEPLOYED VERTEX AI MODEL WITH TEST SET (FIXED)

In [2]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datasets import load_dataset
from google.cloud import aiplatform
from google.oauth2 import service_account
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix, ConfusionMatrixDisplay
)
from tqdm import tqdm


# Announce the start of the run with a title framed by two 80-character rules.
print("=" * 80, "TESTING DEPLOYED VERTEX AI MODEL", "=" * 80, sep="\n")

TESTING DEPLOYED VERTEX AI MODEL


## 2. LOAD TEST DATA

In [3]:
# Fetch the dataset and keep its 'test' split for evaluation.
dataset = load_dataset('hpe-ai/medical-cases-classification-tutorial')
test_data = dataset['test']

# Walk the split once, collecting the input text and its reference label
# into two parallel lists (same order as the split).
X_test_raw = []
y_test_true = []
for record in test_data:
    X_test_raw.append(record['transcription'])
    y_test_true.append(record['medical_specialty'])

print(f"✓ Loaded {len(X_test_raw)} test samples")
print(f"✓ Number of classes: {len(set(y_test_true))}")

Repo card metadata block was not found. Setting CardData to empty.


✓ Loaded 370 test samples
✓ Number of classes: 13


## 3. CONFIGURE VERTEX AI ENDPOINT

In [4]:
# Connection settings for the deployed endpoint.
#
# Each value can be overridden with an environment variable so the notebook
# runs on other machines without editing this cell (and without committing a
# machine-specific key path). The literals are the fallbacks used when the
# variable is unset or empty, which preserves the previous behaviour.
KEY_PATH = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/home/kmgdk/Documents/Important/ml-test-473905-37df07d55b6b.json"
)
PROJECT_ID = os.environ.get("VERTEX_PROJECT_ID") or "ml-test-473905"
LOCATION = os.environ.get("VERTEX_LOCATION") or "us-central1"
ENDPOINT_ID = os.environ.get("VERTEX_ENDPOINT_ID") or "4455391540050657280"

# Authenticate with the service-account key file and bind the SDK to the
# project/region, then build a handle to the endpoint by its full resource name.
credentials = service_account.Credentials.from_service_account_file(KEY_PATH)
aiplatform.init(project=PROJECT_ID, location=LOCATION, credentials=credentials)
endpoint = aiplatform.Endpoint(
    endpoint_name=f"projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}"
)

print(f"✓ Connected to endpoint: {endpoint.display_name}")


✓ Connected to endpoint: medical-docs-classification-api


## 4. MAKE PREDICTIONS ON TEST SET

In [5]:
# Run the whole test set through the endpoint in fixed-size batches.
#
# Results:
#   y_pred    - one entry per test sample, in the same order as X_test_raw;
#               the sentinel 'ERROR' marks samples whose batch failed.
#   latencies - wall-clock time in milliseconds of each completed predict call.
y_pred = []
latencies = []
batch_size = 10

for i in tqdm(range(0, len(X_test_raw), batch_size), desc="Processing batches"):
    batch_texts = X_test_raw[i:i+batch_size]

    # The endpoint is called with the raw text strings as instances (not dicts).
    instances = batch_texts

    try:
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump if the system clock changes.
        start_time = time.perf_counter()
        response = endpoint.predict(instances=instances)
        latency = (time.perf_counter() - start_time) * 1000
        latencies.append(latency)

        # Predictions may come back as plain labels or as dicts carrying a
        # 'label' key; normalise element by element rather than trusting the
        # first item to describe the whole batch.
        batch_predictions = [
            p['label'] if isinstance(p, dict) and 'label' in p else p
            for p in response.predictions
        ]

        # Downstream metrics pair y_pred with y_test_true by position, so a
        # short or long response would silently shift every later label.
        # Treat a count mismatch as a failed batch instead.
        if len(batch_predictions) != len(batch_texts):
            raise ValueError(
                f"expected {len(batch_texts)} predictions, "
                f"got {len(batch_predictions)}"
            )

        y_pred.extend(batch_predictions)

    except Exception as e:
        # Best effort: report the failure and keep positions aligned by
        # filling the whole batch with the 'ERROR' sentinel.
        print(f"\n⚠️ Error in batch {i//batch_size}: {e}")
        y_pred.extend(['ERROR'] * len(batch_texts))

print(f"✓ Completed {len(y_pred)} predictions")


Processing batches: 100%|██████████| 37/37 [00:43<00:00,  1.17s/it]

✓ Completed 370 predictions





## 5. CALCULATE PERFORMANCE METRICS

In [6]:
# Positions whose prediction is a real label rather than the 'ERROR' sentinel.
valid_indices = [pos for pos, label in enumerate(y_pred) if label != 'ERROR']
y_pred_valid = [y_pred[pos] for pos in valid_indices]
y_true_valid = [y_test_true[pos] for pos in valid_indices]

# Nothing to score if every batch failed.
if not valid_indices:
    raise RuntimeError("All predictions failed. Check model input format.")

# Overall accuracy plus macro- and weighted-averaged scores; zero_division=0
# scores a class with no predicted/true samples as 0 instead of warning.
accuracy = accuracy_score(y_true_valid, y_pred_valid)
precision_macro = precision_score(
    y_true_valid, y_pred_valid, average='macro', zero_division=0
)
recall_macro = recall_score(
    y_true_valid, y_pred_valid, average='macro', zero_division=0
)
f1_macro = f1_score(
    y_true_valid, y_pred_valid, average='macro', zero_division=0
)
f1_weighted = f1_score(
    y_true_valid, y_pred_valid, average='weighted', zero_division=0
)

print(f"\nAccuracy: {accuracy:.4f}, Macro F1: {f1_macro:.4f}")
print(f"Average latency per batch: {np.mean(latencies):.2f} ms")


Accuracy: 0.7757, Macro F1: 0.6768
Average latency per batch: 1164.82 ms


## 6. SAVE RESULTS

In [7]:
# One row per scored sample: reference label, predicted label, and whether
# the two match.
results_df = pd.DataFrame({
    'True_Label': y_true_valid,
    'Predicted_Label': y_pred_valid,
    'Correct': [truth == pred for truth, pred in zip(y_true_valid, y_pred_valid)],
})

summary_path = '../artifacts/deployed_model_predictions_fixed.csv'
# to_csv does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail at the very last step.
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
results_df.to_csv(summary_path, index=False)
print("✓ Predictions saved to 'deployed_model_predictions_fixed.csv'")

✓ Predictions saved to 'deployed_model_predictions_fixed.csv'


## 7. TEST CUSTOM TEXT INPUTS

In [8]:
# Six hand-written clinical notes used as an ad-hoc smoke test of the endpoint.
# They are sent as plain strings, the same instance format used for the test set.
new_patient = [
    "Patient admitted with acute chest pain and shortness of breath. ECG shows ST-segment elevation in inferior leads. Emergent percutaneous coronary intervention performed with stent placement in right coronary artery. Patient stabilized on dual antiplatelet therapy and beta-blocker.",
    "Patient presented with chronic knee pain and stiffness. X-ray shows advanced osteoarthritis with joint space narrowing. Total knee arthroplasty performed. Rehabilitation program initiated.",
    "Patient with Parkinson’s disease evaluated for worsening tremors and bradykinesia. Adjusted carbidopa-levodopa regimen and initiated deep brain stimulation evaluation.",
    "Newborn delivered at 32 weeks gestation weighing 1.5 kg. Required CPAP for respiratory distress syndrome. Started on surfactant therapy and admitted to NICU.",
    "Patient presented with flashes of light and sudden increase in floaters. Ophthalmic exam revealed retinal detachment involving macula. Emergency pars plana vitrectomy with scleral buckle performed.",
    "Patient presented with abnormal uterine bleeding. Endometrial biopsy performed and treatment initiated."
]


# Send all notes in a single request (no batching, no error handling here).
print("\n...Calling the Endpoint...")
response = endpoint.predict(instances=new_patient)

# Print the raw response object as returned by the SDK, not just the labels.
print(response)

print("...Received a Response.")


...Calling the Endpoint...
Prediction(predictions=['Cardiovascular / Pulmonary', 'Orthopedic', 'Neurology', 'Cardiovascular / Pulmonary', 'Cardiovascular / Pulmonary', 'Hematology - Oncology'], deployed_model_id='7855288201320071168', metadata=None, model_version_id='1', model_resource_name='projects/711469596313/locations/us-central1/models/3638656710752600064', explanations=None)
...Received a Response.
