# Heart Failure Prediction using AutoML

This notebook demonstrates how to use Azure AutoML to train a classification model
for predicting heart failure mortality based on clinical records.

## Overview
1. Set up workspace and compute
2. Load and register the dataset
3. Configure and run AutoML experiment
4. Analyze results and retrieve best model
5. Register and deploy the best model

## 1. Import Libraries and Set Up Workspace

In [None]:
import logging
import os
import json
import numpy as np
import pandas as pd

from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig

In [None]:
# Load the workspace handle via Workspace.from_config(), then echo the
# identifying details of the workspace that was returned.
ws = Workspace.from_config()

workspace_summary = [
    f"Workspace name: {ws.name}",
    f"Subscription ID: {ws.subscription_id}",
    f"Resource group: {ws.resource_group}",
]
print("\n".join(workspace_summary))

## 2. Create Compute Cluster

In [None]:
# Name under which the training cluster is looked up or created
compute_name = "cpu-cluster"

try:
    # Reuse the compute target when the lookup by name succeeds
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster (0 to 4 STANDARD_D2_V2 nodes)
    # and wait for provisioning to finish before continuing
    print(f"Creating new compute cluster: {compute_name}")
    compute_target = ComputeTarget.create(
        ws,
        compute_name,
        AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2",
            min_nodes=0,
            max_nodes=4,
        ),
    )
    compute_target.wait_for_completion(show_output=True)
else:
    print(f"Found existing compute target: {compute_name}")

# Report the cluster status whichever branch produced the target
print(f"Compute target status: {compute_target.get_status().serialize()}")

## 3. Load and Register Dataset

In [None]:
# Read the clinical-records CSV from the working directory into a DataFrame
df = pd.read_csv('heart_failure_clinical_records_dataset.csv')

# Summarise the frame: dimensions first, then the column names
print(f"Dataset shape: {df.shape}")
print(f"\nColumn names: {df.columns.tolist()}")

# Class balance of the label column
print(f"\nTarget variable distribution:")
target_counts = df['DEATH_EVENT'].value_counts()
print(target_counts)

# Final expression of the cell: preview of the leading rows
df.head()

In [None]:
# Workspace default datastore: destination for the raw CSV
datastore = ws.get_default_datastore()

# Push the local CSV into the 'heart-failure-data/' folder of the datastore,
# overwriting a previously uploaded copy
local_files = ['heart_failure_clinical_records_dataset.csv']
datastore.upload_files(
    files=local_files,
    target_path='heart-failure-data/',
    overwrite=True,
    show_progress=True,
)

# Build a TabularDataset over the uploaded file and register it in the
# workspace in a single chained expression
uploaded_csv = (datastore, 'heart-failure-data/heart_failure_clinical_records_dataset.csv')
dataset = Dataset.Tabular.from_delimited_files(path=uploaded_csv).register(
    workspace=ws,
    name='heart-failure-dataset',
    description='Heart Failure Clinical Records Dataset from Kaggle',
    create_new_version=True,
)

print(f"Dataset registered: {dataset.name}")

## 4. Configure AutoML Experiment

### AutoML Settings Explained:
- **experiment_timeout_minutes**: Maximum time (30 mins) for the entire experiment to prevent excessive resource usage
- **max_concurrent_iterations**: Run up to 4 iterations in parallel (matching our compute nodes)
- **primary_metric**: Using 'accuracy' as our main optimization metric for classification
- **n_cross_validations**: 5-fold cross-validation for robust model evaluation
- **enable_early_stopping**: End the experiment early if the score stops improving, to save time
- **featurization**: 'auto' lets AutoML handle feature engineering automatically
- **enable_onnx_compatible_models**: Enable ONNX export for portability

In [None]:
# Keyword settings that are unpacked into AutoMLConfig below
automl_settings = {
    # Time budget and parallelism
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 4,
    "enable_early_stopping": True,
    # Evaluation
    "primary_metric": 'accuracy',
    "n_cross_validations": 5,
    # Modelling options
    "featurization": 'auto',
    "enable_onnx_compatible_models": True,
    # Logging
    "verbosity": logging.INFO,
}

# Classification task over the registered dataset with DEATH_EVENT as label,
# executed on the compute target prepared earlier
automl_config = AutoMLConfig(
    task='classification',
    label_column_name='DEATH_EVENT',
    training_data=dataset,
    compute_target=compute_target,
    **automl_settings,
)

print("AutoML configuration created successfully!")

## 5. Run AutoML Experiment

In [None]:
# Experiment under which the AutoML run is recorded in the workspace
experiment = Experiment(workspace=ws, name="heart-failure-automl")

# Hand the AutoML configuration to the experiment, requesting console output
print("Submitting AutoML experiment...")
automl_run = experiment.submit(config=automl_config, show_output=True)

In [None]:
# Build the notebook widget for the submitted AutoML run, then render it
run_widget = RunDetails(automl_run)
run_widget.show()

In [None]:
# Wait for the run to complete.
# show_output=True is passed so output is requested while waiting; the call is
# the cell's final expression, so whatever it returns is what the cell displays.
automl_run.wait_for_completion(show_output=True)

## 6. Retrieve and Analyze Best Model

In [None]:
# Unpack the winning run together with its model, and fetch the metrics
# recorded on that run (reused later when tagging the registered model)
best_run, best_model = automl_run.get_output()
metrics = best_run.get_metrics()

# Headline facts about the winning run
print(f"Best Run ID: {best_run.id}")
print(f"\nBest Model Algorithm: {best_run.properties['run_algorithm']}")

# One indented line per recorded metric
print(f"\nBest Model Metrics:")
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name}: {metric_value}")

In [None]:
# Show the best run's properties as indented JSON under a heading
print("\nBest Model Properties:")
properties_json = json.dumps(best_run.properties, indent=2)
print(properties_json)

In [None]:
# Get feature importance (if available).
# Best-effort cell: the import, the lookup and the printing all sit inside one
# try block, so any failure is reported as a message instead of raising.
# NOTE(review): confirm that `get_feature_importance` is actually exported by
# this module in the installed SDK version -- if it is not, the ImportError is
# caught below and this cell only ever prints the "Could not retrieve" message.
try:
    from azureml.train.automl.runtime.automl_explain_utilities import get_feature_importance
    feature_importance = get_feature_importance(best_run)
    print("\nFeature Importance:")
    # presumably a mapping of feature name -> importance score (iterated via .items())
    for feature, importance in feature_importance.items():
        print(f"  {feature}: {importance}")
except Exception as e:
    # Deliberately broad: this step is optional and must not stop the notebook
    print(f"Could not retrieve feature importance: {e}")

## 7. Register the Best Model

In [None]:
# Name of the entry to create in the workspace model registry
model_name = 'heart-failure-automl-model'

# Tags stored with the model: the winning algorithm and its accuracy metric
# (the string 'N/A' when no 'accuracy' metric was recorded)
model_tags = {
    'algorithm': best_run.properties['run_algorithm'],
    'accuracy': str(metrics.get('accuracy', 'N/A')),
}

# Register the model file found at outputs/model.pkl on the best run
registered_model = best_run.register_model(
    model_name=model_name,
    model_path='outputs/model.pkl',
    description='Heart Failure Prediction Model trained with AutoML',
    tags=model_tags,
)

# Echo the identifiers assigned to the registered model
print(f"Model registered: {registered_model.name}")
print(f"Model version: {registered_model.version}")
print(f"Model ID: {registered_model.id}")

## 8. Deploy the Model

In [None]:
# Copy the scoring script out of the best run's outputs into the working directory
script_file_name = 'scoring_file_v_1_0_0.py'
best_run.download_file(
    name='outputs/scoring_file_v_1_0_0.py',
    output_file_path=script_file_name,
)

# Environment of the best run, reused later as the inference environment
best_run_env = best_run.get_environment()

print(f"Downloaded scoring script: {script_file_name}")

In [None]:
# Container sizing and options for the ACI web service: one CPU core, 1 GB of
# memory, authentication and Application Insights switched on
aci_config = AciWebservice.deploy_configuration(
    description='Heart Failure Prediction Service (AutoML)',
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    enable_app_insights=True,
)

# Pair the downloaded scoring script with the best run's environment
inference_config = InferenceConfig(
    environment=best_run_env,
    entry_script=script_file_name,
)

print("Deployment configuration created!")

In [None]:
# Name of the web service; overwrite=True is passed so the same name can be reused
service_name = 'heart-failure-automl-service'

# Deploy the registered model with the inference and ACI configurations
service = Model.deploy(
    ws,
    service_name,
    [registered_model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Wait for the deployment to finish, then report state and endpoint
service.wait_for_deployment(show_output=True)
print(f"\nService state: {service.state}")
print(f"Scoring URI: {service.scoring_uri}")

## 9. Test the Deployed Model

In [None]:
import requests

# Smoke-test the deployed endpoint: POST two sample patient records and print
# the HTTP status together with the response body.

# Endpoint address and the authentication keys of the deployed service
scoring_uri = service.scoring_uri
primary_key, secondary_key = service.get_keys()

# Two sample records using the dataset's feature columns (label column omitted)
sample_data = {
    "data": [
        {
            "age": 75,
            "anaemia": 0,
            "creatinine_phosphokinase": 582,
            "diabetes": 0,
            "ejection_fraction": 20,
            "high_blood_pressure": 1,
            "platelets": 265000,
            "serum_creatinine": 1.9,
            "serum_sodium": 130,
            "sex": 1,
            "smoking": 0,
            "time": 4
        },
        {
            "age": 55,
            "anaemia": 0,
            "creatinine_phosphokinase": 7861,
            "diabetes": 0,
            "ejection_fraction": 38,
            "high_blood_pressure": 0,
            "platelets": 263358.03,
            "serum_creatinine": 1.1,
            "serum_sodium": 136,
            "sex": 1,
            "smoking": 0,
            "time": 6
        }
    ]
}

# JSON payload, authenticated with the service's primary key
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {primary_key}'
}

# requests waits indefinitely unless a timeout is given; bound the wait so a
# stalled endpoint raises instead of hanging the notebook
request_timeout_seconds = 60
response = requests.post(
    scoring_uri,
    json=sample_data,
    headers=headers,
    timeout=request_timeout_seconds,
)

print(f"Status code: {response.status_code}")
try:
    print(f"Response: {response.json()}")
except ValueError:
    # Error responses are not guaranteed to be JSON; show the raw body so the
    # server's message is visible instead of a decoding traceback
    print(f"Response: {response.text}")

## 10. Save ONNX Model (Optional - Standout Suggestion)

In [None]:
# Export the best model in ONNX format (best effort).
# Everything runs inside one try block so that a missing ONNX package or an
# unavailable ONNX model is reported as a message instead of stopping the notebook.
try:
    # Not referenced below; if this import fails, the handler reports it
    from azureml.automl.runtime.onnx_convert import OnnxConverter

    # Ask the AutoML run for the ONNX flavour of its output
    best_run_onnx, onnx_model = automl_run.get_output(return_onnx_model=True)

    # Make sure the target folder exists: open() does not create parent
    # directories, so a fresh working directory would otherwise fail here
    onnx_model_path = 'outputs/automl_model.onnx'
    os.makedirs(os.path.dirname(onnx_model_path), exist_ok=True)

    # Write the serialized model bytes to disk
    with open(onnx_model_path, 'wb') as f:
        f.write(onnx_model.SerializeToString())

    print(f"ONNX model saved to: {onnx_model_path}")
except Exception as e:
    # Deliberately broad: this step is optional
    print(f"Could not export ONNX model: {e}")

## 11. Cleanup (Optional)

In [None]:
# Delete the web service (uncomment to run)
# service.delete()
# print("Service deleted.")

In [None]:
# Delete the compute cluster (uncomment to run)
# compute_target.delete()
# print("Compute cluster deleted.")