# AutoML

This autoML experiment is based on the tutorial found here on Microsoft's official documentation: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-configure-auto-train?view=azureml-api-2&tabs=python

https://learn.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.automl.regressionjob?view=azure-python&utm_source=chatgpt.com

## Imports

In [65]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
import mltable
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import automl, Input
from azure.ai.ml.entities import Data
from azure.storage.blob import BlobServiceClient
from azure.mgmt.storage import StorageManagementClient
from azure.core.exceptions import AzureError

In [7]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Initialize the credential and MLClient
credential = DefaultAzureCredential()
ml_client = None

try:
    # Attempt to create MLClient from config
    print("🔑 Attempting to create MLClient from configuration...")
    ml_client = MLClient.from_config(credential)
    print("✅ MLClient successfully created from configuration.")
except Exception as ex:
    # Log exception details for debugging
    print(f"❌ Error encountered while creating MLClient from config: {str(ex)}")
    
    # Provide fallback by manually specifying workspace details
    print("🔄 Attempting to create MLClient using fallback configuration...")
    try:
        subscription_id = "48778e11-0fc7-4fc8-a16c-304a430e61a4"
        resource_group = "student-performance-rg"
        workspace = "student-performance-ws"
        
        ml_client = MLClient(credential, subscription_id, resource_group, workspace)
        print(f"✅ MLClient successfully created for workspace: {workspace}.")
    except Exception as inner_ex:
        print(f"❌ Error encountered while creating MLClient with fallback configuration: {str(inner_ex)}")

Found the config file in: /config.json


🔑 Attempting to create MLClient from configuration...
✅ MLClient successfully created from configuration.


## Import Data

In [31]:
# Define the path to the dataset
data_file_path = './data/student-mat.csv'

# Define file paths for loading the dataset
paths = [
    {'file': f'{data_file_path}'}
]

# Load the dataset into a table format
try:
    print("🔄 Loading the dataset from the provided file paths... Please wait.")
    train_table = mltable.from_delimited_files(paths)
    print("✅ Dataset loaded successfully! Now saving the processed table to the directory...")
except Exception as e:
    print(f"❌ Error while loading the dataset: {e}")

# Save the processed table to the specified directory
try:
    train_table.save('./train_data')
    print("🎉 The dataset has been successfully saved to './train_data'. Ready for further use!")
except Exception as e:
    print(f"❌ Error while saving the dataset: {e}")

🔄 Loading the dataset from the provided file paths... Please wait.
✅ Dataset loaded successfully! Now saving the processed table to the directory...
🎉 The dataset has been successfully saved to './train_data'. Ready for further use!


## Viewing the Data

## Setting up the Configuration

In [None]:
# Create an Input object for the training data, specifying the data source and type
try:
    my_training_data_input = Input(
        type=AssetTypes.MLTABLE,
        path="./train_data"
    )
    print(f"✅ Training data input created successfully. Path: {my_training_data_input.path}")
except Exception as e:
    print(f"❌ Error while creating training data input: {e}")

# Configure and initialize the regression job with specified parameters
try:
    regression_job = automl.regression(
        compute="notebook-compute",
        experiment_name="autoML",
        training_data=my_training_data_input,
        target_column_name="G3",
        primary_metric="normalized_mean_absolute_error",
        n_cross_validations=5,
        enable_model_explainability=True
    )
    print(f"✅ Regression job initialized successfully with target column: {regression_job.target_column_name}")
except Exception as e:
    print(f"❌ Error while initializing regression job: {e}")

# Set job execution limits (optional) to manage training time and trials
try:
    regression_job.set_limits(
        timeout_minutes=15,
        trial_timeout_minutes=15,
        max_trials=5,
        enable_early_termination=True
    )
    print(f"✅ Job limits set successfully. Max trials: {regression_job.limits.max_trials}")
except Exception as e:
    print(f"❌ Error while setting job limits: {e}")

# Define optional training settings to customize model training
try:
    regression_job.set_training(
        enable_onnx_compatible_models=True,
        enable_vote_ensemble=True
    )
    print(f"✅ Training settings configured. ONNX compatible models: {regression_job.training.enable_onnx_compatible_models}")
except Exception as e:
    print(f"❌ Error while setting training properties: {e}")
    
# Print summary of configuration
try:
    print(f"🔍 Experiment name: {regression_job.experiment_name}")
    print(f"🔍 Primary metric selected: {regression_job.primary_metric}")
    print(f"🔍 Cross-validation folds: {regression_job.n_cross_validations}")
    print(f"🔍 Early termination enabled: {regression_job.limits.enable_early_termination}")
    print(f"🔍 Ensemble voting enabled: {regression_job.training.enable_vote_ensemble}")
except Exception as e:
    print(f"❌ Error while printing job configuration summary: {e}")


✅ Training data input created successfully. Path: ./train_data
🔍 Inspecting the properties of the Input object:
{'type': 'mltable', '_port_name': None, 'description': None, 'path': './train_data', 'path_on_compute': None, 'mode': None, 'default': None, 'optional': None, '_is_inferred_optional': False, 'min': None, 'max': None, 'enum': None, 'datastore': None}
✅ Regression job initialized successfully with target column: G3
✅ Job limits set successfully. Max trials: 5
✅ Training settings configured. ONNX compatible models: True
🔍 Experiment name: autoML
🔍 Primary metric selected: NormalizedMeanAbsoluteError
🔍 Cross-validation folds: 5
🔍 Early termination enabled: True
🔍 Ensemble voting enabled: True


## Run the Job

In [45]:
# Submit the AutoML job to the backend for execution
try:
    returned_job = ml_client.jobs.create_or_update(
        regression_job
    )
    print(f"✅ Job created successfully: {returned_job}")
except Exception as e:
    print(f"❌ Error while creating or updating the job: {e}")

# Retrieve the URL for monitoring the job status
try:
    job_status_url = returned_job.services["Studio"].endpoint
    print(f"🔗 You can monitor the job status here: {job_status_url}")
except Exception as e:
    print(f"❌ Error while retrieving the job status URL: {e}")

❌ Error while creating or updating the job: ["Value <azure.ai.ml._restclient.v2024_01_01_preview.models._models_py3.CustomNCrossValidations object at 0x7f764c6e70a0> passed is not in set ['auto']", "int() argument must be a string, a bytes-like object or a real number, not 'CustomNCrossValidations'"]
🔗 You can monitor the job status here: https://ml.azure.com/runs/upbeat_skin_2f3m0gf59k?wsid=/subscriptions/48778e11-0fc7-4fc8-a16c-304a430e61a4/resourcegroups/student-performance-rg/workspaces/student-performance-ws&tid=f804f881-90d8-4e7d-8309-7fb565eaaf2c
