# AutoML

## Setting Up ML Client

In [1]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Authenticate with DefaultAzureCredential and attach to the workspace that
# MLClient.from_config resolves from the local config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

# Echo the resolved workspace coordinates as a quick connection check.
workspace_summary = (
    "\n✅ Azure ML Workspace Loaded Successfully\n",
    f"🔑 Subscription ID : {ml_client.subscription_id}",
    f"📦 Resource Group  : {ml_client.resource_group_name}",
    f"🧠 Workspace Name  : {ml_client.workspace_name}\n",
)
for summary_line in workspace_summary:
    print(summary_line)

Found the config file in: /config.json



✅ Azure ML Workspace Loaded Successfully

🔑 Subscription ID : 03571d3d-f675-492f-8346-8b04c78ed80f
📦 Resource Group  : student-performance-rg
🧠 Workspace Name  : student-performance-ws



## Grabbing the Name of the Storage Account

In [2]:
from azure.mgmt.storage import StorageManagementClient

# The storage lookup is scoped to the workspace's own subscription and
# resource group, both taken from the already-connected ml_client.
subscription_id = ml_client.subscription_id
resource_group = ml_client.resource_group_name

storage_client = StorageManagementClient(credential, subscription_id)

# Materialise every storage account listed for the resource group.
accounts = [
    listed
    for listed in storage_client.storage_accounts.list_by_resource_group(resource_group)
]

# Without at least one account there is nothing to continue with.
if len(accounts) == 0:
    raise Exception("❌ No storage accounts found in this resource group.")

# The first listed account is the one used from here on.
storage_account_name = accounts[0].name

# Show everything that was found, then the account that was picked.
print("\n📦 Storage Accounts in Resource Group:")
for account in accounts:
    print("•", account.name)

print(f"\n✅ Using storage account named: {storage_account_name}")


📦 Storage Accounts in Resource Group:
• studperfstor

✅ Using storage account named: studperfstor


## Grabbing the Name of the Container with JSON Data

In [3]:
from azure.storage.blob import BlobServiceClient

# Ask the management client for the account's access keys and keep the first one.
keys = storage_client.storage_accounts.list_keys(resource_group, storage_account_name)
storage_key = keys.keys[0].value

# Blob service client for the account's blob endpoint, authenticated with that key.
account_url = f"https://{storage_account_name}.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url=account_url, credential=storage_key)

print("\n📁 Blob Containers in the Storage Account (filtered):")

# Keep only containers whose (case-insensitive) name mentions neither
# "insights" nor "azureml".
filtered_containers = []
for container in blob_service_client.list_containers():
    lowered_name = container.name.lower()
    if "insights" in lowered_name or "azureml" in lowered_name:
        continue
    filtered_containers.append(container.name)

if len(filtered_containers) == 0:
    raise Exception("❌ No user-defined containers found (filtered out 'insights' and 'azureml').")

# The first container that survived the filter is the one used from here on.
container_name = filtered_containers[0]

# Report every surviving container, then the selection.
for name in filtered_containers:
    print("•", name)

print(f"\n✅ Using container: {container_name}")


📁 Blob Containers in the Storage Account (filtered):
• student-math-project-files

✅ Using container: student-math-project-files


## Locating the Name of the Config File

In [4]:
# Client bound to the container selected earlier.
container_client = blob_service_client.get_container_client(container_name)

print(f"\n📄 Files in Container '{container_name}':")

# List every blob; remember only the first one whose name ends in ".json"
# (compared case-insensitively).
config_name = None
for blob in container_client.list_blobs():
    print("•", blob.name)
    if config_name is not None:
        # A config file was already chosen; keep listing but do not replace it.
        continue
    if blob.name.lower().endswith(".json"):
        config_name = blob.name

if not config_name:
    raise Exception("❌ No .json file found in this container.")

print(f"\n✅ Using config file: {config_name}")


📄 Files in Container 'student-math-project-files':
• config.json
• student-mat.csv

✅ Using config file: config.json


## Importing Config Information and Saving It In Memory

In [5]:
import json
from azure.storage.blob import BlobClient

# === Fetch the config blob and decode it as JSON ===
blob_url = f"https://{storage_account_name}.blob.core.windows.net/{container_name}/{config_name}"
blob_client = BlobClient.from_blob_url(blob_url, credential=credential)
json_bytes = blob_client.download_blob().readall()
json_data = json.loads(json_bytes)

# NOTE: subscription_id, resource_group, storage_account_name and container_name
# were already bound by earlier cells; below they are rebound to the values
# stored in the config file.

# === Metadata ===
metadata_section = json_data["metadata"]
created_at = metadata_section["created_at"]
created_year = metadata_section["created_year"]
created_month = metadata_section["created_month"]
created_day = metadata_section["created_day"]
created_time = metadata_section["created_time"]

# === Azure Info ===
azure_section = json_data["azure"]
subscription_id = azure_section["subscription_id"]
resource_group = azure_section["resource_group"]
location = azure_section["location"]
storage_account_name = azure_section["storage_account_name"]
storage_container_uri = azure_section["storage_container_uri"]
managed_identity_principal_id = azure_section["managed_identity_principal_id"]
auth_mode = azure_section["auth_mode"]

# === Workspace Info ===
workspace_section = json_data["workspace"]
workspace_name = workspace_section["workspace_name"]
storage_account_id = workspace_section["storage_account_id"]
key_vault_id = workspace_section["key_vault_id"]
app_insights_id = workspace_section["application_insights_id"]

# === Datastore Info ===
datastore_section = json_data["datastore"]
datastore_name = datastore_section["datastore_name"]
container_name = datastore_section["container_name"]
blob_name = datastore_section["blob_name"]

# === Dataset Info ===
dataset_section = json_data["dataset"]
dataset_name = dataset_section["dataset_name"]
dataset_uri = dataset_section["dataset_uri"]
dataset_version = dataset_section["dataset_version"]
dataset_description = dataset_section["dataset_description"]
delimiter = dataset_section["delimiter"]
encoding = dataset_section["encoding"]
has_header = dataset_section["has_header"]

# === Compute Info ===
compute_section = json_data["compute"]
compute_name = compute_section["compute_name"]
compute_size = compute_section["compute_size"]

# === Report: one titled group per config section ===
print("\n📅 Workspace Metadata:")
print(f"  • Created At   : {created_at}")
print(f"  • Date         : {created_day} {created_month} {created_year}")
print(f"  • Time         : {created_time}")

print("\n🔐 Azure Configuration:")
print(f"  • Subscription ID      : {subscription_id}")
print(f"  • Resource Group       : {resource_group}")
print(f"  • Location             : {location}")
print(f"  • Auth Mode            : {auth_mode}")
print(f"  • Managed Identity ID  : {managed_identity_principal_id}")

print("\n📦 Storage:")
print(f"  • Account Name         : {storage_account_name}")
print(f"  • Container URI        : {storage_container_uri}")

print("\n🧠 Workspace:")
print(f"  • Workspace Name       : {workspace_name}")
print(f"  • Storage Account ID   : {storage_account_id}")
print(f"  • Key Vault ID         : {key_vault_id}")
print(f"  • App Insights ID      : {app_insights_id}")

print("\n🗃️ Datastore:")
print(f"  • Datastore Name       : {datastore_name}")
print(f"  • Container Name       : {container_name}")
print(f"  • Blob Name            : {blob_name}")

print("\n📊 Dataset:")
print(f"  • Dataset Name         : {dataset_name}")
print(f"  • Version              : {dataset_version}")
print(f"  • Description          : {dataset_description}")
print(f"  • URI                  : {dataset_uri}")
print(f"  • Delimiter            : {delimiter}")
print(f"  • Encoding             : {encoding}")
print(f"  • Has Header           : {has_header}")

print("\n🖥️ Compute:")
print(f"  • Compute Name         : {compute_name}")
print(f"  • Compute Size         : {compute_size}\n")


📅 Workspace Metadata:
  • Created At   : 2025-04-16T15:36:33+0200
  • Date         : 16 April 2025
  • Time         : 15:36:33 CEST

🔐 Azure Configuration:
  • Subscription ID      : 03571d3d-f675-492f-8346-8b04c78ed80f
  • Resource Group       : student-performance-rg
  • Location             : norwayeast
  • Auth Mode            : managed_identity
  • Managed Identity ID  : a4d92aeb-6282-4e38-8fbb-69806440a455

📦 Storage:
  • Account Name         : studperfstor
  • Container URI        : https://studperfstor.blob.core.windows.net/student-math-project-files

🧠 Workspace:
  • Workspace Name       : student-performance-ws
  • Storage Account ID   : /subscriptions/03571d3d-f675-492f-8346-8b04c78ed80f/resourceGroups/student-performance-rg/providers/Microsoft.Storage/storageAccounts/studperfstor
  • Key Vault ID         : /subscriptions/03571d3d-f675-492f-8346-8b04c78ed80f/resourceGroups/student-performance-rg/providers/Microsoft.KeyVault/vaults/studentpkeyvault
  • App Insights ID      :

In [8]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import pandas as pd

# Dataset coordinates and CSV parsing options used by this cell.
# NOTE: these rebind dataset_name, dataset_version, delimiter and encoding,
# which the config-loading cell also assigns from the downloaded config file.
dataset_name = "student-math"
dataset_version = "1"
delimiter = ";"
encoding = "utf-8"

# Connect through the workspace config file, the same way the first cell does,
# instead of embedding subscription / resource-group / workspace identifiers
# in the notebook source.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

print(f"\n📦 Fetching dataset '{dataset_name}' (v{dataset_version})...")

try:
    # SDK v2 style: fetch the data asset directly by name and version.
    dataset = ml_client.data.get(name=dataset_name, version=dataset_version)

    # A data asset without a path cannot be read; treat it as a load failure
    # rather than continuing with `df` undefined.
    if not dataset.path:
        raise ValueError("❌ No accessible path found in data asset.")

    print(f"📄 Path: {dataset.path}")
    df = pd.read_csv(dataset.path, delimiter=delimiter, encoding=encoding)
except Exception as e:
    # Keep the friendly message, but re-raise so the cell visibly fails and
    # later cells do not run against a missing `df`.
    print("❌ Failed to load dataset with pandas.")
    print("Error:", e)
    raise
else:
    print("✅ Dataset loaded successfully!")
    print(df.head())


📦 Fetching dataset 'student-math' (v1)...
📄 Path: azureml://subscriptions/03571d3d-f675-492f-8346-8b04c78ed80f/resourcegroups/student-performance-rg/workspaces/student-performance-ws/datastores/studentmathdatastore/paths/student-mat.csv
✅ Dataset loaded successfully!
  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  ...  \
0     GP   F   18       U     GT3       A     4     4  at_home   teacher  ...   
1     GP   F   17       U     GT3       T     1     1  at_home     other  ...   
2     GP   F   15       U     LE3       T     1     1  at_home     other  ...   
3     GP   F   15       U     GT3       T     4     2   health  services  ...   
4     GP   F   16       U     GT3       T     3     3    other     other  ...   

  famrel freetime  goout  Dalc  Walc health absences  G1  G2  G3  
0      4        3      4     1     1      3        6   5   6   6  
1      5        3      3     1     1      3        4   5   5   6  
2      4        3      2     2     3      3