In [None]:
# ---------------------------------------------------------
# NOTEBOOK: 2_azure_upload_mltable.ipynb
# GOAL: Register the Canadian-Enriched Data as an Azure Asset
# ---------------------------------------------------------

import os
import shutil
import pandas as pd
import time
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

# 1. CONNECT TO AZURE
# Authenticate with whatever credential source DefaultAzureCredential finds,
# then build the client from the workspace config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

# Echo the workspace name so it is obvious which workspace this run targets.
workspace_name = ml_client.workspace_name
print(f"Connected to Workspace: {workspace_name}")

# 2. PREPARE THE FOLDER STRUCTURE
# The upload folder is rebuilt from scratch on every run, so files left over
# from an earlier run never end up inside the "Canadian Data Package".
folder_path = "data_clinical_upload"

stale_folder_present = os.path.exists(folder_path)
if stale_folder_present:
    # Remove the previous folder together with everything inside it.
    shutil.rmtree(folder_path)

os.makedirs(folder_path)

# 3. MOVE THE DATA
# Notebook 1 (Step 1) is expected to have written
# 'diabetes_clinical_enriched.csv' into the working directory.
source_file = "diabetes_clinical_enriched.csv"
destination_file = f"{folder_path}/diabetes_clinical.csv"

# The CSV is round-tripped through pandas rather than copied, so the file in
# the upload folder is re-serialised without an index column and under the
# name that the MLTable file refers to.
try:
    # Only the read is guarded: a FileNotFoundError here really does mean the
    # source CSV is missing.
    df = pd.read_csv(source_file)
except FileNotFoundError:
    print(f"ERROR: Could not find '{source_file}'. Did you run Notebook 1?")
    # Stop the run. Carrying on would still write the MLTable file and
    # register an asset that points at a CSV which was never created.
    raise

df.to_csv(destination_file, index=False)
print(f"Data moved to '{folder_path}' successfully.")

# 4. CREATE THE MLTABLE FILE (The Map)
# The MLTable definition sits next to the CSV in the upload folder. It names
# the data file and says how to parse it: comma-delimited, UTF-8, with a
# header row.
mltable_content = """
paths:
  - file: ./diabetes_clinical.csv
transformations:
  - read_delimited:
      delimiter: ','
      encoding: 'utf8'
      header: 'all_files_same_headers'
"""

# The file must be called exactly "MLTable" (no extension).
mltable_path = f"{folder_path}/MLTable"
with open(mltable_path, "w") as mltable_file:
    mltable_file.write(mltable_content)

print("MLTable file created.")

# 5. REGISTER THE ASSET IN AZURE
# We use a specific name to signal this is the 'Clinical' version
asset_name = "diabetes-clinical-enriched-130us"

# The version is a local timestamp. Year and seconds are included because a
# month/day/hour/minute stamp alone repeats every year and is identical for
# two runs started within the same minute, which would collide with a
# version that is already registered.
current_version = time.strftime("%Y%m%d_%H%M%S")

# Describe the local upload folder (CSV + MLTable file) as an MLTable asset.
my_data = Data(
    name=asset_name,
    version=current_version,
    type=AssetTypes.MLTABLE,
    path=folder_path,
    description="Diabetes data mapped to ICD-10-CA standards with high-risk flags.",
)

print(f"Uploading and Registering Asset: {asset_name}:{current_version}...")
created_data = ml_client.data.create_or_update(my_data)

# Report the name and version from the object the service returned, so the
# summary reflects what was actually registered.
print("---------------------------------------------------------")
print("VICTORY! Data is now in the Cloud.")
print(f"Asset Name: {created_data.name}")
print(f"Version: {created_data.version}")
print("---------------------------------------------------------")