In [3]:
import os
import shutil
import pandas as pd
import time
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

# 1. CONNECT TO AZURE
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)
print(f"Connected to Workspace: {ml_client.workspace_name}")

# 2. PREPARE THE FOLDER STRUCTURE
folder_path = "data_clinical_upload"
if os.path.exists(folder_path):
    shutil.rmtree(folder_path)
os.makedirs(folder_path)

# 3. COPY THE NEW ENRICHED CSV (with binary flag) INTO THE UPLOAD FOLDER
source_file = "../data/processed_icd/diabetes_clinical_enriched.csv"
destination_file = f"{folder_path}/diabetes_clinical.csv"

df = pd.read_csv(source_file)
df.to_csv(destination_file, index=False)
print(f"Data copied to '{folder_path}/diabetes_clinical.csv'.")

# 4. CREATE THE MLTABLE FILE (if not already present)
mltable_content = """
paths:
  - file: ./diabetes_clinical.csv
transformations:
  - read_delimited:
      delimiter: ','
      encoding: 'utf8'
      header: 'all_files_same_headers'
"""

with open(f"{folder_path}/MLTable", "w") as f:
    f.write(mltable_content)
print("MLTable file created.")

# 5. REGISTER / VERSION THE ASSET IN AZURE
asset_name = "diabetes-clinical-enriched-130us"
current_version = time.strftime("%m%d_%H%M")

my_data = Data(
    path=folder_path,
    type=AssetTypes.MLTABLE,
    description="Diabetes data mapped to ICD-10-CA with High_Risk_A1C and readmitted_30d_binary.",
    name=asset_name,
    version=current_version,
)

print(f"Uploading and Registering Asset: {asset_name}:{current_version}...")
created_data = ml_client.data.create_or_update(my_data)

print("---------------------------------------------------------")
print(f"VICTORY! Data is now in the Cloud.")
print(f"Asset Name: {asset_name}")
print(f"Version: {current_version}")
print("---------------------------------------------------------")


Found the config file in: C:\Users\achar\OneDrive\Desktop\Project_Narayan\Clinical-Readmission-AI-ICD10\config.json


Overriding of current TracerProvider is not allowed
Overriding of current LoggerProvider is not allowed
Overriding of current MeterProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


Connected to Workspace: AML-Clinical-Readmission


  df = pd.read_csv(source_file)


Data copied to 'data_clinical_upload/diabetes_clinical.csv'.
MLTable file created.
Uploading and Registering Asset: diabetes-clinical-enriched-130us:1127_1308...


[32mUploading data_clinical_upload (22.76 MBs): 100%|##########| 22764982/22764982 [00:00<00:00, 34028802.70it/s]
[39m



---------------------------------------------------------
VICTORY! Data is now in the Cloud.
Asset Name: diabetes-clinical-enriched-130us
Version: 1127_1308
---------------------------------------------------------
