# Analyze data drift in Azure ML

This tutorial uses NOAA weather data from Azure Open Datasets.

This tutorial will allow you to monitor data drift on an Azure ML dataset.

In [6]:
import os
from dotenv import load_dotenv
# load_dotenv() is expected to merge values from a local .env file (if one
# exists) into the process environment before the lookups below.
load_dotenv()

# Workspace coordinates, read from the environment in a fixed order.
# Each lookup raises KeyError when its variable is not set.
subscription_id, resource_group_name, workspace_name = (
    os.environ[env_key]
    for env_key in (
        "AZURE_SUBSCRIPTION_ID",
        "AZURE_RESOURCE_GROUP",
        "AZUREML_WORKSPACE_NAME",
    )
)

In [7]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# Scope used to probe whether a credential can actually issue a token.
ARM_SCOPE = "https://management.azure.com/.default"

try:
    credential = DefaultAzureCredential()
    # Request a token up front so an unusable credential is detected here
    # rather than on the first workspace call.
    credential.get_token(ARM_SCOPE)
except Exception:
    # DefaultAzureCredential could not be created or could not obtain a token:
    # switch to InteractiveBrowserCredential (presumably prompts for a
    # sign-in through a browser page).
    credential = InteractiveBrowserCredential()

In [8]:
from azure.ai.ml import MLClient
# Connect to the Azure ML workspace.
# First choice: let the SDK locate a workspace config file via from_config().
# Fallback: build the client explicitly from the subscription, resource group
# and workspace names defined earlier in the notebook, so that `ml_client` is
# always bound before it is printed or used by later cells.
try:
    ml_client = MLClient.from_config(credential=credential)
except Exception:
    ml_client = MLClient(
        credential=credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
    )

print(ml_client)

Found the config file in: .\.azureml\config.json
Class WorkspaceHubOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x000001B6255B5DB0>,
         subscription_id=ec967cb5-f2b0-43c2-9ba2-4a2eb94bbacc,
         resource_group_name=azureml-demos,
         workspace_name=azureml-demos)


In [9]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local folder holding the weather files and the version label to register.
my_path = "./data/weather/"
v1 = "initial"

DATASET_NAME = "weather"

# Definition of the data asset: a folder-type asset pointing at my_path.
my_data = Data(
    name=DATASET_NAME,
    version=v1,
    description="Open datasets NOAA weather data",
    path=my_path,
    type=AssetTypes.URI_FOLDER,
)

## create data asset if it doesn't already exist:
try:
    data_asset = ml_client.data.get(name=DATASET_NAME, version=v1)
    print(
        f"Data asset already exists. Name: {my_data.name}, version: {my_data.version}"
    )
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not mistaken for "asset not found".
    # Keep the returned asset so `data_asset` is bound on both paths.
    data_asset = ml_client.data.create_or_update(my_data)
    print(f"Data asset created. Name: {my_data.name}, version: {my_data.version}")

Data asset already exists. Name: weather, version: initial
