In [1]:
from dotenv import load_dotenv
import os

# Read key/value pairs from a .env file into the process environment (python-dotenv).
load_dotenv()


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        EnvironmentError: if the variable is unset or empty, so that a missing
            setting is reported here rather than as an obscure failure when the
            workspace client is first used.
    """
    value = os.getenv(name)
    if not value:
        raise EnvironmentError(
            f"Environment variable {name!r} is not set; "
            "define it in your .env file or in the shell before running this notebook."
        )
    return value


# Azure ML workspace coordinates used to build the client below.
SUBSCRIPTION_ID = _require_env("SUBSCRIPTION_ID")
RESOURCE_GROUP = _require_env("RESOURCE_GROUP")
AML_WORKSPACE_NAME = _require_env("AML_WORKSPACE_NAME")

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Credential object used to authenticate the client
credential = DefaultAzureCredential()

# Coordinates of the target Azure ML workspace
workspace_scope = {
    "subscription_id": SUBSCRIPTION_ID,
    "resource_group_name": RESOURCE_GROUP,
    "workspace_name": AML_WORKSPACE_NAME,
}

# Handle to the workspace, used by the cells that follow
ml_client = MLClient(credential=credential, **workspace_scope)

Class WorkspaceHubOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


In [3]:
# Data asset to look up: its name and the specific version to use
data_asset_name = "CreditCards"
version_name = "2"

# Fetch the asset's record through the workspace client
data_asset = ml_client.data.get(
    version=version_name,
    name=data_asset_name,
)

In [None]:
import os
import argparse
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Read the parquet data referenced by the data asset into a DataFrame
asset_path = data_asset.path
credit_df = pd.read_parquet(asset_path)

# Preview the first five rows
credit_df.head(5)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [None]:
# Hold out 25% of the rows for evaluation. A fixed random_state makes the
# shuffle repeatable, so re-running the notebook from a fresh kernel produces
# the same split (and therefore comparable metrics between runs).
train_df, test_df = train_test_split(
    credit_df,
    test_size=0.25,
    random_state=42,
)

## Get the data ready for training ##

In [None]:
# Name of the column holding the label to predict
label_column = "default"

# Select (rather than pop) the label so train_df / test_df are not mutated:
# re-running this cell then gives the same result instead of a KeyError.
# Note that train_df / test_df therefore still contain the label column.
y_train = train_df[label_column]
y_test = test_df[label_column]

# Every remaining column becomes a feature; convert to NumPy arrays
X_train = train_df.drop(columns=label_column).values
X_test = test_df.drop(columns=label_column).values

## Track the models with MLflow ##

In [None]:
# Look up the current workspace and read its MLflow tracking URI
workspace = ml_client.workspaces.get(ml_client.workspace_name)
mlflow_tracking_uri = workspace.mlflow_tracking_uri

# Point MLflow at that tracking URI
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Experiment under which the runs are recorded
mlflow.set_experiment("Credit cards training tutorial")

# Turn on scikit-learn autologging
mlflow.sklearn.autolog()

## Train the model ##

In [None]:
# Train Gradient Boosting Classifier
print(f"Training with data of shape {X_train.shape}")

# Using start_run() as a context manager ends the run when the block exits,
# even if training raises, so a failed attempt does not leave an active run
# behind that would make the next start_run() call fail.
# can specify run name using run_name="gradient-boosting-classifier"
with mlflow.start_run():
    # Can log custom metrics or other information
    # mlflow.log_metric('mymetric', 1)

    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

    # Store the fitted model in the run under a named artifact path
    mlflow.sklearn.log_model(clf, "gradient-boosting-classifier")

Training with data of shape (22500, 23)




              precision    recall  f1-score   support

           0       0.84      0.95      0.89      5843
           1       0.68      0.37      0.48      1657

    accuracy                           0.82      7500
   macro avg       0.76      0.66      0.69      7500
weighted avg       0.81      0.82      0.80      7500



## Try a different classifier ##

In [None]:
# Train AdaBoost Classifier
from sklearn.ensemble import AdaBoostClassifier

print(f"Training with data of shape {X_train.shape}")

# Name shown for this run in the tracking UI
run_name = "adaboost-classifier"

# Using start_run() as a context manager ends the run when the block exits,
# even if training raises, so a failed attempt does not leave an active run
# behind that would make the next start_run() call fail.
with mlflow.start_run(run_name=run_name):
    ada = AdaBoostClassifier()
    ada.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = ada.predict(X_test)
    print(classification_report(y_test, y_pred))

    # Store the fitted model in the run under a named artifact path
    mlflow.sklearn.log_model(ada, "adaboost-classifier")

Training with data of shape (22500, 23)




              precision    recall  f1-score   support

           0       0.83      0.96      0.89      5843
           1       0.67      0.33      0.44      1657

    accuracy                           0.82      7500
   macro avg       0.75      0.64      0.67      7500
weighted avg       0.80      0.82      0.79      7500

