# ML Flow Example Notebook 01

Use MLflow to track the training process and register the resulting model.

See this article for explanation:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow-cli-runs 

by David Cochran

# Setup

## Connect to Azure Resources

In [None]:
# Connect to Azure Resources
from azureml.core import Workspace
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Read the workspace details from the local config file
ws = Workspace.from_config()

# Credential object used to authenticate the client below
credential = DefaultAzureCredential()

# Identifiers for this workspace, taken from the Workspace object
SUBSCRIPTION, RESOURCE_GROUP, WS_NAME = (
    ws.subscription_id,
    ws.resource_group,
    ws.name,
)

# Client handle bound to this workspace
ml_client = MLClient(
    workspace_name=WS_NAME,
    resource_group_name=RESOURCE_GROUP,
    subscription_id=SUBSCRIPTION,
    credential=credential,
)

# Optional: uncomment to inspect the connection details
# print(credential)
# print(SUBSCRIPTION)
# print(RESOURCE_GROUP)
# print(WS_NAME)
# print(ws.location)

## Data Setup

- Line up the data source

- Store as variable `df`


In [None]:
import pandas as pd

# Pull in data -- cleaned data ready for ML

# Get online data using !wget is done in Microsoft Learn in lower parts of this exercise:
# https://learn.microsoft.com/en-us/training/modules/explore-analyze-data-with-python/3-exercise-explore-data


# Provide the URL for the RAW version of the dataset in GitHub
# Ensure the GitHub URL includes "https://raw.githubusercontent.com/"
!wget https://raw.githubusercontent.com/drcochran-newman/datasets/main/churn_modeling/churn_cleaned.csv

# Now read the data from the dataset now saved locally in your current Azure ML directory
# Use the same file name
df = pd.read_csv('churn_cleaned.csv')

df.head()

# ML Process

In [None]:
# Imports

# Train/Test
from sklearn.model_selection import train_test_split

# Algorithm
# from sklearn.linear_model import LogisticRegression
# from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# from sklearn.ensemble import GradientBoostingClassifier

# Metrics
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# MLflow
import mlflow
import mlflow.sklearn

In [None]:
########################################################

# ML Variables -- shared settings read by the cells below

seed = 42          # random seed for the train/test split and the model
split = 0.2        # value passed as test_size to train_test_split
target = "Exited"  # name of the label column in df

########################################################

In [None]:
########################################################

# Settings that identify this particular training run

# Experiment the run is filed under
experiment_name = 'churn_variations'

# Algorithm label plus iteration suffix form the model/run name
algorithm = 'RandomForestClassifier'
training_iteration = '.04'

# The registered model and the run share one name
run_name = registered_model_name = f"{algorithm}{training_iteration}"

# Estimator to train, with the hyperparameters for this iteration
model = RandomForestClassifier(n_estimators=50, max_depth=16, random_state=seed)

########################################################

In [None]:
########################################################

# ML Train, Test, Track and Log
#
# Splits df into train/test sets, fits `model`, scores it on the
# test set, and records the run, its metrics and the fitted model
# with MLflow under `experiment_name` / `run_name`.

########################################################

# Associate with experiment
mlflow.set_experiment(experiment_name)

# Enable autologging before training so the fit below is captured
mlflow.sklearn.autolog()

# Define Features — all columns except target variable
features = df.drop(target, axis=1)

# Define Labels — only the target variable column
labels = df[target]

# Create Train and Test Splits
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=split, random_state=seed
)

# Run inside a context manager so the run is always closed, even when a
# step below raises. A bare start_run()/end_run() pair leaves the run
# active after an error, and re-running the cell then fails.
with mlflow.start_run(run_name=run_name):

    # Log the size of the full dataset (rows, and columns minus the target)
    mlflow.log_metric("num_samples", df.shape[0])
    mlflow.log_metric("num_features", df.shape[1] - 1)

    # Train and test the model
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Score of the second class in model.classes_, used for ROC AUC.
    # Assumes a binary target, which the default (binary) averaging of
    # precision/recall/f1 below requires as well.
    y_score = model.predict_proba(X_test)[:, 1]

    # Accuracy, precision, recall and F1 are expressed as percentages
    accuracy = accuracy_score(y_test, y_pred) * 100
    precision = precision_score(y_test, y_pred) * 100
    recall = recall_score(y_test, y_pred) * 100
    f1 = f1_score(y_test, y_pred) * 100

    # ROC AUC needs scores rather than hard class labels; with labels the
    # curve collapses to a single threshold. Reported on the 0-1 scale.
    auc = roc_auc_score(y_test, y_score)

    # Record the test-set metrics explicitly; previously they were
    # computed and then discarded
    mlflow.log_metrics(
        {
            "test_accuracy": accuracy,
            "test_precision": precision,
            "test_recall": recall,
            "test_f1": f1,
            "test_auc": auc,
        }
    )

    # Register the model to the workspace
    print("Registering the model via MLFlow")
    mlflow.sklearn.log_model(
        sk_model=model,
        registered_model_name=registered_model_name,
        artifact_path=registered_model_name,
    )

    # Save the model to a local directory
    # NOTE(review): save_model is expected to raise if this path already
    # exists from an earlier execution — confirm, and bump
    # training_iteration or remove the old directory before re-running.
    mlflow.sklearn.save_model(
        sk_model=model,
        path=registered_model_name
    )