# 3.3 Step 3.1: MLflow Logistic Regression
## Custom ML Experiment using Churn Modelling Dataset

Add MLflow methods for tracking the training process and registering the model.

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow-cli-runs 

by David Cochran



# Setup

## Connect to Azure Resources

In [None]:
# Connect to Azure Resources
from azureml.core import Workspace
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Read the workspace details from the config file; they are reused below
ws = Workspace.from_config()

# Credential object used to authenticate
credential = DefaultAzureCredential()

# Workspace coordinates, taken from the Workspace object loaded above
SUBSCRIPTION, RESOURCE_GROUP, WS_NAME = ws.subscription_id, ws.resource_group, ws.name

# Client handle bound to this workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WS_NAME,
)

## Data Setup

- Line up the data source

- Store as variable `df`


In [2]:
import pandas as pd

# Pull in data -- cleaned data ready for ML

# Get online data using !wget is done in Microsoft Learn in lower parts of this exercise:
# https://learn.microsoft.com/en-us/training/modules/explore-analyze-data-with-python/3-exercise-explore-data


# Provide the URL for the RAW version of the dataset in GitHub
# Ensure the GitHub URL includes "https://raw.githubusercontent.com/"
!wget https://raw.githubusercontent.com/drcochran-newman/datasets/main/churn_modeling/churn_cleaned.csv

# Now read the data from the dataset now saved locally in your current Azure ML directory
# Use the same file name
df = pd.read_csv('churn_cleaned.csv')

df.head()

--2024-06-25 20:43:38--  https://raw.githubusercontent.com/drcochran-newman/datasets/main/churn_modeling/churn_cleaned.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 380085 (371K) [text/plain]
Saving to: ‘churn_cleaned.csv.7’


2024-06-25 20:43:39 (10.5 MB/s) - ‘churn_cleaned.csv.7’ saved [380085/380085]



Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,1,42,2,0.0,1,1,1,101348.88,1
1,608,1,1,41,1,83807.86,1,0,1,112542.58,0
2,502,0,1,42,8,159660.8,3,1,0,113931.57,1
3,699,0,1,39,1,0.0,2,0,0,93826.63,0
4,850,1,1,43,2,125510.82,1,1,1,79084.1,0


# ML Process

In [3]:
# Imports

# Train/Test
from sklearn.model_selection import train_test_split

# Algorithm
from sklearn.linear_model import LogisticRegression
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.ensemble import GradientBoostingClassifier

# Metrics
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

In [4]:
# ML Variables

# Name of the column used as the label
target = "Exited"

# Seed supplied wherever a random_state is required
seed = 42

# Fraction of the rows held out as the test split
split = 0.2

# Define the Model
# Specify the algorithm and hyperparameters



In [5]:
# Name the trained model is registered and saved under
registered_model_name = "Churn_Modelling_LogReg.04"

# Hyperparameter(s)
C = 100

# Logistic regression: no intercept term, at most 500 solver iterations,
# seeded for repeatable results
model = LogisticRegression(
    C=C,
    max_iter=500,
    fit_intercept=False,
    random_state=seed,
)

In [6]:
import mlflow
import mlflow.sklearn

import shutil  # stdlib; used below to clear a previous local model export

# Enable autologging so the fit below is captured in the run
mlflow.sklearn.autolog()

# Start Logging
# The context manager ends the run even when a step below raises, so a failed
# cell does not leave an active run behind for the next execution.
with mlflow.start_run():

    # Define Features — all columns except target variable
    features = df.drop(target, axis=1)

    # Define Labels — only the target variable column
    labels = df[target]

    # Create Train and Test Splits
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=split, random_state=seed
    )

    # Log the size of the full dataset (rows, and columns excluding the target)
    mlflow.log_metric("num_samples", df.shape[0])
    mlflow.log_metric("num_features", df.shape[1] - 1)

    # Train and test the model
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # Probability of the positive class; ROC AUC needs scores, not hard labels
    y_score = model.predict_proba(X_test)[:, 1]

    # Threshold-based metrics are expressed as percentages; AUC stays on 0-1
    accuracy = accuracy_score(y_test, y_pred) * 100
    precision = precision_score(y_test, y_pred) * 100
    recall = recall_score(y_test, y_pred) * 100
    f1 = f1_score(y_test, y_pred) * 100
    auc = roc_auc_score(y_test, y_score)

    # Record the hold-out metrics under explicit names so they are stored with
    # the run rather than only living in kernel variables
    mlflow.log_metrics(
        {
            "test_accuracy_pct": accuracy,
            "test_precision_pct": precision,
            "test_recall_pct": recall,
            "test_f1_pct": f1,
            "test_roc_auc": auc,
        }
    )

    ##########################
    #<save and register model>
    ##########################
    # Registering the model to the workspace
    print("Registering the model via MLFlow")
    mlflow.sklearn.log_model(
        sk_model=model,
        registered_model_name=registered_model_name,
        artifact_path=registered_model_name,
    )

    # Saving the model to a file
    # save_model refuses to write into an existing non-empty directory, so drop
    # the export left by a previous execution to keep this cell re-runnable.
    shutil.rmtree(registered_model_name, ignore_errors=True)
    mlflow.sklearn.save_model(
        sk_model=model,
        path=registered_model_name
    )
    ###########################
    #</save and register model>
    ###########################

# Logging stops here: leaving the with-block ends the run

Successfully registered model 'Churn_Modelling_LogReg.04'.
2024/06/25 20:43:52 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Churn_Modelling_LogReg.04, version 1
Created version '1' of model 'Churn_Modelling_LogReg.04'.


Registering the model via MLFlow
