In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

  from cryptography.hazmat.backends import default_backend
  """Entry point for launching an IPython kernel.


Ready to use Azure ML 1.47.0 to work with mlopsprojectNEW


In [2]:
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, roc_curve
import os

  """Entry point for launching an IPython kernel.


In [4]:
# Start MLflow run
# End any active run
if mlflow.active_run():
    mlflow.end_run()

# Start MLflow run
mlflow.start_run()

# Set experiment name (optional, if not set, a default experiment is used)
mlflow.set_experiment('churn_train_mlflow')

print("Loading Data...")
df = pd.read_csv('data/Churn_Modelling.csv')
df_encoded = pd.get_dummies(df[['Geography', 'Gender']], drop_first=True)
df = df.drop(['Geography', 'Gender'], axis=1)
df_combined = pd.concat([df, df_encoded], axis=1)

# Separate features and labels
y = df_combined[['Exited']].values
X = df_combined[['CreditScore','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary','Geography_Germany','Geography_Spain','Gender_Male']].values

# Split data into 80% training, 10% test, and 10% holdout
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

Loading Data...


In [5]:
# Further split the temp data into 50% for test and 50% for holdout (10% each of original data)
X_test, X_holdout, y_test, y_holdout = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a Random Forest model
print('Training a Random Forest model')
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Log model parameters (optional)
mlflow.log_param("n_estimators", model.n_estimators)
mlflow.log_param("criterion", model.criterion)
mlflow.log_param("max_depth", model.max_depth)

# Calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
mlflow.log_metric('Accuracy', acc)

# Calculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print('AUC:', auc)
mlflow.log_metric('AUC', auc)

Training a Random Forest model


  


Accuracy: 0.756312
AUC: 0.8425701905464883


In [6]:
# Save the trained model locally
model_file = 'churn_model.pkl'
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
model_file_path = os.path.join(output_dir, model_file)
joblib.dump(value=model, filename=model_file_path)

print(f"Model saved to {model_file_path}")

# Log the model with MLflow
mlflow.sklearn.log_model(model, "model")
# Log the saved model file
mlflow.log_artifact(model_file_path, artifact_path="outputs")

Model saved to outputs/churn_model.pkl


In [7]:
# Save and log the holdout set for later use
holdout_file_path = os.path.join(output_dir, 'holdout_set.pkl')
joblib.dump((X_holdout, y_holdout), holdout_file_path)
mlflow.log_artifact(holdout_file_path, artifact_path="outputs")

# End MLflow run
mlflow.end_run()

print("Run completed, holdout set saved and logged.")

Run completed, holdout set saved and logged.
