In [0]:
#%pip install evidently==0.2.0

In [0]:
#%pip install mlflow

In [0]:
#%pip install numpy==1.23.5

In [0]:
#%pip install scikit-learn==0.24.1

In [0]:
import evidently
import pandas as pd
import numpy as np

## Load Model 

In [0]:
import mlflow.pyfunc

# URI of the logged model's artifact directory in the Databricks MLflow tracking store.
model_path = "dbfs:/databricks/mlflow-tracking/1303991687879297/969793f27af7446489a004f6163b7cca/artifacts/model"

# Load it through the generic pyfunc flavor; later cells call model.predict(...) on it.
model = mlflow.pyfunc.load_model(model_uri=model_path)

In [0]:
# List the files under /FileStore/tables to see which datasets are available.
dbutils.fs.ls("/FileStore/tables")

## Load Data

In [0]:
# Read the training CSV from /dbfs/FileStore/tables into a DataFrame.
train_data = pd.read_csv("/dbfs/FileStore/tables/train_data.csv")

In [0]:
# Read the test CSV from /dbfs/FileStore/tables and preview its first rows.
test_data = pd.read_csv("/dbfs/FileStore/tables/test_data.csv")
test_data.head()

## Validate Results

In [0]:
# Build the feature matrix for scoring.
# Besides the label, also drop any prediction column left behind by an earlier run of
# this cell (and the renamed "target"/"prediction" columns created further down), so
# re-running the cell never feeds labels or old predictions to the model.
X_test = test_data.drop(
    columns=["Diabetes_binary", "target", "Predictions", "prediction"],
    errors="ignore",
)

# Predict using the loaded model
y_pred = model.predict(X_test)

# Add predictions to the test data for evaluation
test_data["Predictions"] = y_pred


In [0]:
from sklearn.metrics import f1_score, classification_report

# Ground-truth labels and model predictions, both read back from test_data.
y_true = test_data["Diabetes_binary"]
y_pred = test_data["Predictions"]

# Per-class precision / recall / F1 summary for the unmodified test set.
print("\nClassification Report:")
print(classification_report(y_true=y_true, y_pred=y_pred))


## Model Monitoring with Evidently AI 

In [0]:
from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab, ClassificationPerformanceTab


In [0]:
# Preview the first rows of train_data.
train_data.head()

In [0]:
# Preview test_data, which by now also carries the "Predictions" column added earlier.
test_data.head()

In [0]:
# Build the training feature matrix.
# Drop the label under both of its names ("Diabetes_binary" before the rename cell,
# "target" after it) and any "prediction" column from an earlier run of this cell, so
# re-running never passes the label or old predictions to the model.
X_train = train_data.drop(
    columns=["Diabetes_binary", "target", "prediction"],
    errors="ignore",
)

# Generate predictions using the trained model
train_predictions = model.predict(X_train)

# Add predictions to the train_data
train_data["prediction"] = train_predictions


In [0]:
# Use the column names "target" and "prediction" in both frames, which are the names
# Evidently's default column mapping looks for.
# rename() returns a new frame; reassigning (instead of inplace=True) avoids mutating
# the objects that earlier cells already displayed.
train_data = train_data.rename(columns={"Diabetes_binary": "target"})
test_data = test_data.rename(
    columns={"Diabetes_binary": "target", "Predictions": "prediction"}
)

# Verify the changes
print("Train Data Columns:", train_data.columns)
print("Test Data Columns:", test_data.columns)

In [0]:
from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab, ClassificationPerformanceTab

# Baseline report: data drift plus classification performance.
dashboard = Dashboard(
    tabs=[
        DataDriftTab(),
        ClassificationPerformanceTab(),
    ]
)

# train_data is passed first and the unmodified test_data second.
dashboard.calculate(train_data, test_data)

# Write the rendered dashboard to DBFS as a standalone HTML file.
dashboard.save("/dbfs/FileStore/evidently_dashboard.html")

Download link: https://3715126558473529.9.gcp.databricks.com/files/evidently_dashboard.html

## Change 2 features: Swap Smoker and Stroke columns

In [0]:
# Class balance of the two features that are about to be swapped.
for column in ("Smoker", "Stroke"):
    print(test_data[column].value_counts())

In [0]:
# Plot histograms of Smoker and Stroke to compare their distributions before the swap.
test_data[['Smoker', 'Stroke']].hist(figsize=(10, 5))

## Swap Smoker and Stroke columns
By swapping the Stroke and Smoker columns, we will create a dataset with mostly smokers and an even distribution of stroke cases.

In [0]:
# Work on an independent copy so the feature swap below does not alter test_data.
test_data_swap = test_data.copy()

In [0]:
# Remove the predictions made on the unmodified features; they are recomputed after the swap.
test_data_swap = test_data_swap.drop(columns=['prediction'])
test_data_swap.head()

In [0]:
# Swap the two features by reading the values from the untouched test_data rather than
# from test_data_swap itself. The original self-referential swap toggled the columns
# back on every re-run of the cell; this form always yields the swapped frame.
# test_data_swap was copied from test_data, so the two share the same index.
test_data_swap = test_data_swap.assign(
    Smoker=test_data["Stroke"],
    Stroke=test_data["Smoker"],
)

In [0]:
# Class balance of the two features after the swap.
for column in ("Smoker", "Stroke"):
    print(test_data_swap[column].value_counts())

In [0]:
# Feature matrix for the swapped data: drop the label and any "prediction" column left
# by a previous run of this cell, so the model only ever receives feature columns.
X_test_swap = test_data_swap.drop(columns=["target", "prediction"], errors="ignore")

# Predict using the loaded model
y_pred_swap = model.predict(X_test_swap)

# Add predictions to the swapped test data for evaluation
test_data_swap["prediction"] = y_pred_swap


In [0]:
# True labels are in "target" (renamed from 'Diabetes_binary' earlier); predictions
# are the ones made on the swapped features.
y_true_swap = test_data_swap["target"]
y_pred_swap = test_data_swap["prediction"]

# Per-class precision / recall / F1 summary for the swapped test set.
print("\nClassification Report:")
print(classification_report(y_true=y_true_swap, y_pred=y_pred_swap))

In [0]:
from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab, ClassificationPerformanceTab

# Same report as the baseline dashboard, this time for the swapped test data.
dashboard_2 = Dashboard(
    tabs=[
        DataDriftTab(),
        ClassificationPerformanceTab(),
    ]
)

# train_data is passed first and the Smoker/Stroke-swapped test data second.
dashboard_2.calculate(train_data, test_data_swap)

# Write the rendered dashboard to DBFS as a standalone HTML file.
dashboard_2.save("/dbfs/FileStore/evidently_dashboard_smoke_stroke_swap.html")

Download link: https://3715126558473529.9.gcp.databricks.com/files/evidently_dashboard_smoke_stroke_swap.html