In [2]:
pip install tensorflow keras numpy mlflow evidently scikit-learn


Note: you may need to restart the kernel to use updated packages.




In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import mlflow
import mlflow.tensorflow
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from sklearn.model_selection import train_test_split


In [2]:
# Load dataset
fashion_mnist = keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Normalize images
X_train, X_test = X_train / 255.0, X_test / 255.0

# Split validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)


In [13]:
#  Set MLflow Experiment
mlflow.set_experiment("Fashion_MNIST_Tracking")

#  Define Different Training Configurations
configurations = [
    {"epochs": 5, "optimizer": "adam"},
    {"epochs": 10, "optimizer": "adam"},
    {"epochs": 5, "optimizer": "sgd"},
]

#  Train and Log Each Model in MLflow
for config in configurations:
    with mlflow.start_run(run_name=f"Model_E{config['epochs']}_{config['optimizer']}"):  
        print(f"\n Training Model: Epochs={config['epochs']}, Optimizer={config['optimizer']}")

        #  Define Your Provided Model
        model = keras.Sequential([
            keras.layers.Flatten(input_shape=(28, 28)),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dropout(0.2),
            keras.layers.Dense(10, activation='softmax')
        ])

        #  Compile Model
        model.compile(optimizer=config["optimizer"],
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        #  Train Model
        history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=config["epochs"], verbose=1)

        #  Log Parameters & Metrics in MLflow
        mlflow.log_param("epochs", config["epochs"])
        mlflow.log_param("optimizer", config["optimizer"])
        mlflow.log_metric("train_accuracy", history.history['accuracy'][-1])
        mlflow.log_metric("val_accuracy", history.history['val_accuracy'][-1])

        #  Save Model & Log in MLflow
        model_filename = f"fashion_mnist_model_E{config['epochs']}_{config['optimizer']}.h5"
        model.save(model_filename)
        mlflow.log_artifact(model_filename)

print("\n All models trained and logged successfully!")


 Training Model: Epochs=5, Optimizer=adam



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7612 - loss: 0.6945 - val_accuracy: 0.8442 - val_loss: 0.4384
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8497 - loss: 0.4111 - val_accuracy: 0.8660 - val_loss: 0.3683
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8631 - loss: 0.3751 - val_accuracy: 0.8682 - val_loss: 0.3574
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8731 - loss: 0.3489 - val_accuracy: 0.8752 - val_loss: 0.3476
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.8806 - loss: 0.3277 - val_accuracy: 0.8773 - val_loss: 0.3358





 Training Model: Epochs=10, Optimizer=adam
Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.7580 - loss: 0.6936 - val_accuracy: 0.8405 - val_loss: 0.4427
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.8515 - loss: 0.4166 - val_accuracy: 0.8688 - val_loss: 0.3629
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.8651 - loss: 0.3718 - val_accuracy: 0.8685 - val_loss: 0.3567
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8712 - loss: 0.3531 - val_accuracy: 0.8798 - val_loss: 0.3441
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8731 - loss: 0.3421 - val_accuracy: 0.8775 - val_loss: 0.3383
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8825 - loss: 0.3197 - val_accur




 Training Model: Epochs=5, Optimizer=sgd
Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.6261 - loss: 1.1461 - val_accuracy: 0.8098 - val_loss: 0.5887
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.8005 - loss: 0.5990 - val_accuracy: 0.8260 - val_loss: 0.5145
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8191 - loss: 0.5283 - val_accuracy: 0.8335 - val_loss: 0.4729
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.8317 - loss: 0.4947 - val_accuracy: 0.8385 - val_loss: 0.4516
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8373 - loss: 0.4654 - val_accuracy: 0.8502 - val_loss: 0.4293





 All models trained and logged successfully!


In [7]:
import pandas as pd

# Convert to DataFrame with column names
num_features = 28 * 28  # Total pixels in an image
column_names = [f"pixel_{i}" for i in range(num_features)]

reference_data = pd.DataFrame(X_test.reshape(-1, num_features)[:5000], columns=column_names)
current_data = pd.DataFrame(X_test.reshape(-1, num_features)[5000:], columns=column_names)

# Create a drift detection report
drift_report = Report(metrics=[DataDriftPreset()])
drift_report.run(reference_data=reference_data, current_data=current_data)

# Generate report
drift_report.save_html("drift_report.html")

# Log drift report to MLflow
mlflow.log_artifact("drift_report.html")


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


Observations -
* As per the report generated we do not see any data drift as of now whereas we have still given the threshold for the drift. If a new dataset comes and this is ran again then we can observe from the report if data drift occurs or not. This helps in monitoring anytime the data drift issue.
* We can notice that for three different models we have different model metrics in the mlflow. We chose adam and sgd optimizers and with different number of epochs to run on our dataset. 
* We log the accuracy for training and validation for each model. 
* Mlflow helps us to not only log but also observe the performance of the models by looking into all the required parameters to conclude which model may be best for a certain use case.
* We have also logged the drift report for our dataset which helps in evaluating when the retraining will be needed. If there is any change in the dataset tomorrow , the drift report can detect and we can analyse when retraining will be needed. We have kept a threshold of 0.5 for this detection and used evidently AI for the same.
* Evidently AI automatically detects drift in data distribution without needing manual feature selection.
* Evidently AI supports both feature drift (input data changes) and target drift (label distribution shifts).
* In real-world scenarios, input data may change due to seasonality, user behavior, or sensor degradation. Evidently AI makes it easy to monitor these changes without writing complex statistical tests manually.