

------

# **`Model Evaluation`**

------




In [1]:
!pwd

/e/Practice python/Chest Cancer Classification Using MLflow and DVC/research


In [2]:
# Move from the notebook folder ("research") up to the project root so that
# the relative "artifacts/..." paths used by later cells resolve.
# The guard keeps this cell idempotent: re-running it no longer climbs one
# directory higher on every execution.
import os

if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
!pwd

/e/Practice python/Chest Cancer Classification Using MLflow and DVC


### **`Setting Up Model Evaluation Configurations`**

In [4]:
import os
import json
import numpy as np
from dotenv import load_dotenv

# Pull the variables defined in the project's .env file into the process
# environment before reading them below.
load_dotenv()

# MLflow connection settings, read once at module level.
# Each value is None when the corresponding variable is not set.
MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI")
MLFLOW_TRACKING_USERNAME = os.environ.get("MLFLOW_TRACKING_USERNAME")
MLFLOW_TRACKING_PASSWORD = os.environ.get("MLFLOW_TRACKING_PASSWORD")

In [5]:
import tensorflow as tf

# Load the trained model saved by the training stage.
model = tf.keras.models.load_model("artifacts/training/model.h5")

# Recompile so the model carries an optimizer, loss and metric.
# NOTE(review): these settings should mirror the ones used for training - confirm.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)




In [6]:
# define model evaluation entity class

from dataclasses import dataclass
from pathlib import Path

# Define a frozen dataclass to store evaluation configuration settings
@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable bundle of settings consumed by the model-evaluation stage.

    Attributes:
        path_of_model: Location of the trained model file.
        training_data: Directory holding the image data the model is evaluated on.
        all_params: Every parameter loaded from the params file.
        mlflow_uri: URI of the MLflow tracking server.
        params_image_size: Image size parameters as a list.
        params_batch_size: Batch size used while evaluating.
    """

    path_of_model: Path
    training_data: Path
    all_params: dict
    mlflow_uri: str
    params_image_size: list
    params_batch_size: int

In [7]:
from chest_cancer_classifier.constants import *
from chest_cancer_classifier.utils.common_functions import read_yaml, create_directories, save_json

In [8]:
# set up configuration manager

class ConfigurationManager:
    """Loads the project's config and params files and builds stage configurations."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Read both YAML files and prepare the artifacts root.

        Args:
            config_filepath: Path to the configuration file.
            params_filepath: Path to the parameters file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root named in the config is handed to create_directories.
        create_directories([self.config.artifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        """Assemble the settings for the evaluation stage.

        Returns:
            An EvaluationConfig whose image size and batch size come from the
            params file.
        """
        # NOTE(review): the model path, data directory and MLflow URI are
        # hard-coded here rather than read from self.config.
        params = self.params
        return EvaluationConfig(
            path_of_model="artifacts/training/model.h5",
            training_data="artifacts/data_ingestion/Chest-CT-Scan-data",
            mlflow_uri="https://dagshub.com/muhammadadilnaeem/Chest-Cancer-Classification-Using-MLflow-and-DVC.mlflow",
            all_params=params,
            params_image_size=params.IMAGE_SIZE,
            params_batch_size=params.BATCH_SIZE,
        )

In [None]:
# Import the mlflow library for tracking machine learning experiments
import mlflow

# Import the Keras module from mlflow for tracking Keras models specifically
import mlflow.keras

# Import TensorFlow, a library for building and training machine learning models
import tensorflow as tf

# Import Path from the pathlib module to handle filesystem paths in an object-oriented way
from pathlib import Path

# Import urlparse from the urllib.parse module to parse URLs into components
from urllib.parse import urlparse


In [None]:
class Evaluation:
    """Evaluates the trained model on a validation split and logs the result to MLflow.

    Typical use: construct with an EvaluationConfig, call ``evaluation()`` to
    compute and save the scores, then ``log_into_mlflow()`` to record them.
    """

    def __init__(self, config: EvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Settings for the model path, data directory, MLflow URI,
                image size and batch size.
        """
        self.config = config

    def _valid_generator(self):
        """Build ``self.valid_generator`` over the validation subset of the data directory."""
        # Pixel values are scaled to [0, 1]; 30% of the images form the validation subset.
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            rescale=1./255,
            validation_split=0.30
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,  # keep a fixed order of batches
            class_mode="sparse",  # integer labels, matching the sparse loss used in evaluation()
            target_size=self.config.params_image_size[:-1],  # drop the last entry of IMAGE_SIZE (channels)
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return the Keras model stored at ``path``."""
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """Load the model, evaluate it on the validation generator and save the scores.

        Sets ``self.model``, ``self.valid_generator`` and ``self.score``, then
        writes the scores via ``save_score()``.
        """
        self.model = self.load_model(self.config.path_of_model)

        # Recompile with the sparse categorical loss so that evaluate() reports
        # loss and accuracy against the integer labels the generator yields.
        self.model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self.save_score()

    def _scores_as_dict(self) -> dict:
        """Return the evaluation result as ``{"loss": ..., "accuracy": ...}``."""
        return {"loss": self.score[0], "accuracy": self.score[1]}

    def save_score(self):
        """Write the loss and accuracy to ``scores.json``."""
        save_json(path=Path("scores.json"), data=self._scores_as_dict())

    def log_into_mlflow(self):
        """Log parameters, metrics and the model to the MLflow server from the configuration.

        Must be called after ``evaluation()``, which provides ``self.score``
        and ``self.model``.
        """
        # Point both tracking and the model registry at the configured server.
        # Setting only the registry URI would leave run tracking on whatever
        # default or environment-provided URI happens to be active.
        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_registry_uri(self.config.mlflow_uri)

        # The URI scheme tells us whether the store is a local file store.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        experiment_name = "Model Evaluation Experimentation"
        mlflow.set_experiment(experiment_name)

        with mlflow.start_run(run_name="Model Evaluation"):
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(self._scores_as_dict())

            if tracking_url_type_store != "file":
                # Not a file store: log the model and register it under a name.
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                # File store: log the model without registering it.
                mlflow.keras.log_model(self.model, "model")


In [11]:
# Run the evaluation stage end to end.
# No try/except wrapper: the previous handler only re-raised the exception,
# so letting errors propagate directly is equivalent and keeps the traceback clean.

# Load the YAML configuration and parameters
config = ConfigurationManager()

# Build the settings for the evaluation stage
eval_config = config.get_evaluation_config()

# Load the model, build the validation generator, evaluate and save the scores
evaluation = Evaluation(eval_config)
evaluation.evaluation()

# Log parameters, metrics and the model to MLflow
evaluation.log_into_mlflow()



Found 102 images belonging to 2 classes.


  self._warn_if_super_not_called()


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 5s/step - accuracy: 1.0000 - loss: 0.0394


Registered model 'VGG16Model' already exists. Creating a new version of this model...
2024/11/13 21:36:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 4
Created version '4' of model 'VGG16Model'.
2024/11/13 21:36:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run Model Evaluation at: https://dagshub.com/muhammadadilnaeem/Chest-Cancer-Classification-Using-MLflow-and-DVC.mlflow/#/experiments/2/runs/7f9f4d2eff2742d89a13cd2320598f23.
2024/11/13 21:36:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/muhammadadilnaeem/Chest-Cancer-Classification-Using-MLflow-and-DVC.mlflow/#/experiments/2.


In [14]:
import mlflow
import numpy as np
from keras.preprocessing import image

# URI of the model artifact logged by the evaluation run above.
# NOTE(review): the run id is pinned to one specific run; update it after
# re-running the evaluation cell.
logged_model = 'runs:/7f9f4d2eff2742d89a13cd2320598f23/model'

# Load the model as an MLflow PyFunc model
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Load a single image. The path is relative to the project root (the working
# directory set at the top of the notebook) instead of an absolute,
# machine-specific location, so the cell also runs on other checkouts.
img_path = "artifacts/data_ingestion/Chest-CT-Scan-data/adenocarcinoma/000005 (3).png"
# NOTE(review): (224, 224) presumably matches IMAGE_SIZE in the params file - confirm.
img = image.load_img(img_path, target_size=(224, 224))

# Convert to an array, add a leading batch axis and apply the same 1/255
# rescaling the validation generator uses.
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
img_array /= 255.0

# Predict on the preprocessed image data
predictions = loaded_model.predict(img_array)
print("Single Image Prediction:", predictions)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 6/6 [02:10<00:00, 21.77s/it]  
  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Single Image Prediction: [[0.9977822 0.0022178]]


In [13]:
import tensorflow as tf
# Record the TensorFlow version and the version of its Keras module,
# one per line.
print(tf.__version__, tf.keras.__version__, sep="\n")

2.18.0
3.6.0


----