In [1]:
import os  # Import the os module to interact with the operating system


In [2]:
%pwd  # This is a Jupyter Notebook magic command to display the current working directory


'c:\\Users\\ayupt\\Desktop\\Data Science Projects\\End to End Deployment\\Kidney-Disease-Classificaion-End-to-End-MLflow-DVC\\research'

In [3]:
# Move from the notebook's folder up to the project root so that relative paths
# used later (e.g. "artifacts/...") resolve correctly.
# The guard keeps the cell idempotent: an unconditional os.chdir("../") climbs one
# more level every time the cell is re-run.
# NOTE(review): assumes the notebook lives in a folder named "research" — confirm.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")


In [4]:
%pwd  # Again, display the updated working directory to confirm the change


'c:\\Users\\ayupt\\Desktop\\Data Science Projects\\End to End Deployment\\Kidney-Disease-Classificaion-End-to-End-MLflow-DVC'

In [5]:
# Point MLflow at the DagsHub-hosted tracking server for this project.
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/abhishekpatel16/Kidney-Disease-Classificaion-End-to-End-MLflow-DVC.mlflow"

# MLflow reads basic-auth credentials from these environment variables.
os.environ["MLFLOW_TRACKING_USERNAME"] = "abhishekpatel16"

# SECURITY: never commit the access token to the notebook. Reuse a token that is
# already exported in the environment; otherwise prompt for it without echoing.
# Any token that was previously committed here should be revoked and regenerated.
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    from getpass import getpass

    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("MLflow/DagsHub access token: ")

In [6]:
import tensorflow as tf # Import TensorFlow library


In [7]:
# Smoke check: confirm the trained model artifact can be deserialized. The path is
# relative to the current working directory, which an earlier cell moves up one level.
# NOTE(review): `model` is not referenced by any later cell visible here;
# Evaluation.evaluation() loads the same file again on its own — confirm this
# cell is still needed.
model = tf.keras.models.load_model("artifacts/training/model.h5")


In [8]:
# Standard-library helpers used by the configuration entity below
from dataclasses import dataclass  # Decorator that generates __init__/__repr__/__eq__ from annotated fields
from pathlib import Path  # Object-oriented file system paths


@dataclass(frozen=True)  # frozen=True: assigning to a field after construction raises FrozenInstanceError
class EvaluationConfig:
    """Read-only bundle of settings consumed by the model evaluation stage.

    Field annotations are not enforced at runtime; they document the expected types.
    """

    # Location of the saved, trained model file
    path_of_model: Path
    # Root directory of the image dataset the validation split is drawn from
    training_data: Path
    # Full set of hyperparameters/settings, logged to MLflow as run parameters
    all_params: dict
    # MLflow server URI
    mlflow_uri: str
    # Model input size; the last element is dropped to obtain the resize target
    params_image_size: list
    # Number of images per batch during evaluation
    params_batch_size: int

In [9]:
# Import all constants from the constants module inside cnnClassifier
# These constants may include paths, model parameters, and other fixed values
from cnnClassifier.constants import *  

# Import specific utility functions from the common module inside cnnClassifier.utils
from cnnClassifier.utils.common import (  
    read_yaml,        # Function to read YAML configuration files and return data as a dictionary
    create_directories,  # Function to create directories if they don’t exist
    save_json         # Function to save a dictionary or object into a JSON file
)


In [10]:
class ConfigurationManager:
    """
    Loads the project's YAML configuration and parameter files and hands out
    ready-to-use configuration objects for individual pipeline stages.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """
        Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML file
                (defaults to CONFIG_FILE_PATH).
            params_filepath: Location of the parameters YAML file
                (defaults to PARAMS_FILE_PATH).
        """
        # Parsed YAML content; both objects are accessed by attribute below
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root must exist before any stage writes into it
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        """
        Assemble the settings needed by the evaluation stage.

        The model path, dataset path and MLflow URI are fixed literals; the
        remaining values come from the loaded parameters file.

        Returns:
            EvaluationConfig: Frozen dataclass holding paths and evaluation parameters.
        """
        hyperparams = self.params

        return EvaluationConfig(
            # Fixed locations, relative to the current working directory
            path_of_model="artifacts/training/model.h5",
            training_data="artifacts/data_ingestion/kidney-ct-scan-image",
            # MLflow server used for this project
            mlflow_uri="https://dagshub.com/abhishekpatel16/Kidney-Disease-Classificaion-End-to-End-MLflow-DVC.mlflow",
            # Everything in the parameters file, plus the two values used directly
            all_params=hyperparams,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_batch_size=hyperparams.BATCH_SIZE,
        )


In [11]:
# Import TensorFlow library for building and training deep learning models
import tensorflow as tf  

# Import Path class from pathlib for handling file and directory paths
from pathlib import Path  

# Import MLflow for tracking experiments and managing model lifecycle
import mlflow  

# Import MLflow Keras module for logging and loading Keras models with MLflow
import mlflow.keras  

# Import urlparse from urllib.parse for parsing and handling URLs
from urllib.parse import urlparse  


In [12]:
class Evaluation:
    """
    Loads a trained Keras model, evaluates it on the validation split of the
    dataset, saves the resulting scores to JSON, and logs them to MLflow.

    Typical use: call ``evaluation()`` first (it sets ``self.model`` and
    ``self.score``), then ``log_into_mlflow()``.
    """

    def __init__(self, config: EvaluationConfig):
        """
        Store the evaluation settings.

        Args:
            config (EvaluationConfig): Paths and parameters for the evaluation stage.
        """
        self.config = config

    def _valid_generator(self):
        """
        Build the validation data iterator and store it on ``self.valid_generator``.

        Images are read from ``config.training_data``; only the "validation"
        subset defined by ``validation_split`` is yielded, in a fixed order.
        """
        # Pixel rescaling and the share of the dataset reserved for validation
        datagenerator_kwargs = dict(
            rescale=1./255,  # Scale pixel values by 1/255
            validation_split=0.30  # Reserve 30% of the images as the validation subset
        )

        # How images are resized and batched when read from disk
        dataflow_kwargs = dict(
            # Drops the last element of the configured size
            # (presumably the channel count, leaving height/width — TODO confirm)
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",  # Only the validation portion of the split
            shuffle=False,  # Keep a deterministic sample order
            **dataflow_kwargs
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """
        Load a saved Keras model from disk.

        Args:
            path (Path): Location of the saved model file.

        Returns:
            tf.keras.Model: The deserialized model.
        """
        return tf.keras.models.load_model(path)

    def evaluation(self):
        """
        Load the model, evaluate it on the validation iterator, and persist the scores.

        Sets ``self.model``, ``self.valid_generator`` and ``self.score``, then
        writes the scores via ``save_score()``.
        """
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self.save_score()

    def save_score(self):
        """
        Write the evaluation scores to ``scores.json`` in the current working directory.

        Requires ``evaluation()`` to have populated ``self.score``.
        """
        # Assumes model.evaluate returned [loss, accuracy], i.e. the model was
        # compiled with accuracy as its only metric — TODO confirm.
        scores = {"loss": self.score[0], "accuracy": self.score[1]}
        save_json(path=Path("scores.json"), data=scores)

    def log_into_mlflow(self):
        """
        Log parameters, metrics and the model itself to MLflow.

        Requires ``evaluation()`` to have been called first so that
        ``self.model`` and ``self.score`` exist.
        """
        # Point BOTH the tracking client and the model registry at the configured
        # server. Setting only the registry URI leaves the tracking URI at whatever
        # MLFLOW_TRACKING_URI (or MLflow's local default) provides, so the configured
        # URI would never be used for the run itself.
        mlflow.set_tracking_uri(self.config.mlflow_uri)
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                {"loss": self.score[0], "accuracy": self.score[1]}
            )

            # Register the model only when the tracking URI is not a local file store
            if tracking_url_type_store != "file":
                # Creates a new version under the registered model name "VGG16Model"
                mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model")
            else:
                # Local file store: log the model artifact without registering it
                mlflow.keras.log_model(self.model, "model")


In [13]:
# Run the evaluation stage end to end.
# The previous `try: ... except Exception as e: raise e` wrapper was removed: it
# re-raised every exception unchanged, so it handled nothing and only added an
# extra frame to the traceback. Errors still propagate exactly as before.

# Load the YAML configuration and parameters
config = ConfigurationManager()

# Build the settings object for the evaluation stage
eval_config = config.get_evaluation_config()

# Evaluate the trained model on the validation split (also writes scores.json)
evaluation = Evaluation(eval_config)
evaluation.evaluation()

# Log parameters, metrics and the model to MLflow
evaluation.log_into_mlflow()


[2025-02-15 18:35:54,584: INFO: common: YAML file: config\config.yaml loaded successfully]
[2025-02-15 18:35:54,619: INFO: common: YAML file: params.yaml loaded successfully]
[2025-02-15 18:35:54,624: INFO: common: Created directory at: artifacts]
Found 139 images belonging to 2 classes.
[2025-02-15 18:37:01,858: INFO: common: JSON file saved at: scores.json]




INFO:tensorflow:Assets written to: C:\Users\ayupt\AppData\Local\Temp\tmpuj6e4scm\model\data\model\assets
[2025-02-15 18:37:10,730: INFO: builder_impl: Assets written to: C:\Users\ayupt\AppData\Local\Temp\tmpuj6e4scm\model\data\model\assets]


Registered model 'VGG16Model' already exists. Creating a new version of this model...
2025/02/15 18:38:35 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: VGG16Model, version 2
Created version '2' of model 'VGG16Model'.
