In [1]:
import os  # Import the os module to interact with the operating system


In [2]:
%pwd  # This is a Jupyter Notebook magic command to display the current working directory


'c:\\Users\\ayupt\\Desktop\\Data Science Projects\\End to End Deployment\\Kidney-Disease-Classificaion-End-to-End-MLflow-DVC\\research'

In [3]:
# Move from the notebook folder up to the project root so that relative paths
# such as "params.yaml" resolve. Guarded so that re-running this cell does not
# climb another level once the project root is already the working directory.
if not os.path.isfile("params.yaml"):
    os.chdir("../")


In [4]:
%pwd  # Again, display the updated working directory to confirm the change


'c:\\Users\\ayupt\\Desktop\\Data Science Projects\\End to End Deployment\\Kidney-Disease-Classificaion-End-to-End-MLflow-DVC'

In [5]:
# Import necessary modules
from dataclasses import dataclass  # Used to define immutable (frozen) data structures
from pathlib import Path  # Provides a convenient way to handle file system paths

# Define a dataclass to store training configuration settings
@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of the settings used by the training stage.

    Attributes:
        root_dir: Directory that stores training-related artifacts.
        trained_model_path: File path where the trained model is saved.
        updated_base_model_path: File path of the updated base model.
        training_data: Location of the dataset used for training.
        params_epochs: Number of training epochs.
        params_batch_size: Number of samples per training batch.
        params_is_augmentation: Whether data augmentation is applied.
        params_image_size: Input image dimensions, e.g. [224, 224, 3].
    """

    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list


In [6]:
''' 
Importing required modules for CNN classifier setup
'''

# Importing constants (e.g., paths, hyperparameters, model configurations) from a constants module
from cnnClassifier.constants import *

# Importing utility functions for reading YAML files and creating directories
from cnnClassifier.utils.common import read_yaml, create_directories

# Importing TensorFlow for deep learning model creation and training
import tensorflow as tf


In [7]:
class ConfigurationManager:
    """Loads the YAML configuration and parameter files and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """
        Reads both YAML files and creates the artifacts root directory.

        :param config_filepath: Path to the configuration file.
        :param params_filepath: Path to the parameters file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is created up front, before any stage config is requested.
        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """
        Assembles the training settings from the loaded configuration and parameters.

        Also creates the training root directory as a side effect.

        :return: A TrainingConfig populated from the `training`, `prepare_base_model`
                 and `data_ingestion` config sections plus the parameters file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The dataset lives in a fixed sub-folder of the unzip directory.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir,
            "kidney-ct-scan-image",
        )

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )


In [8]:
import os  # Provides functions for interacting with the operating system
import urllib.request as request  # Module for fetching data across the web
from zipfile import ZipFile  # Used for extracting zip files
import tensorflow as tf  # TensorFlow library for machine learning and deep learning tasks
import time  # Module for time-related functions

In [9]:
class Training:
    """
    Trains the updated base model on the image dataset and saves the result.

    Intended call order: `get_base_model()` and `train_valid_generator()` first,
    then `train()`; `train()` reads the attributes those two methods set.
    """

    def __init__(self, config: TrainingConfig):
        """
        Stores the training configuration.

        :param config: An instance of TrainingConfig containing training settings.
        """
        self.config = config

    def get_base_model(self):
        """
        Loads the model stored at `config.updated_base_model_path` into `self.model`.
        """
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """
        Builds `self.valid_generator` and `self.train_generator` from the
        directory `config.training_data`.

        Both flows rescale pixels by 1/255 and share a 20% validation split.
        Augmentation is applied to the training flow only, and only when
        `config.params_is_augmentation` is truthy.
        """
        # Settings shared by every ImageDataGenerator created here
        datagenerator_kwargs = dict(
            rescale=1./255,  # Normalize pixel values
            validation_split=0.20  # Use 20% of the data for validation
        )

        # Settings shared by both directory flows
        dataflow_kwargs = dict(
            # Drops the last entry of the configured size
            # (assumes IMAGE_SIZE is [height, width, channels] - TODO confirm)
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        # The validation flow never uses augmentation
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,  # Do not shuffle validation data
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,  # Random rotation up to 40 degrees
                horizontal_flip=True,  # Randomly flip images horizontally
                width_shift_range=0.2,  # Random horizontal shift
                height_shift_range=0.2,  # Random vertical shift
                shear_range=0.2,  # Shear transformation
                zoom_range=0.2,  # Random zoom
                **datagenerator_kwargs
            )
        else:
            # No augmentation: reuse the plain ImageDataGenerator. The training
            # flow below still selects the "training" subset, not validation data.
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,  # Shuffle training data
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """
        Saves the model to the specified path.

        :param path: Path where the model will be saved.
        :param model: TensorFlow model to save.
        """
        model.save(path)

    @staticmethod
    def _steps_for(generator) -> int:
        """
        Returns the number of full batches in `generator`, but never 0 for a
        non-empty generator.

        Plain floor division yields 0 when the subset holds fewer samples than
        one batch, which would hand `fit` a step count of zero. In that case a
        single step (one partial batch) is used instead. An empty generator
        still yields 0, as before.

        :param generator: Object exposing integer `samples` and `batch_size`.
        :return: Step count to pass to `fit`.
        """
        full_batches = generator.samples // generator.batch_size
        if full_batches == 0 and generator.samples > 0:
            return 1
        return full_batches

    def train(self):
        """
        Fits `self.model` on the prepared generators, then saves it to
        `config.trained_model_path`.

        Requires `get_base_model()` and `train_valid_generator()` to have been
        called first. Sets `self.steps_per_epoch` and `self.validation_steps`.
        """
        self.steps_per_epoch = self._steps_for(self.train_generator)
        self.validation_steps = self._steps_for(self.valid_generator)

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )


In [10]:
# Run the training stage end to end. Any failure propagates unchanged, so the
# notebook shows the original traceback (no wrapper that only re-raises).
config = ConfigurationManager()
training_config = config.get_training_config()

training = Training(config=training_config)
training.get_base_model()          # Load the updated base model
training.train_valid_generator()   # Build the training and validation data flows
training.train()                   # Fit the model, then save it


[2025-02-15 14:53:16,618: INFO: common: YAML file: config\config.yaml loaded successfully]
[2025-02-15 14:53:16,642: INFO: common: YAML file: params.yaml loaded successfully]
[2025-02-15 14:53:16,646: INFO: common: Created directory at: artifacts]
[2025-02-15 14:53:16,649: INFO: common: Created directory at: artifacts\training]
Found 93 images belonging to 2 classes.
Found 372 images belonging to 2 classes.
