In [1]:
# os provides the working-directory and path helpers used later in the notebook
import os

In [2]:
# checking the current working directory of the notebook environment
%pwd

'g:\\Jagadish\\Production-Ready-Chest-Cancer-Detection-Deep-Learning-Model-MLOps-with-MLflow-DVC-CI-CD-and-AWS\\research'

In [3]:
# Move from the notebook's folder up to the project root so that relative paths
# (e.g. params.yaml, which the configuration step loads) resolve correctly.
# The guard makes the cell idempotent: once the working directory already
# contains params.yaml, re-running the cell does not climb another level.
if not os.path.exists("params.yaml"):
    os.chdir("../")

In [4]:
# confirm the working directory is now the project root (one level above the notebook's folder)
%pwd

'g:\\Jagadish\\Production-Ready-Chest-Cancer-Detection-Deep-Learning-Model-MLOps-with-MLflow-DVC-CI-CD-and-AWS'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """
    Immutable bundle of the paths and hyperparameters consumed by the training stage.

    Instances are frozen: assigning to a field after construction raises an error.

    Attributes:
        root_dir (Path): Root directory of the training stage.
        trained_model_path (Path): Destination the trained model is saved to.
        updated_base_model_path (Path): Location of the prepared base model that training loads.
        training_data (Path): Directory the image generators read from.
        params_epochs (int): Number of epochs passed to model fitting.
        params_batch_size (int): Batch size used by the data generators.
        params_is_augmentation (bool): Whether the training generator applies augmentation.
        params_image_size (list): Image size; all but its last element is used as the
            generator target size.
    """
    # Filesystem locations
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    training_data: Path
    # Hyperparameters
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list


In [6]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [7]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """
        Load the configuration and parameter YAML files and create the artifacts root.

        Args:
            config_filepath: Location of the configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root named in the configuration is created up front.
        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """
        Assemble the training-stage settings from the loaded config and params.

        Creates the training root directory as a side effect.

        Returns:
            TrainingConfig: Paths and hyperparameters for the training stage.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The images live in a fixed sub-folder of the data-ingestion unzip directory.
        dataset_dir = Path(self.config.data_ingestion.unzip_dir) / "Chest-CT-Scan-data"

        # Make sure the training root exists before anything is written into it.
        stage_root = Path(training_section.root_dir)
        create_directories([stage_root])

        return TrainingConfig(
            root_dir=stage_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=dataset_dir,
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )


In [8]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [9]:
class Training:
    """
    Runs the training stage: load the prepared base model, build the data
    generators, fit the model and save it.

    Expected call order: ``get_base_model()``, ``train_valid_generator()``, ``train()``.
    """

    def __init__(self, config: TrainingConfig):
        """
        Initializes the Training class with the given configuration.

        Args:
            config (TrainingConfig): Configuration parameters for training.
        """
        self.config = config  # Store the training configuration

    def get_base_model(self):
        """
        Loads the model stored at ``config.updated_base_model_path`` into ``self.model``.
        """
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):
        """
        Creates ``self.valid_generator`` and ``self.train_generator`` from the
        directory ``config.training_data``.

        Both generators rescale pixels by 1/255 and share a 20% validation split.
        The training generator additionally applies augmentation when
        ``config.params_is_augmentation`` is true.
        """
        # Arguments shared by every ImageDataGenerator
        datagenerator_kwargs = dict(
            rescale=1./255,  # Rescale pixel values
            validation_split=0.20  # Split 20% of data for validation
        )

        # Arguments shared by every flow_from_directory call
        dataflow_kwargs = dict(
            # All but the last element of the configured image size
            # (presumably height and width, dropping channels — confirm against params)
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,  # Batch size
            interpolation="bilinear"  # Interpolation method
        )

        # Validation data is never augmented
        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        # Fixed order (shuffle=False) for the validation subset
        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        # Conditional data augmentation for the training subset
        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,  # Rotate images
                horizontal_flip=True,  # Flip images horizontally
                width_shift_range=0.2,  # Shift images horizontally
                height_shift_range=0.2,  # Shift images vertically
                shear_range=0.2,  # Shear images
                zoom_range=0.2,  # Zoom images
                **datagenerator_kwargs
            )
        else:
            # Without augmentation the un-augmented generator object is reused
            train_datagenerator = valid_datagenerator

        # Shuffled flow for the training subset
        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """
        Saves the trained model to the specified path.

        Args:
            path (Path): Path where the model will be saved.
            model (tf.keras.Model): Trained TensorFlow model.
        """
        model.save(path)

    @staticmethod
    def _steps_for(generator) -> int:
        """
        Number of batches needed to visit every sample of ``generator`` once.

        Uses ceiling division so a trailing partial batch counts as a step and a
        split smaller than one batch still yields one step instead of zero.
        """
        return (generator.samples + generator.batch_size - 1) // generator.batch_size

    def train(self):
        """
        Trains the model using the training and validation generators.

        ``steps_per_epoch`` and ``validation_steps`` are the ceiling of
        samples / batch size, so every sample (including the last partial batch)
        is used each epoch. The trained model is then saved to
        ``config.trained_model_path``.
        """
        self.steps_per_epoch = self._steps_for(self.train_generator)
        self.validation_steps = self._steps_for(self.valid_generator)

        # Train the model
        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        # Save the trained model
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )


In [10]:
# Run the training stage end to end. No try/except wrapper is used: a handler
# that only re-raises adds nothing, so failures propagate with their own traceback.

# Load config.yaml / params.yaml from their default locations
config = ConfigurationManager()

# Build the training-stage settings (also creates the training root directory)
training_config = config.get_training_config()

# Set up the trainer with those settings
training = Training(config=training_config)

# Load the prepared base model
training.get_base_model()

# Create training and validation data generators
training.train_valid_generator()

# Fit the model and save it to the configured path
training.train()


[2024-12-15 18:39:50,935: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-12-15 18:39:50,976: INFO: common: yaml file: params.yaml loaded successfully]
[2024-12-15 18:39:51,167: INFO: common: created directory at: artifacts]
[2024-12-15 18:39:51,190: INFO: common: created directory at: artifacts\training]
Found 68 images belonging to 2 classes.
Found 275 images belonging to 2 classes.
