# Environment Setup

In [1]:
import os
from dotenv import load_dotenv

# Loading environment variables from .env
load_dotenv()

# Changing directory to main directory for easy data access
working_directory = os.getenv("WORKING_DIRECTORY")
os.chdir(working_directory)

# Checking the change
%pwd

'/workspaces/TumorTracer'

In [2]:
from pathlib import Path

# Sanity check: a .git entry should be visible from the current working directory
print("Git folder exists:", Path(".git").exists())

Git folder exists: True


# 3. Model Training

In [3]:
from dataclasses import dataclass
from pathlib import Path
from cnnClassifier import get_logger
from typing import Optional, Dict, Any

# Logger for this notebook, obtained from the cnnClassifier package's get_logger helper
logger = get_logger()

@dataclass(frozen=True)
class ModelTrainingConfig:
    """
    Read-only bundle of the paths and hyperparameters used by the model training stage.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
    - root_dir: Directory for training artifacts.
    - trained_model_path: Final model output path.
    - updated_base_model: Pretrained model with custom head.
    - training_data: Directory with training images.
    - validation_data: Directory with validation images.
    - params_augmentation: Whether to apply augmentation.
    - params_image_size: Input image size, e.g., (224, 224, 3).
    - params_batch_size: Batch size for training.
    - params_epochs: Total epochs.
    - params_learning_rate: Learning rate for training.
    - params_if_augmentation: Dict of augmentation hyperparameters (defaults to None).
    """
    # --- Paths ---
    root_dir: Path
    trained_model_path: Path
    updated_base_model: Path
    training_data: Path
    validation_data: Path

    # --- Hyperparameters ---
    params_augmentation: bool
    params_image_size: tuple[int, int, int]
    params_batch_size: int
    params_epochs: int
    params_learning_rate: float
    params_if_augmentation: Optional[Dict[str, Any]] = None

In [4]:
from cnnClassifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from cnnClassifier.utils.common import read_yaml, create_directories
from cnnClassifier import get_logger

# Logger used by ConfigurationManager below; obtained via get_logger() from cnnClassifier
logger = get_logger()

class ConfigurationManager:
    """
    Loads config.yaml and params.yaml and builds structured config objects
    for the pipeline stages.
    """
    def __init__(self, config_file_path=CONFIG_FILE_PATH, params_file_path=PARAMS_FILE_PATH) -> None:
        """
        Reads configuration files (config.yaml and params.yaml), 
        ensures necessary directories exist, and prepares structured config objects.

        Args:
        - config_file_path (str): Path to the config.yaml file.
        - params_file_path (str): Path to the params.yaml file.

        Raises:
        - FileNotFoundError: If either the config file or the params file does not exist.
        """
        # Validate and load config.yaml
        if not Path(config_file_path).exists():
            logger.error(f"Config file not found at: {config_file_path}")
            raise FileNotFoundError(f"Config file not found at: {config_file_path}")
        self.config = read_yaml(config_file_path)

        # Validate and load params.yaml (the check must look at the params path,
        # not the config path, otherwise a missing params file is never detected here)
        if not Path(params_file_path).exists():
            logger.error(f"Params file not found at: {params_file_path}")
            raise FileNotFoundError(f"Params file not found at: {params_file_path}")
        self.params = read_yaml(params_file_path)

        logger.info(f"Loading configuration from {config_file_path} and parameters from {params_file_path}")

        # Create the root artifacts directory (if not already present)
        create_directories([self.config.artifacts_root])


    def get_training_config(self) -> ModelTrainingConfig:
        """
        Prepares and returns the ModelTrainingConfig object.

        Reads the `model_training` section of both config.yaml and params.yaml and
        creates the stage's root directory before building the config object.

        Returns:
        - ModelTrainingConfig: Structured config for training the updated base model.
        """
        stage_config = self.config.model_training
        stage_params = self.params.model_training

        # Ensure the model_training root directory exists
        create_directories([stage_config.root_dir])

        # Load augmentation params only if augmentation is enabled and params for it are present;
        # otherwise an empty dict is passed so downstream ** unpacking is a no-op
        params_for_augmentation = {}
        if stage_params.AUGMENTATION and hasattr(stage_params, "AUGMENTATION_PARAMS"):
            params_for_augmentation = dict(stage_params.AUGMENTATION_PARAMS)

        model_training_config = ModelTrainingConfig(
            root_dir=Path(stage_config.root_dir),
            trained_model_path=Path(stage_config.trained_model_path),
            updated_base_model=Path(stage_config.updated_model_path),
            training_data=Path(stage_config.training_dataset),
            validation_data=Path(stage_config.validation_dataset),
            params_augmentation=stage_params.AUGMENTATION,
            params_image_size=tuple(stage_params.IMAGE_SIZE),
            params_batch_size=stage_params.BATCH_SIZE,
            params_epochs=stage_params.EPOCHS,
            params_learning_rate=stage_params.LEARNING_RATE,
            params_if_augmentation=params_for_augmentation,
        )

        logger.info(f"ModelTrainingConfig created with: {model_training_config}")

        return model_training_config

In [5]:
import os
import tensorflow as tf
from math import ceil
from typing import Union
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from pathlib import Path

from cnnClassifier.utils.common import create_directories, save_json
from cnnClassifier import get_logger

# Logger used by ModelTraining below; obtained via get_logger() from cnnClassifier
logger = get_logger()

class ModelTraining:
    """
    Training stage of the pipeline: loads the prepared base model, builds the
    image data generators, fits the model, and saves the trained model and the
    class-index mapping.

    Expected call order:
    get_base_model() -> get_data_generators() -> train() -> save_class_indices()
    """
    # File extensions counted as images when computing steps per epoch
    _IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

    def __init__(self, config: ModelTrainingConfig) -> None:
        """
        Initializes training pipeline with given configuration.

        The model, generators and image counts start as None and are populated
        by get_base_model(), get_data_generators() and train() respectively.
        """
        self.config = config
        self.output_model = None
        self.training_generator = None
        self.valid_generator = None
        self.training_images = None
        self.validation_images = None


    def get_base_model(self) -> None:
        """
        Loads the base model from the path given in the configuration
        and stores it on `self.output_model`.

        Raises:
        - FileNotFoundError: If no file exists at `config.updated_base_model`.
        """
        model_path = Path(self.config.updated_base_model)

        if not model_path.exists():
            logger.error(f"Could not find model at {model_path}. Run the Base Model pipeline stage first.")
            raise FileNotFoundError(f"Could not find model at {model_path}. Run the Base Model pipeline stage first.")

        try:
            self.output_model = tf.keras.models.load_model(model_path)
            logger.info(f"Successfully loaded the base model from {model_path}.")

        except Exception as exception_error:
            logger.error(f"Unexpected error while loading the update base model at {model_path}: {exception_error}")
            raise


    def get_data_generators(self) -> None:
        """
        Creates train and validation data generators using ImageDataGenerator.
        Applies augmentation only on training data, and only if it is enabled.

        Raises:
        - FileNotFoundError: If the training or validation directory is missing.
        - ValueError: If the two generators disagree on the class-to-index mapping.
        """
        try:
            logger.info("Preparing ImageDataGenerators...")

            # Augmentation kwargs are used only when the flag is on. A missing (None)
            # dict falls back to no augmentation instead of failing on ** unpacking.
            augmentation_kwargs = {}
            if self.config.params_augmentation and self.config.params_if_augmentation:
                augmentation_kwargs = dict(self.config.params_if_augmentation)

            train_datagen = ImageDataGenerator(rescale=1.0/255, **augmentation_kwargs)
            valid_datagen = ImageDataGenerator(rescale=1.0/255)

            self.training_generator = self._build_generator(train_datagen, self.config.training_data, "Train")
            self.valid_generator = self._build_generator(valid_datagen, self.config.validation_data, "Valid")

        except Exception as exception_error:
            logger.error(f"Unexpected error while creating data generators: {exception_error}")
            raise

        # Ensure class-to-index mapping is consistent.
        # Checked outside the try block so this expected failure is not logged a second time as "unexpected".
        if self.training_generator.class_indices != self.valid_generator.class_indices:
            logger.error("Mismatch in class indices between train and validation generators!")
            raise ValueError("Mismatch in class indices between train and validation generators!")

        logger.info("ImageDataGenerators created successfully.")


    def train(self) -> None:
        """
        Compiles and fits the loaded model on the prepared generators, then saves
        the trained model to `config.trained_model_path`.

        Raises:
        - ValueError: If get_base_model() or get_data_generators() has not been run,
          or if a dataset directory contains no images.
        - FileNotFoundError: If a dataset directory does not exist.
        """
        if self.output_model is None:
            logger.error("Base model not found. Run get_base_model() before calling train().")
            raise ValueError("Base model not found. Run get_base_model() before calling train().")

        # Fail early with a clear message instead of passing None into fit()
        if self.training_generator is None or self.valid_generator is None:
            logger.error("Data generators not found. Run get_data_generators() before calling train().")
            raise ValueError("Data generators not found. Run get_data_generators() before calling train().")

        try:
            logger.info("Initializing model training...")

            # Enabling Eager Execution (optional in TF 2.x) due to library requirement
            tf.config.run_functions_eagerly(True)

            # Recompile model with a fresh optimizer (required after loading)
            self.output_model.compile(
                optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.params_learning_rate),
                loss=tf.keras.losses.CategoricalCrossentropy(),
                metrics=["accuracy"]
            )

            # Counting the images in each of the datasets
            self.training_images = self._count_images_in_directory(self.config.training_data)
            self.validation_images = self._count_images_in_directory(self.config.validation_data)

            # ceil() so the final, partially filled batch is still consumed each epoch
            train_steps = ceil(self.training_images / self.config.params_batch_size)
            valid_steps = ceil(self.validation_images / self.config.params_batch_size)

            # Fitting the model
            self.output_model.fit(
                self.training_generator,
                validation_data=self.valid_generator,
                epochs=self.config.params_epochs,
                steps_per_epoch=train_steps,
                validation_steps=valid_steps,
            )

            logger.info("Successfully trained model based on provided parameters.")

            self._save_model(save_path=self.config.trained_model_path, model=self.output_model)

        except Exception as exception_error:
            logger.error(f"Unexpected error while training the model: {exception_error}")
            raise


    def save_class_indices(self) -> None:
        """
        Saves the class index mapping as a JSON file for future reference.

        The file is written to `<config.root_dir>/class_indices.json`.

        Raises:
        - ValueError: If get_data_generators() has not been run yet.
        """
        if self.training_generator is None:
            logger.error("Class indices not found. Run get_data_generators() before calling save_class_indices().")
            raise ValueError("Class indices not found. Run get_data_generators() before calling save_class_indices().")

        try:
            # Wrap root_dir in Path first so a plain string root_dir also works
            save_path = Path(self.config.root_dir) / "class_indices.json"
            save_json(save_path=save_path, data=self.training_generator.class_indices)

        except Exception as exception_error:
            logger.error(f"Unexpected error while saving class indices: {exception_error}")
            raise


    def _build_generator(
        self,
        datagen: ImageDataGenerator,
        data_path: Union[str, Path],
        tag: str,
    ) -> "tf.keras.preprocessing.image.DirectoryIterator":
        """
        Helper to build a flow_from_directory generator with consistent options.

        Args:
        - datagen (ImageDataGenerator): Configured generator factory.
        - data_path (str or Path): Directory passed to flow_from_directory.
        - tag (str): Label used in error messages (e.g., "Train", "Valid").

        Returns:
        - The iterator returned by `datagen.flow_from_directory`.

        Raises:
        - FileNotFoundError: If `data_path` does not exist.
        """
        data_path = Path(data_path)

        # Validated outside the try block so this expected failure is logged only once here
        if not data_path.exists():
            logger.error(f"{tag.title()} directory not found: {data_path}")
            raise FileNotFoundError(f"{tag.title()} directory not found: {data_path}")

        try:
            # Building generator; only height and width of the image size are used
            return datagen.flow_from_directory(
                directory=data_path,
                target_size=self.config.params_image_size[:2],
                batch_size=self.config.params_batch_size,
                class_mode="categorical",
                shuffle=True,
            )

        except Exception as exception_error:
            logger.error(f"Unexpected error while build generator: {exception_error}")
            raise


    @staticmethod
    def _count_images_in_directory(directory_path: Union[str, Path]) -> int:
        """
        Counts the total number of image files in a directory and its subfolders.

        Only files ending in .png, .jpg or .jpeg (case-insensitive) are counted.

        Args:
        - directory_path (str or Path): Path to the dataset root (e.g., train or valid)

        Returns:
        - int: Total number of images found

        Raises:
        - FileNotFoundError: If `directory_path` does not exist.
        - ValueError: If no image files are found.
        """
        directory_path = Path(directory_path)

        if not directory_path.exists():
            logger.error(f"Could not find the path {directory_path}")
            raise FileNotFoundError(f"Could not find the path {directory_path}")

        try:
            total_images = sum(
                1
                for _, _, files in os.walk(directory_path)
                for file_name in files
                if file_name.lower().endswith(ModelTraining._IMAGE_EXTENSIONS)
            )

        except Exception as exception_error:
            logger.error(f"Unexpected error while counting images in directory: {exception_error}")
            raise

        if total_images == 0:
            logger.error(f"No images found in {directory_path}")
            raise ValueError(f"No images found in {directory_path}")

        return total_images


    @staticmethod
    def _save_model(save_path: Path, model: tf.keras.Model) -> None:
        """
        Saves a given model to the specified path.

        The parent directory is created first via create_directories.

        Args:
        - save_path (Path): Destination file for the model.
        - model (tf.keras.Model): Model to persist.
        """
        try:
            # Accept plain strings as well; .parent is only available on Path objects
            save_path = Path(save_path)
            create_directories([save_path.parent])
            model.save(save_path)
            logger.info(f"Model saved at: {save_path}")

        except Exception as exception_error:
            logger.error(f"Unexpected error while saving the model at {save_path}: {exception_error}")
            raise

2025-07-04 06:54:05.878007: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-04 06:54:08.661485: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-04 06:54:18.371703: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Run the model training stage end to end; each call relies on state set by the previous one
try:
    # Read config.yaml / params.yaml and build the training configuration
    config_manager = ConfigurationManager()
    training_config = config_manager.get_training_config()

    # Load the base model, build the generators, train (train() also saves the model),
    # then write the class-index mapping next to the training artifacts
    training_constructor = ModelTraining(config=training_config)
    training_constructor.get_base_model()
    training_constructor.get_data_generators()
    training_constructor.train()
    training_constructor.save_class_indices()

except Exception as exception_error:
    # Log the failure, then re-raise so the cell still fails visibly
    logger.exception(f"Unexpected error during model training pipeline: {exception_error}")
    raise

[2025-07-04 06:54:44,482: INFO: common: YAML file: config/config.yaml loaded successfully]
[2025-07-04 06:54:44,902: INFO: common: YAML file: params.yaml loaded successfully]
[2025-07-04 06:54:44,903: INFO: 3329300415: Loading configuration from config/config.yaml and parameters from params.yaml]
[2025-07-04 06:54:44,971: INFO: common: Directory: artifacts created successfully.]
[2025-07-04 06:54:44,972: INFO: common: Directory: artifacts/model_training created successfully.]
[2025-07-04 06:54:44,974: INFO: 3329300415: ModelTrainingConfig created with: ModelTrainingConfig(root_dir=PosixPath('artifacts/model_training'), trained_model_path=PosixPath('artifacts/model_training/trained_model.h5'), updated_base_model=PosixPath('artifacts/base_model/updated_base_model.h5'), training_data=PosixPath('artifacts/data_ingestion/Data/train'), validation_data=PosixPath('artifacts/data_ingestion/Data/valid'), params_augmentation=True, params_image_size=(224, 224, 3), params_batch_size=16, params_epoc



[2025-07-04 06:54:48,340: INFO: 1183082594: Successfully loaded the base model from artifacts/base_model/updated_base_model.h5.]


INFO:cnnClassifierLogger_running:Successfully loaded the base model from artifacts/base_model/updated_base_model.h5.


[2025-07-04 06:54:48,341: INFO: 1183082594: Preparing ImageDataGenerators...]


INFO:cnnClassifierLogger_running:Preparing ImageDataGenerators...


Found 613 images belonging to 4 classes.
Found 72 images belonging to 4 classes.
[2025-07-04 06:54:49,055: INFO: 1183082594: ImageDataGenerators created successfully.]


INFO:cnnClassifierLogger_running:ImageDataGenerators created successfully.


[2025-07-04 06:54:49,059: INFO: 1183082594: Initializing model training...]


INFO:cnnClassifierLogger_running:Initializing model training...
  self._warn_if_super_not_called()


Epoch 1/5


2025-07-04 06:54:52.948646: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2025-07-04 06:54:53.925039: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2025-07-04 06:54:54.091809: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2025-07-04 06:54:54.196338: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2025-07-04 06:54:55.182111: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m868s[0m 22s/step - accuracy: 0.3458 - loss: 13.3627 - val_accuracy: 0.5139 - val_loss: 5.0941
Epoch 2/5
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m800s[0m 21s/step - accuracy: 0.5893 - loss: 5.4325 - val_accuracy: 0.6944 - val_loss: 2.4442
Epoch 3/5
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 21s/step - accuracy: 0.7609 - loss: 1.4935 - val_accuracy: 0.8056 - val_loss: 1.6772
Epoch 4/5
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m799s[0m 20s/step - accuracy: 0.8403 - loss: 1.0734 - val_accuracy: 0.8333 - val_loss: 1.7113
Epoch 5/5
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m798s[0m 20s/step - accuracy: 0.7882 - loss: 1.7114 - val_accuracy: 0.6528 - val_loss: 5.4372
[2025-07-04 08:02:38,408: INFO: 1183082594: Successfully trained model based on provided parameters.]


INFO:cnnClassifierLogger_running:Successfully trained model based on provided parameters.


[2025-07-04 08:02:38,410: INFO: common: Directory: artifacts/model_training created successfully.]


INFO:cnnClassifierLogger_test:Directory: artifacts/model_training created successfully.


[2025-07-04 08:02:38,580: INFO: 1183082594: Model saved at: artifacts/model_training/trained_model.h5]


INFO:cnnClassifierLogger_running:Model saved at: artifacts/model_training/trained_model.h5


[2025-07-04 08:02:38,585: INFO: common: Directory: artifacts/model_training created successfully.]


INFO:cnnClassifierLogger_test:Directory: artifacts/model_training created successfully.


[2025-07-04 08:02:38,589: INFO: common: JSON file saved at: artifacts/model_training/class_indices.json]


INFO:cnnClassifierLogger_test:JSON file saved at: artifacts/model_training/class_indices.json
