In [None]:
import os
import shutil
from typing import Optional

In [2]:
# Move from the notebook's folder up to the project root so the relative
# `configs/` and `artifacts/` paths used by later cells resolve.
# The guard keeps this cell idempotent: once `configs/` is visible we are
# already at the root, so re-running the cell does not climb another level.
if not os.path.isdir("configs"):
    os.chdir("../")


In [3]:
# Display the current working directory to confirm the chdir above took effect.
%pwd

'f:\\Personal Project\\Medical-Image-Segmentation'

In [30]:
from dataclasses import dataclass

from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyper-parameters for the training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Working directory for the training stage.
    root_dir: Path
    # Where the trained model is saved.
    trained_model_path: Path
    # Location of the prepared base model that training starts from.
    updated_base_model_path: Path
    # Directory that receives a copy of the trained model.
    copy_trained_model_path: Path
    # Directory the data generators read images from.
    training_data: Path
    # Number of epochs passed to model.fit.
    params_epochs: int
    # Batch size used by the train and validation generators.
    params_batch_size: int
    # When True, the training generator applies random augmentations.
    params_is_augmentation: bool
    # All elements but the last are used as the generators' target_size;
    # presumably [height, width, channels] - TODO confirm against params.yaml.
    params_image_size: list

In [None]:
from LiverTumorSegmentation.constants import *
from LiverTumorSegmentation.utils.common import read_yaml, create_directories

In [32]:
class ConfigurationManager:
    """Reads the config and params YAML files and builds stage configurations."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH
    ):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the YAML file with the stage settings.
            params_filepath: Path of the YAML file with the hyper-parameters.
        """
        self.config, self.params = read_yaml(config_filepath), read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Create the training root directory and return a ``TrainingConfig``.

        Returns:
            A ``TrainingConfig`` filled from the ``training`` and
            ``prepare_base_model`` config sections and from the params file.
        """
        # Resolve every section before touching the filesystem.
        train_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyper_params = self.params
        data_dir = self.config.training.training_data

        stage_root = Path(train_section.root_dir)
        create_directories([stage_root])

        # Dict entries are evaluated top to bottom, mirroring the field order.
        fields = {
            "root_dir": stage_root,
            "trained_model_path": Path(train_section.trained_model_path),
            "updated_base_model_path": Path(base_model_section.updated_base_model_path),
            "copy_trained_model_path": Path(train_section.copy_trained_model_path),
            "training_data": Path(data_dir),
            "params_epochs": hyper_params.EPOCHS,
            "params_batch_size": hyper_params.BATCH_SIZE,
            "params_is_augmentation": hyper_params.AUGMENTATION,
            "params_image_size": hyper_params.IMAGE_SIZE,
        }
        return TrainingConfig(**fields)
    

In [33]:
import tensorflow as tf

In [34]:
class Training:
    """Train the prepared base model using Keras directory generators.

    Intended call order: ``get_base_model()``, ``train_valid_generator()``,
    ``train()``, then ``copy_model()``.

    Attributes:
        config: Paths and hyper-parameters for the training stage.
        model: The loaded Keras model, or ``None`` until ``get_base_model()``.
        train_generator: Training iterator, or ``None`` until
            ``train_valid_generator()`` has run.
        valid_generator: Validation iterator, or ``None`` until
            ``train_valid_generator()`` has run.
    """

    def __init__(self, config: "TrainingConfig"):
        self.config = config
        self.model: Optional[tf.keras.Model] = None
        # Declared up front so train() can test for None rather than hasattr().
        self.train_generator = None
        self.valid_generator = None

    def get_base_model(self) -> None:
        """Load the base model from the updated base model path."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self) -> None:
        """Create the train and validation data generators.

        Both generators read from ``config.training_data`` with a 20%
        validation split and pixel values rescaled by 1/255. Augmentation is
        applied to the training generator only, and only when
        ``config.params_is_augmentation`` is true.

        Raises:
            FileNotFoundError: The training data directory does not exist.
            ValueError: The training or validation generator found no images.
        """
        # Check if training data directory exists
        if not self.config.training_data.exists():
            raise FileNotFoundError(
                f"Training data directory not found: {self.config.training_data}\n"
                "Please ensure data ingestion has been completed and the directory exists."
            )

        datagenerator_kwargs = dict(
            rescale=1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            # Drop the last element of the image size to get the target size.
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **datagenerator_kwargs
            )
        else:
            # Without augmentation the same generator settings serve both subsets.
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

        # Validate that generators have samples
        if self.train_generator.samples == 0:
            error_msg = (
                f"\n❌ No training images found in {self.config.training_data}\n\n"
                "ImageDataGenerator expects a CLASSIFICATION directory structure:\n"
                f"  {self.config.training_data}/\n"
                "    class1/\n"
                "      image1.jpg\n"
                "      image2.jpg\n"
                "    class2/\n"
                "      image1.jpg\n"
                "      image2.jpg\n\n"
                "⚠️  This is a SEGMENTATION project. You have two options:\n\n"
                "Option 1: Organize your data into class subdirectories\n"
                "  - Create subdirectories for each class\n"
                "  - Place images in their respective class folders\n\n"
                "Option 2: Use a segmentation data loader\n"
                "  - See artifacts/sample/train_sample.py for an example\n"
                "  - Uses PKLSegmentationDataset for pickle-based segmentation data\n\n"
                "💡 Run the diagnostic cell above to see what data structure you have."
            )
            raise ValueError(error_msg)

        if self.valid_generator.samples == 0:
            raise ValueError(
                f"No validation images found in {self.config.training_data}\n"
                "Please ensure there are enough images for a 20% validation split."
            )

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model") -> None:
        """Save the trained model to the specified path, creating parent folders."""
        path.parent.mkdir(parents=True, exist_ok=True)
        model.save(path)

    def train(self) -> None:
        """Fit the model on the configured generators, then save it.

        Sets ``steps_per_epoch`` and ``validation_steps`` on the instance
        (whole batches only) and writes the fitted model to
        ``config.trained_model_path``.

        Raises:
            ValueError: The generators are missing or empty, fewer samples
                than one batch are available, or no model has been loaded.
        """
        # Validate generators before training
        if (
            getattr(self, "train_generator", None) is None
            or getattr(self, "valid_generator", None) is None
        ):
            raise ValueError("Data generators not initialized. Call train_valid_generator() first.")

        if self.train_generator.samples == 0:
            raise ValueError("Cannot train: train_generator has 0 samples.")

        if self.valid_generator.samples == 0:
            raise ValueError("Cannot train: valid_generator has 0 samples.")

        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        # Ensure at least 1 step per epoch
        if self.steps_per_epoch == 0:
            raise ValueError(
                "steps_per_epoch is 0. "
                f"train_generator.samples={self.train_generator.samples}, "
                f"batch_size={self.train_generator.batch_size}"
            )

        if self.validation_steps == 0:
            raise ValueError(
                "validation_steps is 0. "
                f"valid_generator.samples={self.valid_generator.samples}, "
                f"batch_size={self.valid_generator.batch_size}"
            )

        # Fail with a clear message instead of "'NoneType' has no attribute 'fit'".
        if getattr(self, "model", None) is None:
            raise ValueError("Model not loaded. Call get_base_model() first.")

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

    def copy_model(self) -> None:
        """
        Copy the trained model to the `copy_trained_model_path`
        directory defined in the configuration.

        Works for a single model file and for a model saved as a directory;
        an existing copy with the same name is replaced.

        Raises:
            FileNotFoundError: The trained model does not exist yet.
        """
        src = Path(self.config.trained_model_path)
        # Check first so a missing model does not leave an empty target folder.
        if not src.exists():
            raise FileNotFoundError(
                f"Trained model not found: {src}\n"
                "Run train() before copy_model()."
            )

        dst_dir = Path(self.config.copy_trained_model_path)
        dst_dir.mkdir(parents=True, exist_ok=True)
        dst = dst_dir / src.name

        if src.is_dir():
            # copytree refuses to overwrite, so clear any stale copy first.
            if dst.is_dir():
                shutil.rmtree(dst)
            elif dst.exists():
                dst.unlink()
            shutil.copytree(src, dst)
        else:
            shutil.copy2(src, dst)
    

In [None]:
# Diagnostic: Check what data structure exists
import os
from pathlib import Path

# Inspect the training data directory: list its top-level entries and count
# image files and pickle files found anywhere beneath it.
config = ConfigurationManager()
training_config = config.get_training_config()
data_path = training_config.training_data

print(f"Checking data directory: {data_path}")
print(f"Exists: {data_path.exists()}")

if data_path.exists():
    print(f"\nContents of {data_path}:")
    for item in sorted(data_path.iterdir()):
        if item.is_dir():
            file_count = len(list(item.iterdir()))
            print(f"  📁 {item.name}/ ({file_count} items)")
        else:
            print(f"  📄 {item.name}")

    # Check for common image formats. `.nii.gz` is handled separately below
    # because Path.suffix only ever returns the final extension (`.gz`).
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.nii'}
    # Only regular files count; rglob("*") also yields directories.
    all_files = [p for p in data_path.rglob("*") if p.is_file()]
    image_files = [
        f for f in all_files
        if f.suffix.lower() in image_extensions
        or [s.lower() for s in f.suffixes[-2:]] == ['.nii', '.gz']
    ]
    pickle_files = [p for p in data_path.rglob("*.pkl*") if p.is_file()]

    print(f"\nFound {len(image_files)} image files")
    print(f"Found {len(pickle_files)} pickle files")

    if len(pickle_files) > 0:
        print("\n⚠️  Pickle files detected - this suggests segmentation data.")
        print("   You may need to use a segmentation data loader instead of ImageDataGenerator.")
    elif len(image_files) == 0:
        print("\n⚠️  No image files found. Check your data ingestion process.")
else:
    print("\n❌ Directory does not exist. Run data ingestion first!")


[2026-01-20 23:07:20,552: INFO: common: yaml file: configs\config.yaml loaded successfully]
[2026-01-20 23:07:20,560: INFO: common: yaml file: configs\params.yaml loaded successfully]
[2026-01-20 23:07:20,562: INFO: common: created directory at: artifacts]
[2026-01-20 23:07:20,565: INFO: common: created directory at: artifacts\training]
Checking data directory: artifacts\data_ingestion\data
Exists: True

Contents of artifacts\data_ingestion\data:
  📁 GroundTruth/ (87 items)
  📁 Predictions/ (33 items)

Found 0 image files
Found 2958 pickle files

⚠️  Pickle files detected - this suggests segmentation data.
   You may need to use a segmentation data loader instead of ImageDataGenerator.


In [36]:
# Run the training stage end to end: build the config, load the base model,
# create the data generators, train, then copy the trained model.
# No try/except wrapper: a handler that only re-raises adds nothing, and any
# failure should surface with its original traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train()
training.copy_model()


[2026-01-20 23:07:21,163: INFO: common: yaml file: configs\config.yaml loaded successfully]
[2026-01-20 23:07:21,167: INFO: common: yaml file: configs\params.yaml loaded successfully]
[2026-01-20 23:07:21,169: INFO: common: created directory at: artifacts]
[2026-01-20 23:07:21,171: INFO: common: created directory at: artifacts\training]
Found 0 images belonging to 2 classes.
Found 0 images belonging to 2 classes.


ValueError: 
❌ No training images found in artifacts\data_ingestion\data

ImageDataGenerator expects a CLASSIFICATION directory structure:
  artifacts\data_ingestion\data/
    class1/
      image1.jpg
      image2.jpg
    class2/
      image1.jpg
      image2.jpg

⚠️  This is a SEGMENTATION project. You have two options:

Option 1: Organize your data into class subdirectories
  - Create subdirectories for each class
  - Place images in their respective class folders

Option 2: Use a segmentation data loader
  - See artifacts/sample/train_sample.py for an example
  - Uses PKLSegmentationDataset for pickle-based segmentation data

💡 Run the diagnostic cell above to see what data structure you have.