In [1]:
import os
# Show the working directory before the change (IPython magic, not plain Python).
%pwd
# Step up one directory level.
# NOTE(review): "../" is resolved against the *current* directory, so re-running
# this cell moves up one more level each time. Presumably the intent is to land
# on the project root so relative config paths resolve - confirm.
os.chdir("../")
# Show the working directory after the change.
%pwd


'd:\\Data Science\\END to END Proj\\BloodCellClassification'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of settings for the model-training stage.

    ``frozen=True`` makes instances read-only after construction. In this
    notebook the instances are produced by
    ``ConfigurationManager.get_model_training_config``: the three path fields
    come from the ``model_training`` section of the config YAML and the
    remaining fields come from the params YAML.
    """
    # Directory for training artifacts (config key: root_dir).
    root_dir: Path
    # Location where the trained model is saved / looked up (config key: trained_model_path).
    trained_model_path: Path
    # Directory handed to the TensorBoard callback (config key: tensorboard_log_dir).
    tensorboard_log_dir: Path
    # Number of training epochs (params key: EPOCHS).
    epochs: int
    # Learning rate for the optimizer (params key: LEARNING_RATE).
    learning_rate: float
    # Batch size (params key: BATCH_SIZE).
    batch_size: int
    # Image height (params key: IMG_HEIGHT).
    img_height: int
    # Image width (params key: IMG_WIDTH).
    img_width: int

In [3]:
from src.BloodCellClassifier.constant import *
from src.BloodCellClassifier.utils.common import read_yaml,create_directories 

In [19]:
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: Path of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        artifact_dirs = [self.config.artifacts_root]
        create_directories(artifact_dirs)

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Assemble the ``ModelTrainingConfig`` for the training stage.

        Path fields are read from the ``model_training`` section of the config
        file; hyper-parameters are read from the params file.
        """
        section = self.config.model_training
        hyper = self.params

        # Locations of training artifacts, wrapped as Path objects.
        root_dir = Path(section.root_dir)
        model_path = Path(section.trained_model_path)
        tb_log_dir = Path(section.tensorboard_log_dir)

        training_config = ModelTrainingConfig(
            root_dir=root_dir,
            trained_model_path=model_path,
            tensorboard_log_dir=tb_log_dir,
            epochs=hyper.EPOCHS,
            learning_rate=hyper.LEARNING_RATE,
            batch_size=hyper.BATCH_SIZE,
            img_height=hyper.IMG_HEIGHT,
            img_width=hyper.IMG_WIDTH,
        )
        return training_config

In [8]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, LearningRateScheduler
from BloodCellClassifier import logger

class ModelTrainer:
    """Builds, trains, and saves the CNN used to classify blood-cell images.

    All tunable values (output locations, epoch count, base learning rate and
    input image size) are taken from the ``ModelTrainingConfig`` passed to the
    constructor.
    """

    # Multiplicative decay applied to the configured learning rate each epoch.
    _LR_DECAY_PER_EPOCH = 0.95

    def __init__(self, config: ModelTrainingConfig):
        """Store ``config`` and make sure the output directories exist.

        Args:
            config: Training settings. ``root_dir`` and ``tensorboard_log_dir``
                are created here if they are missing.
        """
        self.config = config
        # Create directories if they don't exist
        os.makedirs(self.config.root_dir, exist_ok=True)
        os.makedirs(self.config.tensorboard_log_dir, exist_ok=True)

    def _model_exists(self):
        """Return True when a saved model is already present on disk.

        The check looks for ``saved_model.pb`` and ``variables/variables.index``
        under ``config.trained_model_path``; both must exist.
        """
        required_files = [
            os.path.join(self.config.trained_model_path, "saved_model.pb"),
            os.path.join(self.config.trained_model_path, "variables/variables.index")
        ]
        return all(os.path.exists(f) for f in required_files)

    def build_model(self):
        """Build and compile the CNN.

        Returns:
            A compiled ``Sequential`` model ending in a 4-unit softmax layer,
            using categorical cross-entropy loss, an SGD optimizer seeded with
            ``config.learning_rate``, and the ``accuracy`` metric.
        """
        # The input size follows the configured image dimensions so the network
        # stays in step with IMG_HEIGHT / IMG_WIDTH from the params file; a
        # fixed literal here would silently ignore those settings.
        # Three channels are assumed (colour images).
        input_shape = (self.config.img_height, self.config.img_width, 3)

        model = Sequential([
            Conv2D(128, (8, 8), strides=(3, 3), activation='relu', input_shape=input_shape),
            BatchNormalization(),

            Conv2D(256, (5, 5), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPooling2D((3, 3)),

            Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            Conv2D(256, (1, 1), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),
            Conv2D(256, (1, 1), strides=(1, 1), activation='relu', padding="same"),
            BatchNormalization(),

            Conv2D(512, (3, 3), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            Conv2D(512, (3, 3), activation='relu', padding="same"),
            BatchNormalization(),
            Conv2D(512, (3, 3), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            Conv2D(512, (3, 3), activation='relu', padding="same"),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            Flatten(),
            Dense(1024, activation='relu'),
            Dropout(0.5),
            Dense(1024, activation='relu'),
            Dropout(0.5),
            Dense(4, activation='softmax')
        ])

        model.compile(
            loss='categorical_crossentropy',
            optimizer=SGD(learning_rate=self.config.learning_rate),
            metrics=['accuracy']
        )
        return model

    def train(self, train_gen, val_gen):
        """Train a new model, or load the saved one if it already exists.

        Args:
            train_gen: Training data passed to ``model.fit``.
            val_gen: Validation data passed to ``model.fit``.

        Returns:
            A ``(model, history)`` tuple. ``history`` is ``None`` when an
            existing model was loaded instead of being trained.
        """
        if self._model_exists():
            logger.info(f"Model already exists at {self.config.trained_model_path}. Loading...")
            return load_model(self.config.trained_model_path), None

        model = self.build_model()

        # LearningRateScheduler overwrites the optimizer's learning rate at the
        # start of every epoch, so the schedule must start from the configured
        # rate. A literal base value would make config.learning_rate a no-op.
        base_lr = self.config.learning_rate
        decay = self._LR_DECAY_PER_EPOCH

        callbacks = [
            TensorBoard(log_dir=self.config.tensorboard_log_dir),
            EarlyStopping(patience=3, restore_best_weights=True),
            LearningRateScheduler(lambda epoch: base_lr * decay ** epoch)
        ]

        history = model.fit(
            train_gen,
            epochs=self.config.epochs,
            validation_data=val_gen,
            callbacks=callbacks,
            verbose=1
        )
        return model, history

    def save_model(self, model):
        """Save ``model`` to ``config.trained_model_path`` unless one is already there.

        Args:
            model: The model to persist; it is written with ``save_format="tf"``.
        """
        if self._model_exists():
            logger.info("Model already exists. Skipping save.")
            return

        model.save(
            self.config.trained_model_path,
            save_format="tf"  # This creates the full TF2.12 structure
        )
        logger.info(f"Model saved in TF2.12 format at: {self.config.trained_model_path}")
        logger.info("Contains: saved_model.pb, assets/, variables/")

In [15]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of settings for the data-transformation stage.

    ``frozen=True`` makes instances read-only after construction. In this
    notebook the instances are produced by
    ``ConfigurationManager.get_data_transformation_config``: the directory
    fields come from the ``data_transformation`` section of the config YAML
    and the remaining fields come from the params YAML.
    """
    # Output directory for this stage (config key: root_dir).
    root_dir: Path
    # One list of image directories per class label; each entry is a Path.
    EOSINOPHIL_dirs: list
    # NOTE(review): singular "_dir" unlike its "_dirs" siblings, although it
    # holds a list like them; renaming would also require changing the config key.
    LYMPHOCYTE_dir: list
    MONOCYTE_dirs: list
    NEUTROPHIL_dirs: list
    # Image height and width (params keys: IMG_HEIGHT, IMG_WIDTH).
    img_height: int
    img_width: int
    # Batch size (params key: BATCH_SIZE).
    batch_size: int
    # Split sizes (params keys: TEST_SIZE, VAL_SIZE).
    test_size: float
    val_size: float
    # Random seed (params key: SEED).
    seed: int
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs.

    Defining this class rebinds the name ``ConfigurationManager`` for every
    cell that runs afterwards, so it has to provide every getter those cells
    call. It therefore exposes both the data-transformation getter and the
    model-training getter; without the latter, running the notebook top to
    bottom fails when the training cell calls ``get_model_training_config``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: Path of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Assemble the ``DataTransformationConfig`` for the data stage.

        Directory entries are read from the ``data_transformation`` section of
        the config file and wrapped as ``Path`` objects; image size, batch
        size, split sizes and seed are read from the params file.
        """
        config = self.config.data_transformation

        return DataTransformationConfig(
            root_dir=Path(config.root_dir),
            EOSINOPHIL_dirs=[Path(x) for x in config.EOSINOPHIL_dirs],
            LYMPHOCYTE_dir=[Path(x) for x in config.LYMPHOCYTE_dir],
            MONOCYTE_dirs=[Path(x) for x in config.MONOCYTE_dirs],
            NEUTROPHIL_dirs=[Path(x) for x in config.NEUTROPHIL_dirs],
            img_height=self.params.IMG_HEIGHT,
            img_width=self.params.IMG_WIDTH,
            batch_size=self.params.BATCH_SIZE,
            test_size=self.params.TEST_SIZE,
            val_size=self.params.VAL_SIZE,
            seed=self.params.SEED
        )

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Assemble the ``ModelTrainingConfig`` for the training stage.

        Path fields are read from the ``model_training`` section of the config
        file; hyper-parameters are read from the params file.
        """
        config = self.config.model_training
        params = self.params

        return ModelTrainingConfig(
            root_dir=Path(config.root_dir),
            trained_model_path=Path(config.trained_model_path),
            tensorboard_log_dir=Path(config.tensorboard_log_dir),
            epochs=params.EPOCHS,
            learning_rate=params.LEARNING_RATE,
            batch_size=params.BATCH_SIZE,
            img_height=params.IMG_HEIGHT,
            img_width=params.IMG_WIDTH,
        )

In [17]:
# Data Transformation Pipeline
from BloodCellClassifier.components.Data_Transformation import DataTransformation
from BloodCellClassifier import logger

try:
    # Load the settings for the data-transformation stage
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()

    # Component that performs the individual transformation steps
    data_transformation = DataTransformation(config=data_transformation_config)

    # Step 1: gather the images into a single dataframe
    logger.info("Creating dataframe from image directories...")
    bloodCell_df = data_transformation.create_dataframe()

    # Step 2: carve the dataframe into train / validation / test parts
    logger.info("Splitting data into train/val/test sets...")
    train_set, val_set, test_images = data_transformation.split_data(bloodCell_df)

    # Step 3: wrap each part in a generator (used later by the training cell)
    logger.info("Creating data generators...")
    train_gen, val_gen, test_gen = data_transformation.get_data_generators(
        train_set, val_set, test_images
    )

    # Optional: persist each split as a CSV under the stage's root directory
    logger.info("Saving split datasets...")
    output_dir = data_transformation_config.root_dir
    os.makedirs(output_dir, exist_ok=True)
    for csv_name, split_frame in (
        ("train_set.csv", train_set),
        ("val_set.csv", val_set),
        ("test_images.csv", test_images),
    ):
        split_frame.to_csv(os.path.join(output_dir, csv_name), index=False)

    logger.info("Data transformation completed successfully!")

except Exception as err:
    # Log with traceback, then let the failure surface in the notebook
    logger.exception(f"Error in data transformation pipeline: {err}")
    raise err

[2025-07-31 18:50:20,812: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-31 18:50:20,812: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-31 18:50:20,812: INFO: common: created directory at: artifacts]
[2025-07-31 18:50:20,830: INFO: 2649934531: Creating dataframe from image directories...]
[2025-07-31 18:50:20,898: INFO: Data_Transformation: Created dataframe with 9957 samples]
[2025-07-31 18:50:20,902: INFO: Data_Transformation: Class distribution:
labels
NEUTROPHIL    2499
EOSINOPHIL    2497
LYMPHOCYTE    2483
MONOCYTE      2478
Name: count, dtype: int64]
[2025-07-31 18:50:20,903: INFO: 2649934531: Splitting data into train/val/test sets...]
[2025-07-31 18:50:20,911: INFO: Data_Transformation: Train set size: 5575]
[2025-07-31 18:50:20,911: INFO: Data_Transformation: Validation set size: 1394]
[2025-07-31 18:50:20,912: INFO: Data_Transformation: Test set size: 2988]
[2025-07-31 18:50:20,913: INFO: 2649934531: Creating data generators...]

In [20]:
# Model Training Pipeline
try:
    # ConfigurationManager here is whichever class definition ran most recently
    # in the kernel; it must provide get_model_training_config().
    config = ConfigurationManager()
    model_training_config = config.get_model_training_config()
    
    # Initialize model trainer (its constructor creates the root and
    # TensorBoard log directories if they are missing)
    model_trainer = ModelTrainer(config=model_training_config)
    
    # Train the model. train_gen / val_gen are not defined in this cell: they
    # are the globals produced by the data-transformation cell, which must have
    # been run first. history is None when train() loaded an already-saved
    # model instead of fitting a new one.
    trained_model, history = model_trainer.train(train_gen, val_gen)
    
    # Save the model (save_model skips the write when a saved model already exists)
    model_trainer.save_model(trained_model)
    
    # Evaluate on test set (test_gen also comes from the data-transformation cell)
    if trained_model:  # Only evaluate if we have a model
        test_loss, test_acc = trained_model.evaluate(test_gen)
        logger.info(f"Test Accuracy: {test_acc*100:.2f}%")

except Exception as e:
    # Log with traceback, then re-raise so the failure is visible in the notebook
    logger.exception(f"Error in training pipeline: {e}")
    raise e

[2025-07-31 18:53:32,057: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-31 18:53:32,057: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-31 18:53:32,073: INFO: common: created directory at: artifacts]
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[2025-08-01 10:33:26,961: INFO: builder_impl: Assets written to: artifacts\model_training\blood_cell_model\assets]
[2025-08-01 10:33:27,143: INFO: 281479759: Model saved in TF2.12 format at: artifacts\model_training\blood_cell_model]
[2025-08-01 10:33:27,143: INFO: 281479759: Contains: saved_model.pb, assets/, variables/]
[2025-08-01 10:41:08,077: INFO: 2929818896: Test Accuracy: 95.01%]
