In [1]:
import os

# Move from the notebook's folder up to its parent exactly once per kernel.
# A bare os.chdir("../") climbs one more level every time this cell is re-run,
# which silently breaks every relative path used further down the notebook.
if "PROJECT_ROOT" not in globals():
    os.chdir("../")
    PROJECT_ROOT = os.getcwd()
else:
    # Re-running the cell returns to the directory recorded on the first run.
    os.chdir(PROJECT_ROOT)

# Last expression of the cell: show the working directory now in effect.
os.getcwd()


'd:\\Data Science\\END to END Proj\\BloodGroupClassifierBasedonFingerPrint'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataPreprocessingConfig:
    """Immutable settings used to build the image data generators.

    Values are checked on construction so that a bad parameter file fails
    here with a clear message instead of later inside the image loader.

    Raises:
        ValueError: if ``img_height``, ``img_width`` or ``batch_size`` is not
            positive, or if ``validation_split`` is outside ``[0, 1)``.
    """

    data_path: Path          # directory handed to the image loader
    img_height: int          # target image height
    img_width: int           # target image width
    batch_size: int          # number of images per batch
    validation_split: float  # fraction of the images held out for validation

    def __post_init__(self):
        # The dataclass is frozen, so normal attribute assignment is blocked;
        # object.__setattr__ is the standard way to normalise a field here.
        # Accepting a plain string keeps values read straight from YAML usable.
        object.__setattr__(self, "data_path", Path(self.data_path))

        for field_name in ("img_height", "img_width", "batch_size"):
            value = getattr(self, field_name)
            if value <= 0:
                raise ValueError(
                    f"{field_name} must be a positive number, got {value!r}"
                )

        if not 0 <= self.validation_split < 1:
            raise ValueError(
                "validation_split must be in the range [0, 1), "
                f"got {self.validation_split!r}"
            )

In [3]:
from src.BloodGroupClassifier.constant import *
from src.BloodGroupClassifier.utils.common import read_yaml,create_directories 

In [4]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read the config and params YAML files and prepare the artifacts root.

        Args:
            config_filepath: path of the main configuration YAML file.
            params_filepath: path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The configured artifacts root is passed to create_directories
        # before any stage runs.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_preprocessing_config(self) -> DataPreprocessingConfig:
        """Assemble the settings for the data-preprocessing stage.

        Returns:
            A DataPreprocessingConfig whose image size, batch size and
            validation split are taken from the loaded params file.
        """
        hyperparams = self.params

        # The dataset location is fixed here on purpose rather than being
        # read from the config file (hardcoded to match the original setup).
        dataset_dir = Path("artifacts/data_ingestion/")

        preprocessing_config = DataPreprocessingConfig(
            data_path=dataset_dir,
            img_height=hyperparams.IMG_HEIGHT,
            img_width=hyperparams.IMG_WIDTH,
            batch_size=hyperparams.BATCH_SIZE,
            validation_split=hyperparams.VALIDATION_SPLIT,
        )
        return preprocessing_config

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from BloodGroupClassifier import logger

class DataPreprocessing:
    """Builds the training and validation image iterators from a config."""

    def __init__(self, config: "DataPreprocessingConfig"):
        """Keep the preprocessing settings for later use.

        Args:
            config: object exposing data_path, img_height, img_width,
                batch_size and validation_split.
        """
        self.config = config

    def _flow_subset(self, datagen, subset, seed=None):
        """Create the directory iterator for one subset of the data.

        Args:
            datagen: the ImageDataGenerator shared by both subsets.
            subset: 'training' or 'validation'.
            seed: forwarded to flow_from_directory; None leaves it unseeded.
        """
        return datagen.flow_from_directory(
            str(self.config.data_path),  # Convert Path to string
            target_size=(self.config.img_height, self.config.img_width),
            batch_size=self.config.batch_size,
            class_mode='categorical',
            subset=subset,
            seed=seed,
        )

    def get_data_generators(self, seed=None):
        """Return ``(train_generator, validation_generator)``.

        Both iterators are created from a single ImageDataGenerator that
        rescales pixel values by 1/255 and applies the configured
        validation split, so they read from the same directory with the
        same settings and differ only in the requested subset.

        Args:
            seed: optional value passed to both flow_from_directory calls so
                a run can be made repeatable. The default (None) matches the
                library default, i.e. behaviour is unchanged when omitted.

        Raises:
            Exception: any error raised while building the generators is
                logged and then re-raised unchanged.
        """
        try:
            # IDENTICAL to original preprocessing
            train_datagen = ImageDataGenerator(
                rescale=1./255,
                validation_split=self.config.validation_split
            )

            train_generator = self._flow_subset(train_datagen, 'training', seed)
            validation_generator = self._flow_subset(train_datagen, 'validation', seed)

            logger.info("Created generators with:")
            logger.info(f"  - Image size: {self.config.img_height}x{self.config.img_width}")
            logger.info(f"  - Batch size: {self.config.batch_size}")
            logger.info(f"  - Validation split: {self.config.validation_split}")

            return train_generator, validation_generator

        except Exception as e:
            logger.error(f"Error in preprocessing: {e}")
            # Bare raise re-raises the active exception with its traceback intact.
            raise

In [6]:
# After data ingestion: build the training and validation generators.
# No try/except wrapper is used here: one that only re-raises adds nothing,
# and DataPreprocessing already logs its own failures before re-raising.

config = ConfigurationManager()

data_preprocessing_config = config.get_data_preprocessing_config()
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
train_generator, validation_generator = data_preprocessing.get_data_generators()


[2025-07-29 21:56:43,590: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-29 21:56:43,626: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-29 21:56:43,629: INFO: common: created directory at: artifacts]
Found 4803 images belonging to 8 classes.
Found 1197 images belonging to 8 classes.
[2025-07-29 21:56:44,135: INFO: 3991443896: Created generators with:]
[2025-07-29 21:56:44,135: INFO: 3991443896:   - Image size: 64x64]
[2025-07-29 21:56:44,135: INFO: 3991443896:   - Batch size: 32]
[2025-07-29 21:56:44,135: INFO: 3991443896:   - Validation split: 0.2]
