Step 1: Import Required Libraries

In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
import pickle
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

2024-12-29 11:41:49.870730: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-29 11:41:50.185147: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-29 11:41:50.189250: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
%pwd

'/home/highfrezh/Desktop/AI-ML-ENG/DL PROJECTS/end-to-end-brain-tumor-classification-using-MRI-Dataset/research'

In [3]:
# Move from the research/ folder up to the project root so that relative
# paths (config/config.yaml, artifacts/...) resolve.
# Guarded on the current folder name so that re-running this cell does not
# climb one more directory level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/home/highfrezh/Desktop/AI-ML-ENG/DL PROJECTS/end-to-end-brain-tumor-classification-using-MRI-Dataset'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """
    Immutable bundle of paths and hyperparameters for the training stage.

    Instances are built by ConfigurationManager.get_training_config() from the
    YAML config and params files. Field order defines the positional
    constructor signature, so do not reorder.
    """
    root_dir: Path  # training artifacts directory; created by the configuration manager
    split_dir: Path  # directory where DataPreprocessor.prepare_data pickles the train/val splits
    trained_model_path: Path  # destination of the model saved at the end of Training.train
    updated_base_model_path: Path  # saved base model loaded by Training.load_base_model
    training_data: Path  # "<unzip_dir>/Training"; expected to hold one sub-folder per label
    testing_data: Path  # "<unzip_dir>/Testing"; expected to hold one sub-folder per label
    params_epochs: int  # passed as `epochs` to model.fit
    params_batch_size: int  # passed as `batch_size` to model.fit
    params_is_augmentation: bool  # from params.AUGMENTATION; not read by the code in this notebook
    params_image_size: list  # from params.IMAGE_SIZE; not read by the code in this notebook
    labels: list  # class folder names; a label's position in this list is its numeric class id
    classes: int  # from params.CLASSES; not read here (class count is derived from len(labels))

In [6]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [7]:
class ConfigurationManager:
    """Loads the YAML config and params files and assembles stage configs."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """
        Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyperparameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config["artifacts_root"]
        create_directories([artifacts_root])

    def get_training_config(self):
        """
        Build the TrainingConfig for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            TrainingConfig: Paths taken from the `training`, `prepare_base_model`
            and `data_ingestion` config sections plus hyperparameters from params.
        """
        cfg = self.config
        training_section = cfg.training
        base_model_section = cfg.prepare_base_model
        hyperparams = self.params

        # The ingested dataset is expected to contain "Training" and "Testing" sub-folders.
        dataset_root = cfg.data_ingestion.unzip_dir
        train_dir = os.path.join(dataset_root, "Training")
        test_dir = os.path.join(dataset_root, "Testing")

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            split_dir=Path(training_section.split_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(train_dir),
            testing_data=Path(test_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
            labels=hyperparams.LABELS,
            classes=hyperparams.CLASSES,
        )


In [8]:
class DataPreprocessor:
    """
    Loads the image dataset into memory, preprocesses it and splits it into
    training and validation sets.
    """

    # (width, height) handed to cv2.resize for every image.
    # NOTE(review): the config also carries params_image_size, which is not
    # consulted here - confirm whether the two are meant to stay in sync.
    TARGET_SIZE = (224, 224)

    def __init__(self, training_config):
        """
        Handles data loading and preprocessing based on the training configuration.

        Args:
            training_config: Object exposing `labels`, `training_data`,
                `testing_data` and `split_dir`.
        """
        self.training_config = training_config

    def _load_directory(self, data_dir):
        """
        Read every image under `data_dir/<label>/` for each configured label.

        Files that OpenCV cannot decode (cv2.imread returns None for them) are
        skipped with a message instead of crashing inside cv2.resize.

        Returns:
            tuple: (list of resized image arrays, list of integer class ids).
        """
        images, targets = [], []
        for label in self.training_config.labels:
            # Numeric class id is the label's position in the configured list.
            class_index = self.training_config.labels.index(label)
            folder_path = os.path.join(data_dir, label)
            # os.listdir order is arbitrary; sorting keeps the sample order (and
            # therefore the seeded train/val split) reproducible across machines.
            for img_name in sorted(os.listdir(folder_path)):
                img_path = os.path.join(folder_path, img_name)
                img = cv2.imread(img_path)
                if img is None:
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                images.append(cv2.resize(img, self.TARGET_SIZE))
                targets.append(class_index)
        return images, targets

    def load_data(self):
        """
        Load and preprocess the training and testing data.

        Images from both the training and the testing directories are pooled
        into a single dataset (training directory first).

        Returns:
            tuple: (X, Y) where X is the preprocessed image array and Y the
            array of integer class ids.

        Raises:
            ValueError: If no readable image was found.
        """
        X, Y = [], []
        for data_dir in (self.training_config.training_data,
                         self.training_config.testing_data):
            images, targets = self._load_directory(data_dir)
            X.extend(images)
            Y.extend(targets)

        if not X:
            raise ValueError("No readable images found in the training/testing directories.")

        # Convert to NumPy arrays and preprocess
        X = np.array(X)
        Y = np.array(Y)

        # NOTE(review): cv2.imread yields BGR channel order, while the VGG16
        # preprocess_input documentation describes RGB input. Left unchanged so
        # training stays consistent with whatever the inference code does -
        # confirm both sides before adding a BGR->RGB conversion.
        X = preprocess_input(X)
        # Debug shapes
        print(f"X shape: {X.shape}")
        print(f"Y shape: {Y.shape}")

        return X, Y

    def prepare_data(self, X, Y, test_size=0.2):
        """
        Split the data into training and validation sets and one-hot encode the labels.

        The two splits are also pickled to `split_dir` as `train_data.pkl` and
        `val_data.pkl` (each a `(X, Y)` tuple).

        Args:
            X: Image array as returned by `load_data`.
            Y: Integer class ids aligned with `X`.
            test_size: Fraction of samples placed in the validation split.

        Returns:
            tuple: (X_train, Y_train, X_val, Y_val, num_classes).
        """
        # Fixed seed so the split is repeatable for a given sample order.
        X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=test_size, random_state=42)

        # One-hot encode the labels
        num_classes = len(self.training_config.labels)
        Y_train = to_categorical(Y_train, num_classes=num_classes)
        Y_val = to_categorical(Y_val, num_classes=num_classes)

        # Debug shapes
        print(f"X_train shape: {X_train.shape}")
        print(f"X_val shape: {X_val.shape}")
        print(f"Y_train shape: {Y_train.shape}")
        print(f"Y_val shape: {Y_val.shape}")

        # Create the save directory if it doesn't exist
        os.makedirs(self.training_config.split_dir, exist_ok=True)

        # Save the datasets using pickle.
        # NOTE: only load these files back from a trusted location; unpickling
        # untrusted data can execute arbitrary code.
        with open(os.path.join(self.training_config.split_dir, "train_data.pkl"), "wb") as f:
            pickle.dump((X_train, Y_train), f)
        with open(os.path.join(self.training_config.split_dir, "val_data.pkl"), "wb") as f:
            pickle.dump((X_val, Y_val), f)

        return X_train, Y_train, X_val, Y_val, num_classes


In [9]:
class Training:
    """Trains the saved base model on in-memory arrays and persists the result."""

    def __init__(self, training_config: TrainingConfig, X_train, Y_train, X_val, Y_val,):
        """
        Store the configuration and the already-prepared train/validation arrays.

        The model itself is not loaded here; call `load_base_model()` first.
        """
        self.config = training_config
        self.X_train, self.Y_train = X_train, Y_train
        self.X_val, self.Y_val = X_val, Y_val
        # Filled in by load_base_model().
        self.model = None

    def load_base_model(self):
        """
        Read the previously saved base model from `updated_base_model_path`
        and keep it on the instance as `self.model`.
        """
        source = self.config.updated_base_model_path
        print(f"Loading base model from: {source}")
        self.model = tf.keras.models.load_model(source)
        print("Base model loaded successfully.")

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """
        Write `model` to `path` and report where it was stored.
        """
        model.save(path)
        print(f"Model saved successfully at: {path}")

    def train(self):
        """
        Fit the loaded model on the training arrays, validating on the
        validation arrays, then save it to `trained_model_path`.

        Returns:
            The object returned by `model.fit`.

        Raises:
            ValueError: If no model has been loaded yet.
        """
        # Guard: fitting requires a model from load_base_model().
        if self.model is None:
            raise ValueError("Model not loaded. Call 'load_base_model()' before training.")

        settings = self.config

        print("Starting training...")
        fit_options = {
            "validation_data": (self.X_val, self.Y_val),
            "epochs": settings.params_epochs,
            "batch_size": settings.params_batch_size,
        }
        history = self.model.fit(self.X_train, self.Y_train, **fit_options)
        print("Training completed successfully.")

        # Persist the fitted model before handing the history back.
        self.save_model(path=settings.trained_model_path, model=self.model)

        return history


In [None]:
# Run the training stage end to end: config -> load images -> split -> load model -> fit.
# No try/except here: any failure should surface with its original traceback.
config = ConfigurationManager()
training_config = config.get_training_config()

preprocess_data = DataPreprocessor(training_config)
X, Y = preprocess_data.load_data()
X_train, Y_train, X_val, Y_val, num_classes = preprocess_data.prepare_data(X, Y)

training = Training(training_config, X_train, Y_train, X_val, Y_val)
training.load_base_model()
# Keep the fit history so later cells can inspect or plot the metrics.
history = training.train()

[2024-12-29 11:41:57,025: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-12-29 11:41:57,032: INFO: common: yaml file: params.yaml loaded successfully]
[2024-12-29 11:41:57,034: INFO: common: created directory at: artifacts]
[2024-12-29 11:41:57,035: INFO: common: created directory at: artifacts/training]
X shape: (7023, 224, 224, 3)
Y shape: (7023,)
X_train shape: (5618, 224, 224, 3)
X_val shape: (1405, 224, 224, 3)
Y_train shape: (5618, 4)
Y_val shape: (1405, 4)
Loading base model from: artifacts/prepare_base_model/base_model_updated.h5


2024-12-29 11:43:04.001612: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.
2024-12-29 11:43:04.082865: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.
2024-12-29 11:43:04.147195: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 25690112 exceeds 10% of free system memory.
