In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Chaitanya\\Documents\\ML\\Indian-Medical-Leaf-Clf\\Medicinal-Leaf-Classification\\research'

In [3]:
# Move from the notebook folder (research/) up to the project root so that
# relative paths such as config/config.yaml resolve. The guard makes the cell
# idempotent: re-running it no longer climbs one directory higher each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Chaitanya\\Documents\\ML\\Indian-Medical-Leaf-Clf\\Medicinal-Leaf-Classification'

### entity (config_entity.py)

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the training stage (paths plus hyper-parameters)."""

    # --- filesystem locations ---
    root_dir: Path                 # working directory of the training stage
    trained_model_path: Path       # destination of the model saved after fitting
    updated_base_model_path: Path  # model file loaded before fitting
    training_data: Path            # directory the image datasets are read from

    # --- values taken from the params file ---
    params_epochs: int
    params_batch_size: int
    params_image_size: list        # elements [0] and [1] are used as the image size
    params_is_augmentation: bool
    params_seed: int


@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings used to build the training callbacks."""

    root_dir: Path                   # root directory of the callbacks stage
    tensorboard_root_log_dir: Path   # parent directory of the per-run TensorBoard logs
    checkpoint_model_filepath: Path  # configured checkpoint location

### Configuration (configuration.py)

In [6]:
from MedicineLeafClassifier.constants import *
from MedicineLeafClassifier.utils.common import read_yaml, create_directories
import tensorflow as tf

In [7]:
class ConfigurationManager:
    """Loads the YAML config/params files and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file loaded into ``self.config``.
            params_filepath: YAML file loaded into ``self.params``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_callbacks_config(self) -> PrepareCallbacksConfig:
        """Create the checkpoint and TensorBoard directories and return the callbacks config.

        Returns:
            PrepareCallbacksConfig built from the ``prepare_callbacks`` section.
        """
        section = self.config.prepare_callbacks

        # The checkpoint entry is handled as a file path: only its parent
        # directory is created here.
        checkpoint_parent = os.path.dirname(section.checkpoint_model_filepath)
        required_dirs = [
            Path(checkpoint_parent),
            Path(section.tensorboard_root_log_dir),
        ]
        create_directories(required_dirs)

        return PrepareCallbacksConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=Path(section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(section.checkpoint_model_filepath),
        )

    def get_training_config(self) -> TrainingConfig:
        """Create the training root directory and return the training config.

        Returns:
            TrainingConfig combining the ``training`` and ``prepare_base_model``
            sections, the dataset location and the hyper-parameters from the
            params file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The images live two folders below the directory the dataset was
        # unzipped into.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir,
            "indian-medicinal-leaf-image-dataset",
            "Medicinal Leaf dataset",
        )

        create_directories([Path(training_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(training_section.root_dir),
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_seed=hyperparams.SEED,
        )

### Components (training.py)

In [8]:

import os 
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [9]:
class PrepareCallback:
    """Builds the Keras callbacks (TensorBoard, checkpointing, early stopping).

    Every property creates and returns a *new* callback object on each access.
    """

    def __init__(self, config: PrepareCallbacksConfig):
        """Store the callbacks configuration.

        Args:
            config: paths for TensorBoard logs and model checkpoints.
        """
        self.config = config

    def _checkpoint_dir(self):
        """Return the directory the per-epoch checkpoint files are written to.

        ``ConfigurationManager`` treats ``checkpoint_model_filepath`` as a file
        and creates only its parent directory. Joining the per-epoch file name
        onto the file path itself would nest the checkpoints under a directory
        named like a model file, so when the configured path has an extension
        its parent directory is used. A path without an extension is used
        as-is, as before.
        """
        configured = self.config.checkpoint_model_filepath
        _, extension = os.path.splitext(configured)
        return os.path.dirname(configured) if extension else configured

    @property
    def create_tb_callback(self):
        """TensorBoard callback logging to a new timestamped sub-directory."""
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{timestamp}",
        )
        return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)

    @property
    def create_ckpt_callbacks(self):
        """ModelCheckpoint callback saving full models named ``model_{epoch}.h5``."""
        return tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(self._checkpoint_dir(), "model_{epoch}.h5"),
            save_best_only=True,  # only write when the monitored metric improves
            monitor='val_accuracy',  # Monitor validation accuracy during training
            save_weights_only=False,  # save the whole model, not just the weights
            verbose=1  # Set to 0 for silent operation
        )

    @property
    def create_early_stopping(self):
        """EarlyStopping callback monitoring validation accuracy."""
        return tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',  # Monitor validation accuracy
            min_delta=0.01,  # Minimum required change in the monitored metric
            patience=50,  # Number of epochs with no improvement to wait before stopping
            baseline=0.5,
            restore_best_weights=True  # Restore the weights of the best model before stopping
        )

    @property
    def get_tb_ckpt_callbacks(self):
        """List of callbacks passed to ``fit``: TensorBoard, checkpoint, early stopping."""
        return [
            self.create_tb_callback,
            self.create_ckpt_callbacks,
            self.create_early_stopping
        ]

In [10]:
class Training:
    """Loads the prepared base model, builds the datasets and fits the model.

    Call order: ``get_base_model()`` and ``train_valid_split()`` must both run
    before ``train()``, because they set ``self.model``, ``self.train_data``
    and ``self.valid_data``.
    """

    def __init__(self, config: TrainingConfig):
        """Store the training configuration."""
        self.config = config

    def get_base_model(self):
        """Load the updated base model from disk into ``self.model``."""
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def _load_subset(self, subset):
        """Read one subset of the image directory as a batched dataset.

        Args:
            subset: ``'training'`` or ``'validation'``.

        Both subsets are read with the same seed and split fraction; that is
        what keeps the training and validation files disjoint.
        """
        image_size = self.config.params_image_size
        return tf.keras.preprocessing.image_dataset_from_directory(
            self.config.training_data,
            labels='inferred',
            label_mode='int',
            color_mode='rgb',
            class_names=None,
            batch_size=self.config.params_batch_size,
            image_size=(image_size[0], image_size[1]),
            shuffle=True,
            seed=self.config.params_seed,
            validation_split=0.1,
            subset=subset,
        )

    def train_valid_split(self):
        """Build ``self.train_data`` and ``self.valid_data`` (90% / 10% split).

        Both datasets are cached and prefetched with an autotuned buffer size.
        """
        train_data = self._load_subset('training')
        valid_data = self._load_subset('validation')

        AUTOTUNE = tf.data.AUTOTUNE
        self.train_data = train_data.cache().prefetch(buffer_size=AUTOTUNE)
        self.valid_data = valid_data.cache().prefetch(buffer_size=AUTOTUNE)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``.

        Used after the last epoch, irrespective of whether that epoch had the
        best accuracy.
        """
        model.save(path)

    def train(self, callbacks_list: list):
        """Fit the model on the training data, then save it.

        Args:
            callbacks_list: Keras callbacks passed to ``fit``.
        """
        self.model.fit(
            self.train_data,
            epochs=self.config.params_epochs,
            validation_data=self.valid_data,
            callbacks=callbacks_list
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

In [11]:
# Let TensorFlow grow GPU memory on demand. The setting is applied to every
# visible GPU, not just the first, because TensorFlow requires memory growth
# to be the same across GPUs.
device = tf.config.list_physical_devices("GPU")
for gpu in device:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the setting is changed too late; report it and carry on.
        print(e)

# NOTE(review): the recorded run of this cell ended in a GPU
# ResourceExhaustedError while allocating a tensor for a batch of 32.
# Lowering BATCH_SIZE in params.yaml is the usual remedy -- confirm against
# the available GPU memory.
stratergy = tf.distribute.experimental.CentralStorageStrategy()
with stratergy.scope():
    # Build the callbacks first, then run the training stage end to end.
    # Errors propagate unchanged; no try/except wrapper is needed for that.
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callbacks_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    callbacks_list = prepare_callbacks.get_tb_ckpt_callbacks

    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train_valid_split()
    training.train(callbacks_list=callbacks_list)

[2024-03-03 08:37:29,935: INFO: parameter_server_strategy: ParameterServerStrategy (CentralStorageStrategy if you are using a single machine) with compute_devices = ['/job:localhost/replica:0/task:0/device:GPU:0'], variable_device = '/job:localhost/replica:0/task:0/device:GPU:0']
[2024-03-03 08:37:29,962: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-03-03 08:37:29,980: INFO: common: yaml file: params.yaml loaded successfully]
[2024-03-03 08:37:29,986: INFO: common: Created directory at artifacts]
[2024-03-03 08:37:29,988: INFO: common: Created directory at artifacts\prepare_callbacks\checkpoint_dir]
[2024-03-03 08:37:29,990: INFO: common: Created directory at artifacts\prepare_callbacks\tensorboard_log_dir]
[2024-03-03 08:37:29,994: INFO: common: Created directory at artifacts\training]
Found 6900 files belonging to 80 classes.
Using 6210 files for training.
Found 6900 files belonging to 80 classes.
Using 690 files for validation.
Epoch 1/50


ResourceExhaustedError: Graph execution error:

Detected at node 'sequential/inception_resnet_v2/batch_normalization_28/FusedBatchNormV3' defined at (most recent call last):
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\threading.py", line 966, in _bootstrap
      self._bootstrap_inner()
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\threading.py", line 1009, in _bootstrap_inner
      self.run()
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\layers\normalization\batch_normalization.py", line 850, in call
      outputs = self._fused_batch_norm(inputs, training=training)
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\layers\normalization\batch_normalization.py", line 660, in _fused_batch_norm
      output, mean, variance = control_flow_util.smart_cond(
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\utils\control_flow_util.py", line 108, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "c:\Users\Chaitanya\Anaconda3\envs\IMLD_clf\lib\site-packages\keras\layers\normalization\batch_normalization.py", line 634, in _fused_batch_norm_training
      return tf.compat.v1.nn.fused_batch_norm(
Node: 'sequential/inception_resnet_v2/batch_normalization_28/FusedBatchNormV3'
OOM when allocating tensor with shape[32,48,25,25] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node sequential/inception_resnet_v2/batch_normalization_28/FusedBatchNormV3}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_41972]