In [5]:
import os

In [29]:
%pwd

'd:\\Code\\Git\\myprojects\\Equation-Solver-End-to-End\\Handwritten_Equation_Solver'

In [2]:
# Step up one directory so that the relative paths used further down
# (config\config.yaml, params.yaml, artifacts/ in the logged output) resolve.
# NOTE(review): this is not idempotent — every re-run of the cell moves one
# more level up, so it must be executed exactly once per kernel session.
# Presumably the notebook lives in a sub-folder of the project root — confirm.
os.chdir("../")

In [21]:
%pwd

'd:\\Code\\Git\\myprojects\\Equation-Solver-End-to-End\\Handwritten_Equation_Solver'

In [67]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the "prepare base model" stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The ``params_*`` fields are filled from the params file, the remaining
    fields from the ``prepare_base_model`` section of the config file.
    """

    # Working directory of this stage (created before the config is built).
    root_dir: Path
    # Share link of the model to download; a Google Drive file id is parsed from it.
    custom_model_URL: str
    # File the downloaded model is written to and then loaded from.
    base_model_path: Path
    # File the prepared (frozen / re-headed, compiled) model is saved to.
    updated_base_model_path: Path
    # NOTE(review): not read by the code visible in this notebook —
    # presumably the model input size; confirm against params.yaml.
    params_image_size: list
    # Learning rate passed to the Adam optimizer when compiling.
    params_learning_rate: float
    # NOTE(review): not read by the code visible in this notebook.
    params_include_top: bool
    # NOTE(review): not read by the code visible in this notebook.
    params_weights: str
    # Number of units of the softmax output layer when a new head is built.
    params_classes: int
    # True: keep the downloaded model's own layers; False: build a new dense head.
    params_keep_dense: bool
    # True: freeze every layer except Dense (Dropout/Flatten are left untouched).
    params_freeze_all: bool
    # Only consulted when params_freeze_all is False; any value > 0 freezes
    # all layers up to and including the last convolutional layer.
    params_freeze_till: int

In [68]:
from equation_solver.constants import *
from equation_solver.utils.common import read_yaml, create_directories

In [69]:
class ConfigurationManager:
    """Reads the project's YAML config/params files and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the settings object for the "prepare base model" stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A PrepareBaseModelConfig combining the ``prepare_base_model``
            section of the config file with the values from the params file.
        """
        stage = self.config.prepare_base_model
        create_directories([stage.root_dir])

        # Paths and the URL come from the config file; every params_* value
        # is copied unchanged from the params file.
        return PrepareBaseModelConfig(
            root_dir=Path(stage.root_dir),
            custom_model_URL=str(stage.custom_model_URL),
            base_model_path=Path(stage.base_model_path),
            updated_base_model_path=Path(stage.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES,
            params_keep_dense=self.params.KEEP_DENSE,
            params_freeze_all=self.params.FREEZE_ALL,
            params_freeze_till=self.params.FREEZE_TILL,
        )

In [70]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import gdown
from equation_solver import logger

In [84]:
class PrepareBaseModel:
    """Download a pre-trained Keras model and prepare it for fine-tuning.

    Intended call order: ``get_base_model()`` (download + load), then
    ``update_base_model()`` (freeze layers, optionally replace the dense
    head, compile and save).
    """

    # Shape of the replacement classification head that is built when
    # ``keep_dense`` is False: widths of the hidden Dense layers, the dropout
    # rate applied after each of them, and the L2 penalty on their kernels.
    _HEAD_UNITS = (768, 128)
    _HEAD_DROPOUT_RATE = 0.25
    _HEAD_L2_PENALTY = 0.01

    def __init__(self, config: PrepareBaseModelConfig):
        """Store the stage configuration; nothing is downloaded yet."""
        self.config = config

    def get_base_model(self):
        """Download the model from Google Drive and load it into ``self.model``.

        The file is written to ``config.base_model_path``; the directory that
        holds it is created when missing.

        Raises:
            RuntimeError: If gdown reports a failed download by returning None.
        """
        model_url = self.config.custom_model_URL
        model_download_dir = self.config.base_model_path

        # Create the folder the file is actually written into (this used to
        # create an unrelated, hard-coded "artifacts/data_ingestion" folder).
        Path(model_download_dir).parent.mkdir(parents=True, exist_ok=True)
        logger.info(f"Downloading model from {model_url} into file {model_download_dir}")

        # A share link has the form .../file/d/<file_id>/view?usp=sharing,
        # so the file id is the second-to-last "/"-separated segment.
        file_id = model_url.split("/")[-2]
        prefix = 'https://drive.google.com/uc?export=download&id='
        downloaded_to = gdown.download(prefix + file_id, str(model_download_dir))
        if downloaded_to is None:
            # Fail here rather than with a confusing error inside load_model.
            raise RuntimeError(
                f"Failed to download model from {model_url} into file {model_download_dir}"
            )

        logger.info(f"Downloaded model from {model_url} into file {model_download_dir}")

        # Load the downloaded model
        self.model = tf.keras.models.load_model(model_download_dir)

    @staticmethod
    def _is_conv_layer(layer):
        """Return True when ``layer`` is one of the Keras convolution layer types."""
        return isinstance(layer, (
            tf.keras.layers.Conv2D,
            tf.keras.layers.Conv1D,
            tf.keras.layers.Conv3D,
            tf.keras.layers.DepthwiseConv2D,
            tf.keras.layers.SeparableConv2D
        ))

    @staticmethod
    def _last_conv_index(model):
        """Return the index in ``model.layers`` of the last conv layer, or None."""
        last_index = None
        for index, layer in enumerate(model.layers):
            if PrepareBaseModel._is_conv_layer(layer):
                last_index = index
        return last_index

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate, keep_dense):
        """Freeze layers, optionally rebuild the dense head, and compile.

        Args:
            model: Loaded Keras model; its layers' ``trainable`` flags are
                modified in place.
            classes: Number of units of the new softmax output layer (only
                used when ``keep_dense`` is False).
            freeze_all: When truthy, Dense layers are made trainable, Dropout
                and Flatten layers are left untouched, and every other layer
                is frozen.
            freeze_till: Only consulted when ``freeze_all`` is falsy. Any
                value > 0 freezes all layers up to and including the last
                convolutional layer and unfreezes the rest.
                NOTE(review): the number itself is not used as a layer count
                — confirm this is intended.
            learning_rate: Learning rate for the Adam optimizer.
            keep_dense: When falsy, everything after the last convolutional
                layer is replaced by a new Flatten/Dense/Dropout head.

        Returns:
            The compiled model (``model`` itself when ``keep_dense`` is truthy).

        Raises:
            ValueError: If ``keep_dense`` is falsy and the model has no
                convolutional layer.
        """
        last_conv_index = PrepareBaseModel._last_conv_index(model)

        if freeze_all:
            for layer in model.layers:
                if isinstance(layer, tf.keras.layers.Dense):
                    layer.trainable = True
                elif not isinstance(layer, (tf.keras.layers.Dropout, tf.keras.layers.Flatten)):
                    # Conv layers and everything else (e.g. BatchNorm).
                    layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            # Without any conv layer the boundary falls back to index 0,
            # i.e. only the first layer is frozen.
            boundary = 0 if last_conv_index is None else last_conv_index
            for index, layer in enumerate(model.layers):
                layer.trainable = index > boundary

        if not keep_dense:
            if last_conv_index is None:
                raise ValueError("No convolutional layers found in the model")

            # Branch off the last convolutional layer; layers after it in the
            # original model are not part of the new graph.
            x = model.layers[last_conv_index].output
            x = tf.keras.layers.Flatten()(x)

            # Hidden Dense layers, each followed by dropout.
            for units in PrepareBaseModel._HEAD_UNITS:
                x = tf.keras.layers.Dense(
                    units,
                    activation='relu',
                    kernel_initializer='glorot_uniform',
                    bias_initializer='zeros',
                    kernel_regularizer=tf.keras.regularizers.l2(PrepareBaseModel._HEAD_L2_PENALTY),
                    trainable=True  # the new head is always trainable
                )(x)
                x = tf.keras.layers.Dropout(PrepareBaseModel._HEAD_DROPOUT_RATE)(x)

            # Final output layer
            prediction = tf.keras.layers.Dense(
                classes,
                activation='softmax',
                kernel_initializer='glorot_uniform',
                bias_initializer='zeros',
                trainable=True
            )(x)

            full_model = tf.keras.models.Model(inputs=model.input, outputs=prediction)
        else:
            full_model = model

        # Sparse categorical cross-entropy is used as the loss, which takes
        # integer class labels rather than one-hot vectors.
        full_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=['accuracy']
        )

        # Record which layers ended up trainable so a run can be audited.
        logger.info("Layer trainable status:")
        for layer in full_model.layers:
            logger.info(f"{layer.name}: {layer.trainable}")

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Prepare the loaded model per the config and save it.

        The result is kept in ``self.full_model`` and written to
        ``config.updated_base_model_path``.

        Raises:
            AttributeError: If ``get_base_model()`` has not been called yet.
        """
        base_model = getattr(self, "model", None)
        if base_model is None:
            # Same exception type as the bare attribute lookup would raise,
            # but with an actionable message.
            raise AttributeError(
                "Base model is not loaded; call get_base_model() before update_base_model()."
            )

        self.full_model = self._prepare_full_model(
            model=base_model,
            classes=self.config.params_classes,
            freeze_all=self.config.params_freeze_all,
            freeze_till=self.config.params_freeze_till,
            learning_rate=self.config.params_learning_rate,
            keep_dense=self.config.params_keep_dense,
        )

        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

In [85]:
# Run the "prepare base model" stage end to end: read the configuration,
# download and load the base model, then freeze/re-head, compile and save it.
# Exceptions propagate unchanged; the former `try/except ...: raise e`
# wrapper did nothing except add a frame to the traceback.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
prepare_base_model.get_base_model()
prepare_base_model.update_base_model()

[2024-10-21 19:10:30,010: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-21 19:10:30,013: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-21 19:10:30,014: INFO: common: created directory at: artifacts]
[2024-10-21 19:10:30,015: INFO: common: created directory at: artifacts/prepare_base_model]
[2024-10-21 19:10:30,016: INFO: 3512771067: Downloading model from https://drive.google.com/file/d/1cjGBSfGIQDtlhq5-0H5DuQZS346zqiZZ/view?usp=sharing into file artifacts\prepare_base_model\base_model.h5]


Downloading...
From: https://drive.google.com/uc?/export=download&id=1cjGBSfGIQDtlhq5-0H5DuQZS346zqiZZ
To: d:\Code\Git\myprojects\Equation-Solver-End-to-End\Handwritten_Equation_Solver\artifacts\prepare_base_model\base_model.h5
100%|██████████| 44.8M/44.8M [00:05<00:00, 8.09MB/s]

[2024-10-21 19:10:40,234: INFO: 3512771067: Downloaded model from https://drive.google.com/file/d/1cjGBSfGIQDtlhq5-0H5DuQZS346zqiZZ/view?usp=sharing into file artifacts\prepare_base_model\base_model.h5]





[2024-10-21 19:10:40,495: INFO: 3512771067: 
Layer trainable status:]
[2024-10-21 19:10:40,496: INFO: 3512771067: rescaling: False]
[2024-10-21 19:10:40,497: INFO: 3512771067: conv2d: False]
[2024-10-21 19:10:40,498: INFO: 3512771067: conv2d_1: False]
[2024-10-21 19:10:40,499: INFO: 3512771067: conv2d_2: False]
[2024-10-21 19:10:40,500: INFO: 3512771067: conv2d_3: False]
[2024-10-21 19:10:40,501: INFO: 3512771067: conv2d_4: False]
[2024-10-21 19:10:40,501: INFO: 3512771067: flatten: True]
[2024-10-21 19:10:40,502: INFO: 3512771067: dense: True]
[2024-10-21 19:10:40,503: INFO: 3512771067: dropout: True]
[2024-10-21 19:10:40,504: INFO: 3512771067: dense_1: True]
[2024-10-21 19:10:40,505: INFO: 3512771067: dropout_1: True]
[2024-10-21 19:10:40,506: INFO: 3512771067: dense_2: True]
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 45, 45, 1)         0   