In [1]:
import os

In [2]:
%pwd

'd:\\Dev\\Upwork\\GSL\\GSL-Project\\research'

In [3]:
# Move from the notebook folder ("research") up to the project root so the
# relative config/artifacts paths used below resolve.
# Guarded on the folder name so that re-running this cell (or starting the
# kernel from the project root) does not climb one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'd:\\Dev\\Upwork\\GSL\\GSL-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the prepare-base-model stage.

    Instances are built by ``ConfigurationManager.get_prepare_base_model_config``:
    the path fields come from the ``prepare_base_model`` section of the config
    file and the ``params_*`` fields from the params file.
    """

    # Stage directory; ConfigurationManager creates it when building this config.
    root_dir: Path
    # Where PrepareBaseModel.get_cnn_model saves the ResNet50 model.
    cnn_model_path: Path
    # Target passed to the transformer model's save_pretrained().
    transformer_model_path: Path
    # Target passed to the tokenizer's save_pretrained().
    tokenizer_path: Path
    # Where PrepareBaseModel.update_base_model saves the combined model.
    updated_model_path: Path
    # Passed as `input_shape` to tf.keras.applications.ResNet50.
    params_image_size: list
    # Learning rate given to the Adam optimizer of the combined model.
    params_learning_rate: float
    # Passed as `include_top` to ResNet50.
    params_include_top: bool
    # Passed as `weights` to ResNet50.
    params_weights: str
    # Passed as `pooling` to ResNet50.
    params_pooling: str


In [6]:
from gslTranslater.constants import *
from gslTranslater.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project's YAML config and params and builds stage configs."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path handed to ``read_yaml`` for the config file.
            params_filepath: Path handed to ``read_yaml`` for the params file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Create the stage directory and return the stage's config object.

        Returns:
            A ``PrepareBaseModelConfig`` whose path fields are wrapped in
            ``Path`` and whose ``params_*`` fields are copied from the params.
        """
        stage = self.config.prepare_base_model
        hyper = self.params
        create_directories([stage.root_dir])

        # Every path-like entry of the stage section is wrapped the same way.
        path_fields = (
            "root_dir",
            "cnn_model_path",
            "transformer_model_path",
            "tokenizer_path",
            "updated_model_path",
        )
        paths = {field: Path(getattr(stage, field)) for field in path_fields}

        return PrepareBaseModelConfig(
            **paths,
            params_image_size=hyper.IMAGE_SIZE,
            params_learning_rate=hyper.LEARNING_RATE,
            params_include_top=hyper.INCLUDE_TOP,
            params_weights=hyper.WEIGHTS,
            params_pooling=hyper.POOLING,
        )

In [8]:
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import time

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
class PrepareBaseModel:
    """Builds and saves the CNN, the BERT transformer and the combined model.

    Expected call order: ``get_cnn_model()``, ``get_transformer_model()``,
    then ``update_base_model()``; the last step needs the models that the
    first two store on the instance.
    """

    # Hugging Face checkpoint loaded when the caller gives no other name.
    DEFAULT_TRANSFORMER_NAME = 'nlpaueb/bert-base-greek-uncased-v1'

    def __init__(self, config: PrepareBaseModelConfig):
        """Store the stage configuration; no model is loaded here."""
        self.config = config

    def get_cnn_model(self):
        """Instantiate ResNet50 from the configured params and save it.

        The model is kept on ``self.cnn_model`` and written to
        ``config.cnn_model_path``.
        """
        self.cnn_model = tf.keras.applications.ResNet50(
            input_shape=self.config.params_image_size,
            weights=self.config.params_weights,
            include_top=self.config.params_include_top,
            pooling=self.config.params_pooling
        )
        self.save_model(path=self.config.cnn_model_path, model=self.cnn_model)

    def get_transformer_model(self, retries=5, delay=10, model_name=DEFAULT_TRANSFORMER_NAME):
        """Load the BERT tokenizer and model, then save both to disk.

        The whole load-and-save sequence is retried on any exception, with a
        fixed pause between attempts.

        Args:
            retries: Total number of attempts; must be at least 1.
            delay: Seconds to sleep between failed attempts.
            model_name: Checkpoint identifier passed to ``from_pretrained``.

        Raises:
            ValueError: If ``retries`` is less than 1 (previously this
                returned silently without loading anything).
            Exception: Whatever the final failed attempt raised.
        """
        if retries < 1:
            raise ValueError(f"retries must be >= 1, got {retries}")

        for attempt in range(1, retries + 1):
            try:
                self.tokenizer = BertTokenizer.from_pretrained(model_name)
                self.transformer_model = TFBertModel.from_pretrained(model_name)
                self.transformer_model.save_pretrained(self.config.transformer_model_path)
                self.tokenizer.save_pretrained(self.config.tokenizer_path)
                return
            except Exception:
                if attempt == retries:
                    # Out of attempts: bare raise keeps the original traceback.
                    raise
                time.sleep(delay)

    def _prepare_full_model(self, cnn_model, transformer_model, learning_rate, freeze_all, freeze_till,
                            max_seq_length=512):
        """Join the CNN and transformer into one compiled Keras model.

        Args:
            cnn_model: Keras model supplying the image branch.
            transformer_model: Model called with ``input_ids`` and
                ``attention_mask``; its first output is used.
            learning_rate: Learning rate for the Adam optimizer.
            freeze_all: If truthy, every CNN layer is made non-trainable.
            freeze_till: Otherwise, if a positive int, all CNN layers except
                the last ``freeze_till`` are made non-trainable.
            max_seq_length: Length of the two token inputs (default 512).

        Returns:
            The compiled model taking ``[image, input_ids, attention_mask]``.
        """
        # Only the CNN branch is frozen here; transformer layers are untouched.
        if freeze_all:
            for layer in cnn_model.layers:
                layer.trainable = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for layer in cnn_model.layers[:-freeze_till]:
                layer.trainable = False

        cnn_output = cnn_model.output
        flatten_cnn = tf.keras.layers.Flatten()(cnn_output)

        input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name="input_ids")
        attention_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name="attention_mask")

        # [0] selects the first element of the transformer's output
        # (presumably the per-token hidden states for TFBertModel - confirm).
        bert_output = transformer_model(input_ids, attention_mask=attention_mask)[0]
        flatten_bert = tf.keras.layers.Flatten()(bert_output)

        concat_output = tf.keras.layers.Concatenate()([flatten_cnn, flatten_bert])

        # NOTE(review): the softmax width is the CNN's last output dimension,
        # not an explicit class count - confirm this is the intended head size.
        prediction = tf.keras.layers.Dense(
            units=cnn_model.output_shape[-1],
            activation="softmax"
        )(concat_output)

        full_model = tf.keras.models.Model(
            inputs=[cnn_model.input, input_ids, attention_mask],
            outputs=prediction
        )

        full_model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=["accuracy"]
        )

        full_model.summary()
        return full_model

    def update_base_model(self):
        """Build the combined model with the CNN fully frozen and save it.

        Raises:
            AttributeError: If ``get_cnn_model()`` or
                ``get_transformer_model()`` has not been run on this instance.
        """
        cnn_model = getattr(self, "cnn_model", None)
        transformer_model = getattr(self, "transformer_model", None)
        if cnn_model is None or transformer_model is None:
            # Same exception type as before, but with an actionable message.
            raise AttributeError(
                "cnn_model/transformer_model not loaded: call get_cnn_model() and "
                "get_transformer_model() before update_base_model()"
            )

        self.full_model = self._prepare_full_model(
            cnn_model=cnn_model,
            transformer_model=transformer_model,
            learning_rate=self.config.params_learning_rate,
            freeze_all=True,
            freeze_till=None
        )
        self.save_model(path=self.config.updated_model_path, model=self.full_model)

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

In [10]:
# Run the stage end to end. The former try/except only re-raised the caught
# exception, so errors now simply propagate with their original traceback.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
# Order matters: update_base_model() uses the models loaded by the two calls before it.
prepare_base_model.get_cnn_model()
prepare_base_model.get_transformer_model()
prepare_base_model.update_base_model()

[2024-08-06 21:11:23,560: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-06 21:11:23,560: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-06 21:11:23,560: INFO: common: created directory at: artifacts]
[2024-08-06 21:11:23,560: INFO: common: created directory at: artifacts/prepare_base_model]


Error while downloading from https://cdn-lfs.huggingface.co/nlpaueb/bert-base-greek-uncased-v1/74622e76100bd51565a9c1c7821c2a6279b591999e54cf1b5a945c4b72cefdf7?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27tf_model.h5%3B+filename%3D%22tf_model.h5%22%3B&Expires=1723227088&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMzIyNzA4OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9ubHBhdWViL2JlcnQtYmFzZS1ncmVlay11bmNhc2VkLXYxLzc0NjIyZTc2MTAwYmQ1MTU2NWE5YzFjNzgyMWMyYTYyNzliNTkxOTk5ZTU0Y2YxYjVhOTQ1YzRiNzJjZWZkZjc%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=fvAk-z6xPI2GAL0Ta6XdMrJJOs9J1MR33t-uS1TjEGiK6%7EyOAEbzExLogMc047sDONTDs4E2nz2fq-TSBSkc%7E%7E06cZtYOASBNk38Bqn6WX-U%7EHbkiFPbWJfp949qs4qT2%7E4vkTCmlAGADthIGzNH%7EY0lZ%7EbwmLimEXS2K6OEmtmv4LUDId-fcshbsrk9EuhrkZ%7ELHvJd75AL-BVi7rpep9Ll7y6gJltoAZI1JKAMJdLTFULWV0zSRDOnjgfLZOage%7EMrr7rXst8hezCqrx2UjrKqEa6MrOi5vzzGPl%7EmbD5wW3gW7luItu33VOegTIdMvREDDixw7-VDBG3j

Trying to resume download...]


Error while downloading from https://cdn-lfs.huggingface.co/nlpaueb/bert-base-greek-uncased-v1/74622e76100bd51565a9c1c7821c2a6279b591999e54cf1b5a945c4b72cefdf7?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27tf_model.h5%3B+filename%3D%22tf_model.h5%22%3B&Expires=1723228472&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMzIyODQ3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9ubHBhdWViL2JlcnQtYmFzZS1ncmVlay11bmNhc2VkLXYxLzc0NjIyZTc2MTAwYmQ1MTU2NWE5YzFjNzgyMWMyYTYyNzliNTkxOTk5ZTU0Y2YxYjVhOTQ1YzRiNzJjZWZkZjc%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=CPkc2RRCsZc5ZxVjqxyRCjMk4-M%7ExbjHd379mMxi-PweqYFViZ9VpzkM17iU659hPRg%7EWuEReRA8cJf5TksU7d-o4FL7HUElYjdYvPvrcWaq7Tn8Gwm-R5pCNIMWhqoIW0yCcZLl4HaGvUFFTACJ2vfeS05b0zGmlXAeVj%7EBgDbsQAuhRSQukvtEmtbWtVqQTXcuweNXpehmRBqU3UfSiY2xPdCV12uls9Fb3st2XIrE6j04LgZEgsen2OWlfsoIr3Sd44m5%7ERSAcwLWZJbSbAwwW%7E2iNOn-QcFCCE--jD4DR7c1fohj5%7EPM7e3lufnt5aObzbmEKhVTkdCVY0T5cg__

Trying to resume download...]


Some layers from the model checkpoint at nlpaueb/bert-base-greek-uncased-v1 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at nlpaueb/bert-base-greek-uncased-v1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              