In [1]:
import os
from dataclasses import dataclass
from pathlib import Path
import torch
from torch import nn
from torchvision import models
from transformers import BertTokenizer, BertModel
from gslTranslater.constants import *
from gslTranslater.utils.common import read_yaml, create_directories

In [3]:
# Move the working directory one level up (presumably from the notebook folder
# to the project root so relative config/artifact paths resolve — TODO confirm
# against the repository layout).
# NOTE(review): this is relative to the *current* directory, so re-running the
# cell without restarting the kernel climbs one more level each time.
os.chdir("../")

In [4]:
# Show the current working directory (sanity check after the directory change
# in the previous cell).
%pwd

'd:\\Dev\\Upwork\\GSL\\GSL-Project'

In [5]:
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable set of filesystem paths used by the prepare-base-model stage."""

    # Directory for this stage; created by ConfigurationManager before use.
    root_dir: Path
    # Destination of the CNN backbone's state dict (written via torch.save).
    cnn_model_path: Path
    # Destination of the transformer encoder's state dict (written via torch.save).
    transformer_model_path: Path
    # Location the tokenizer is written to via `save_pretrained`.
    tokenizer_path: Path
    # Destination of the combined model's state dict.
    updated_model_path: Path

In [7]:
class ConfigurationManager:
    """Load the project's YAML configuration and build per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Create the stage directory and return its paths as a PrepareBaseModelConfig.

        Returns:
            A PrepareBaseModelConfig whose fields are the `prepare_base_model`
            entries of the loaded configuration, each wrapped in `Path`.
        """
        stage = self.config.prepare_base_model
        create_directories([stage.root_dir])

        # Every field of the stage config is a path read from the same section.
        path_fields = (
            "root_dir",
            "cnn_model_path",
            "transformer_model_path",
            "tokenizer_path",
            "updated_model_path",
        )
        return PrepareBaseModelConfig(
            **{field: Path(getattr(stage, field)) for field in path_fields}
        )

In [9]:
class PrepareBaseModel:
    """Fetch the pretrained backbones and assemble the combined translator model.

    Intended call order: `get_cnn_model()`, `get_transformer_model()`, then
    `update_base_model()`. The first two populate `self.cnn_model`,
    `self.transformer_model` and `self.tokenizer`, which the last one consumes.
    """

    # Hugging Face checkpoint used for both the tokenizer and the encoder.
    TRANSFORMER_CHECKPOINT = 'nlpaueb/bert-base-greek-uncased-v1'

    def __init__(self, config: "PrepareBaseModelConfig"):
        """Store the stage configuration (paths for every artifact written here)."""
        self.config = config

    def get_cnn_model(self):
        """Load a pretrained ResNet-50, drop its last child module and save its weights.

        Sets `self.cnn_model` and writes its state dict to `config.cnn_model_path`.
        """
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is so older installs keep working.
        backbone = models.resnet50(pretrained=True)
        # Keep every child except the final one so the module emits pooled
        # features instead of class logits.
        self.cnn_model = nn.Sequential(*list(backbone.children())[:-1])
        torch.save(self.cnn_model.state_dict(), self.config.cnn_model_path)

    def get_transformer_model(self):
        """Load the tokenizer and BERT encoder and persist both.

        Sets `self.tokenizer` and `self.transformer_model`; writes the encoder's
        state dict to `config.transformer_model_path` and the tokenizer files to
        `config.tokenizer_path`.
        """
        self.tokenizer = BertTokenizer.from_pretrained(self.TRANSFORMER_CHECKPOINT)
        self.transformer_model = BertModel.from_pretrained(self.TRANSFORMER_CHECKPOINT)
        torch.save(self.transformer_model.state_dict(), self.config.transformer_model_path)
        self.tokenizer.save_pretrained(self.config.tokenizer_path)

    def update_base_model(self):
        """Combine the CNN and transformer into one model, print it and save it.

        Sets `self.full_model` and writes its state dict to
        `config.updated_model_path`.

        Raises:
            AttributeError: if `get_cnn_model()` / `get_transformer_model()` have
                not been called on this instance yet.
        """
        missing = [
            name
            for name in ("cnn_model", "transformer_model", "tokenizer")
            if not hasattr(self, name)
        ]
        if missing:
            raise AttributeError(
                "update_base_model() requires get_cnn_model() and "
                "get_transformer_model() to be called first; missing attribute(s): "
                + ", ".join(missing)
            )

        # Reload the weights that were persisted by the two getter methods.
        self.cnn_model.load_state_dict(torch.load(self.config.cnn_model_path))
        self.transformer_model.load_state_dict(torch.load(self.config.transformer_model_path))

        # Define the full model combining both models
        class SignLanguageTranslator(nn.Module):
            """CNN features and the transformer's first-token state, concatenated and classified."""

            def __init__(self, cnn_model, transformer_model, vocab_size):
                super().__init__()
                self.cnn_model = cnn_model
                # 2048 is the flattened feature width expected from the CNN
                # (ResNet-50's pooled output); it is projected down to 512.
                self.fc = nn.Linear(2048, 512)
                self.transformer_model = transformer_model
                # vocab_size is passed in explicitly: this module has no
                # tokenizer attribute of its own to measure.
                self.classifier = nn.Linear(
                    512 + transformer_model.config.hidden_size, vocab_size
                )

            def forward(self, features, input_ids, attention_mask):
                features = self.cnn_model(features)
                # Flatten everything after the batch dimension.
                features = features.view(features.size(0), -1)
                features = torch.relu(self.fc(features))

                bert_outputs = self.transformer_model(
                    input_ids=input_ids, attention_mask=attention_mask
                )
                # Hidden state at sequence position 0 for every batch element.
                bert_cls = bert_outputs.last_hidden_state[:, 0, :]

                combined = torch.cat((features, bert_cls), dim=1)
                return self.classifier(combined)

        self.full_model = SignLanguageTranslator(
            self.cnn_model, self.transformer_model, len(self.tokenizer)
        )
        self.print_model_summary(self.full_model)
        self.save_model(self.full_model, self.config.updated_model_path)

    @staticmethod
    def save_model(model: nn.Module, path: Path):
        """Write `model`'s state dict to `path`."""
        torch.save(model.state_dict(), path)

    @staticmethod
    def print_model_summary(model: nn.Module):
        """Print the module's repr."""
        print(model)


In [10]:
# Run the prepare-base-model stage end to end: load the configuration, fetch
# and save both pretrained backbones, then build and save the combined model.
# Exceptions are left to propagate unchanged; a handler that only re-raises
# adds nothing but an extra traceback frame.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
prepare_base_model.get_cnn_model()
prepare_base_model.get_transformer_model()
prepare_base_model.update_base_model()

[2024-08-06 21:11:23,560: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-06 21:11:23,560: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-06 21:11:23,560: INFO: common: created directory at: artifacts]
[2024-08-06 21:11:23,560: INFO: common: created directory at: artifacts/prepare_base_model]


Error while downloading from https://cdn-lfs.huggingface.co/nlpaueb/bert-base-greek-uncased-v1/74622e76100bd51565a9c1c7821c2a6279b591999e54cf1b5a945c4b72cefdf7?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27tf_model.h5%3B+filename%3D%22tf_model.h5%22%3B&Expires=1723227088&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMzIyNzA4OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9ubHBhdWViL2JlcnQtYmFzZS1ncmVlay11bmNhc2VkLXYxLzc0NjIyZTc2MTAwYmQ1MTU2NWE5YzFjNzgyMWMyYTYyNzliNTkxOTk5ZTU0Y2YxYjVhOTQ1YzRiNzJjZWZkZjc%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=fvAk-z6xPI2GAL0Ta6XdMrJJOs9J1MR33t-uS1TjEGiK6%7EyOAEbzExLogMc047sDONTDs4E2nz2fq-TSBSkc%7E%7E06cZtYOASBNk38Bqn6WX-U%7EHbkiFPbWJfp949qs4qT2%7E4vkTCmlAGADthIGzNH%7EY0lZ%7EbwmLimEXS2K6OEmtmv4LUDId-fcshbsrk9EuhrkZ%7ELHvJd75AL-BVi7rpep9Ll7y6gJltoAZI1JKAMJdLTFULWV0zSRDOnjgfLZOage%7EMrr7rXst8hezCqrx2UjrKqEa6MrOi5vzzGPl%7EmbD5wW3gW7luItu33VOegTIdMvREDDixw7-VDBG3j

Trying to resume download...]


Error while downloading from https://cdn-lfs.huggingface.co/nlpaueb/bert-base-greek-uncased-v1/74622e76100bd51565a9c1c7821c2a6279b591999e54cf1b5a945c4b72cefdf7?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27tf_model.h5%3B+filename%3D%22tf_model.h5%22%3B&Expires=1723228472&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMzIyODQ3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9ubHBhdWViL2JlcnQtYmFzZS1ncmVlay11bmNhc2VkLXYxLzc0NjIyZTc2MTAwYmQ1MTU2NWE5YzFjNzgyMWMyYTYyNzliNTkxOTk5ZTU0Y2YxYjVhOTQ1YzRiNzJjZWZkZjc%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=CPkc2RRCsZc5ZxVjqxyRCjMk4-M%7ExbjHd379mMxi-PweqYFViZ9VpzkM17iU659hPRg%7EWuEReRA8cJf5TksU7d-o4FL7HUElYjdYvPvrcWaq7Tn8Gwm-R5pCNIMWhqoIW0yCcZLl4HaGvUFFTACJ2vfeS05b0zGmlXAeVj%7EBgDbsQAuhRSQukvtEmtbWtVqQTXcuweNXpehmRBqU3UfSiY2xPdCV12uls9Fb3st2XIrE6j04LgZEgsen2OWlfsoIr3Sd44m5%7ERSAcwLWZJbSbAwwW%7E2iNOn-QcFCCE--jD4DR7c1fohj5%7EPM7e3lufnt5aObzbmEKhVTkdCVY0T5cg__

Trying to resume download...]


Some layers from the model checkpoint at nlpaueb/bert-base-greek-uncased-v1 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at nlpaueb/bert-base-greek-uncased-v1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              