In [None]:
!pip install --upgrade accelerator

In [None]:
!pip uninstall y transformers accelerate

In [None]:
!pip install transformers accelerate

In [1]:
import os

In [2]:
# Display the kernel's current working directory.
%pwd

'/Users/iqbal/Desktop/LangWiz-AI/research'

In [7]:
# Step up from the notebook folder to the project root, but only when we are not
# already there: an unconditional chdir climbs one more level on every re-run.
# The root is recognised by the config file the later cells load by relative path
# (config/config.yaml) -- NOTE(review): adjust the marker if that file moves.
if not os.path.isfile(os.path.join("config", "config.yaml")):
    os.chdir('../')

In [8]:
# Display the working directory again to confirm the change of directory took effect.
%pwd

'/Users/iqbal/Desktop/LangWiz-AI'

In [9]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class BaseModelConfig:
    """Immutable settings bundle for the base-model preparation stage.

    The first three fields locate artifacts on disk / the checkpoint to load;
    the remaining fields mirror the training hyper-parameters read from the
    params file. Instances are frozen, so attributes cannot be reassigned
    after construction.
    """

    # --- locations -------------------------------------------------------
    root_dir: Path    # training output directory; the saved model/tokenizer folders are created under it
    data_path: Path   # location handed to `load_from_disk` to read the dataset
    model_ckpt: Path  # checkpoint identifier handed to `from_pretrained`
                      # NOTE(review): values come straight from YAML, so these may be plain strings at runtime

    # --- training hyper-parameters --------------------------------------
    num_train_epochs: int
    evaluation_strategy: str
    save_strategy: str
    learning_rate: float
    per_device_train_batch_size: int
    per_device_eval_batch_size: int
    weight_decay: float
    save_total_limit: int
    predict_with_generate: bool  # carried in the config but not forwarded to TrainingArguments by the trainer class
    fp16: bool
    push_to_hub: bool

    # --- runtime ---------------------------------------------------------
    device: str  # device string passed to `torch.device` (e.g. 'mps', 'cpu')

In [10]:
from LangWizAI.constants import *
from LangWizAI.utils.common import read_yaml, create_directories

In [11]:
class ConfigurationManager:
    """Loads the project's YAML config/params files and builds stage configs.

    `read_yaml` and `create_directories` are project helpers; the loaded
    objects are accessed by attribute below, so they are presumably
    attribute-style mappings (confirm against `LangWizAI.utils.common`).
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: path of the main config YAML.
            params_filepath: path of the params YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> BaseModelConfig:
        """Assemble the `BaseModelConfig` for the prepare-base-model stage.

        Creates the stage's root directory, prints the training parameters,
        and copies the path settings and training parameters into the
        dataclass unchanged.

        Returns:
            A `BaseModelConfig` populated from the `prepare_base_model`
            config section and the `TrainingArguments` params section.
        """
        stage_cfg = self.config.prepare_base_model
        training_params = self.params.TrainingArguments

        create_directories([stage_cfg.root_dir])

        print(training_params)

        # Fields taken from the stage section of the config file.
        location_fields = ("root_dir", "data_path", "model_ckpt")
        # Fields taken from the TrainingArguments section of the params file.
        hyperparameter_fields = (
            "num_train_epochs",
            "evaluation_strategy",
            "save_strategy",
            "learning_rate",
            "per_device_train_batch_size",
            "per_device_eval_batch_size",
            "weight_decay",
            "save_total_limit",
            "predict_with_generate",
            "fp16",
            "push_to_hub",
            "device",
        )

        locations = {name: getattr(stage_cfg, name) for name in location_fields}
        hyperparameters = {
            name: getattr(training_params, name) for name in hyperparameter_fields
        }
        return BaseModelConfig(**locations, **hyperparameters)
        
        

In [12]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import torch

[2024-07-27 07:51:14,799: INFO: config: PyTorch version 2.1.2 available.]


In [28]:
class PrepareBaseModel:
    """Loads a pretrained seq2seq checkpoint and fine-tunes it on a dataset saved on disk.

    Everything needed for training (tokenizer, model, data collator, dataset)
    is loaded eagerly in the constructor; `train()` then freezes most of the
    network, runs the Hugging Face `Trainer`, and saves the model and tokenizer.
    """

    def __init__(self, config: "BaseModelConfig"):
        """Load tokenizer, model, collator and dataset described by `config`.

        Args:
            config: stage settings; `model_ckpt`, `data_path` and `device`
                are read here, the remaining fields are read in `train()`.
        """
        self.config = config
        self.device = torch.device(self.config.device)
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        self.model_fr_en = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(self.device)
        self.seq2seq_data_collator = DataCollatorForSeq2Seq(self.tokenizer, model=self.model_fr_en)

        self.data_pt = load_from_disk(self.config.data_path)

    def train(self):
        """Freeze layers, fine-tune on the 'train' split, then save model and tokenizer.

        The model is written to `<root_dir>/fr_en_model` and the tokenizer to
        `<root_dir>/fr_en_tokenizer`.
        """
        self.freeze_layers()
        trainer_args = TrainingArguments(
            output_dir=self.config.root_dir,
            num_train_epochs=self.config.num_train_epochs,
            evaluation_strategy=self.config.evaluation_strategy,
            save_strategy=self.config.save_strategy,
            learning_rate=self.config.learning_rate,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            per_device_eval_batch_size=self.config.per_device_eval_batch_size,
            weight_decay=self.config.weight_decay,
            save_total_limit=self.config.save_total_limit,
            # `predict_with_generate` is intentionally not forwarded: it is an
            # option of Seq2SeqTrainingArguments, not of TrainingArguments.
            fp16=self.config.fp16,
            push_to_hub=self.config.push_to_hub,
            # Previously `no_cuda=True` was hard-coded, which is deprecated and
            # forced CPU training even though the model had just been moved to
            # the configured device. Force CPU only when the config asks for it.
            use_cpu=self.device.type == "cpu",
        )

        # No eval_dataset is supplied, so an evaluation strategy other than
        # 'no' would additionally need one to be passed here.
        trainer = Trainer(
            model=self.model_fr_en,
            args=trainer_args,
            tokenizer=self.tokenizer,
            data_collator=self.seq2seq_data_collator,
            train_dataset=self.data_pt['train'],
        )
        trainer.train()

        self.model_fr_en.save_pretrained(os.path.join(self.config.root_dir, 'fr_en_model'))
        self.tokenizer.save_pretrained(os.path.join(self.config.root_dir, 'fr_en_tokenizer'))

    def freeze_layers(self):
        """Freeze the encoder and the decoder; leave only the LM head trainable."""
        for param in self.model_fr_en.model.encoder.parameters():
            param.requires_grad = False

        # The decoder is frozen as well (the earlier comment claimed it was
        # being unfrozen, which contradicted the code).
        for param in self.model_fr_en.model.decoder.parameters():
            param.requires_grad = False

        # The final linear layer is re-enabled last.
        # NOTE(review): if this checkpoint ties lm_head's weight to the
        # embedding layers, that shared tensor ends up trainable because this
        # loop runs after the two above -- confirm for the model in use.
        for param in self.model_fr_en.lm_head.parameters():
            param.requires_grad = True

In [29]:
# Run the stage end to end: load configuration, build the trainer wrapper, fine-tune.
# Each object gets its own name (the config and the trainer used to share one),
# and the catch-and-re-raise wrapper is gone: it only re-raised the same exception.
config_manager = ConfigurationManager()
base_model_config = config_manager.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=base_model_config)
prepare_base_model.train()

[2024-07-27 08:04:33,444: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-27 08:04:33,446: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-27 08:04:33,447: INFO: common: created directory at: artifacts]
[2024-07-27 08:04:33,447: INFO: common: created directory at: artifacts/prepare_base_model]
{'num_train_epochs': 3, 'evaluation_strategy': 'no', 'save_strategy': 'epoch', 'learning_rate': 2e-05, 'per_device_train_batch_size': 64, 'per_device_eval_batch_size': 64, 'weight_decay': 0.01, 'save_total_limit': 3, 'predict_with_generate': True, 'fp16': False, 'push_to_hub': False, 'device': 'mps'}


Step,Training Loss


KeyboardInterrupt: 

In [27]:
# List the installed packages whose name contains "transformers", with their versions.
!pip freeze | grep transformers

transformers==4.42.3
