In [1]:
import os

In [2]:
%pwd

'd:\\NLP\\project\\Text-Summarization\\research'

In [3]:
# Step up from the notebook folder to the project root so that the later
# `from src.textSummarization ...` imports and the relative config paths resolve.
# Guarded so that re-running this cell does not keep climbing the directory tree.
# NOTE(review): assumes the project root is the directory that contains `src/` - confirm.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'd:\\NLP\\project\\Text-Summarization'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The first three fields locate artifacts; the remaining ones are training
    hyper-parameters that are forwarded to the trainer's arguments.
    """

    # Directory for the training stage: used as the trainer output directory
    # and as the parent folder for the saved model and tokenizer.
    root_dir: Path
    # Location of the dataset that is loaded from disk for training.
    data_path: Path
    # Checkpoint identifier handed to `from_pretrained` for tokenizer and model.
    # NOTE(review): annotated as Path, but the value is passed through as read
    # from the config file - confirm whether this is a filesystem path or a hub id.
    model_ckpt: Path
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    weight_decay: float
    logging_steps: int
    evaluation_strategy: str
    eval_steps: int
    # Step count between checkpoints; an integer, matching how it is produced
    # by the configuration manager (which casts it with int()).
    save_steps: int
    gradient_accumulation_steps: int

In [6]:
from src.textSummarization.constants import *
from src.textSummarization.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Loads the project's YAML config and params files and hands out stage configs."""

    def __init__(self, config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the `ModelTrainerConfig` for the training stage.

        Creates the stage's root directory, copies the path-like settings from
        the `model_trainer` config section unchanged, and casts the numeric
        entries of the `TrainingArguments` params section to int/float.

        Returns:
            A populated `ModelTrainerConfig`.
        """
        stage_section = self.config.model_trainer
        hyperparams = self.params.TrainingArguments

        create_directories([stage_section.root_dir])

        # Settings taken from the config section as-is.
        field_values = {
            "root_dir": stage_section.root_dir,
            "data_path": stage_section.data_path,
            "model_ckpt": stage_section.model_ckpt,
        }

        # Hyper-parameters paired with the conversion applied to each one,
        # listed in the order the dataclass declares them.
        conversions = (
            ("num_train_epochs", int),
            ("warmup_steps", int),
            ("per_device_train_batch_size", int),
            ("weight_decay", float),
            ("logging_steps", int),
            ("evaluation_strategy", lambda raw: raw),  # passed through untouched
            ("eval_steps", int),
            ("save_steps", int),
            ("gradient_accumulation_steps", int),
        )
        for field_name, convert in conversions:
            field_values[field_name] = convert(getattr(hyperparams, field_name))

        return ModelTrainerConfig(**field_values)

In [8]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import torch

  from .autonotebook import tqdm as notebook_tqdm


[2024-09-09 13:46:33,967: INFO: config: PyTorch version 2.4.0 available.]
[2024-09-09 13:46:34,008: INFO: config: TensorFlow version 2.13.0 available.]


In [9]:
class ModelTrainer:
    """Fine-tunes a sequence-to-sequence checkpoint on a dataset stored on disk."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the training-stage configuration for later use by `train`.

        Args:
            config: Paths and hyper-parameters for this training run.
        """
        self.config = config

    def train(self):
        """Run one full training job and write the resulting artifacts.

        Loads the tokenizer and model from `config.model_ckpt`, loads the
        dataset from `config.data_path`, trains on its "train" split while
        evaluating on its "validation" split, then saves the model to
        `<root_dir>/distilbart-cnn-12-6-model` and the tokenizer to
        `<root_dir>/tokenizer`.

        All hyper-parameters come from `self.config`, so edits to the params
        file actually take effect instead of being overridden by literals.
        """
        # Use the GPU when one is visible to torch, otherwise fall back to CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        seq2seq_data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        trainer_args = TrainingArguments(
            output_dir=self.config.root_dir,
            num_train_epochs=self.config.num_train_epochs,
            warmup_steps=self.config.warmup_steps,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            # The config carries no separate eval batch size; reuse the train one.
            per_device_eval_batch_size=self.config.per_device_train_batch_size,
            weight_decay=self.config.weight_decay,
            logging_steps=self.config.logging_steps,
            evaluation_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            save_steps=self.config.save_steps,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
        )

        trainer = Trainer(
            model=model,
            args=trainer_args,
            tokenizer=tokenizer,
            data_collator=seq2seq_data_collator,
            train_dataset=dataset_samsum_pt["train"],
            eval_dataset=dataset_samsum_pt["validation"],
        )

        trainer.train()

        # Save model and tokenizer
        model.save_pretrained(os.path.join(self.config.root_dir, "distilbart-cnn-12-6-model"))
        tokenizer.save_pretrained(os.path.join(self.config.root_dir, "tokenizer"))


In [10]:
# Drive the training stage: load configuration, build the trainer, train.
# Exceptions are left to propagate unchanged; the previous
# `except Exception as e: raise e` wrapper re-raised them anyway.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Distinct name for the trainer so `model_trainer_config` keeps referring to
# the configuration object rather than being rebound to a ModelTrainer.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-09-09 13:46:35,187: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-09-09 13:46:35,248: INFO: common: yaml file: params.yaml loaded successfully]
[2024-09-09 13:46:35,252: INFO: common: creating directory at: artifacts]
[2024-09-09 13:46:35,255: INFO: common: creating directory at: artifacts/model_trainer]


  5%|▌         | 50/920 [15:01<3:57:33, 16.38s/it]

{'loss': 1.3145, 'grad_norm': 10.465394020080566, 'learning_rate': 5e-06, 'epoch': 0.05}


 11%|█         | 100/920 [29:45<3:53:20, 17.07s/it]

{'loss': 0.9618, 'grad_norm': 5.787104606628418, 'learning_rate': 1e-05, 'epoch': 0.11}


 16%|█▋        | 150/920 [43:36<3:26:25, 16.08s/it]

{'loss': 0.4775, 'grad_norm': 1.6165976524353027, 'learning_rate': 1.5e-05, 'epoch': 0.16}


 22%|██▏       | 200/920 [57:44<3:28:08, 17.34s/it]

{'loss': 0.2893, 'grad_norm': 1.2543509006500244, 'learning_rate': 2e-05, 'epoch': 0.22}


 27%|██▋       | 250/920 [1:11:40<3:02:05, 16.31s/it]

{'loss': 0.1863, 'grad_norm': 1.462594747543335, 'learning_rate': 2.5e-05, 'epoch': 0.27}


 33%|███▎      | 300/920 [1:25:22<2:46:08, 16.08s/it]

{'loss': 0.1327, 'grad_norm': 1.0424412488937378, 'learning_rate': 3e-05, 'epoch': 0.33}


 38%|███▊      | 350/920 [1:39:29<2:41:30, 17.00s/it]

{'loss': 0.1113, 'grad_norm': 0.7694984078407288, 'learning_rate': 3.5e-05, 'epoch': 0.38}


 43%|████▎     | 400/920 [1:53:22<2:16:58, 15.81s/it]

{'loss': 0.0934, 'grad_norm': 2.734358072280884, 'learning_rate': 4e-05, 'epoch': 0.43}


 49%|████▉     | 450/920 [2:07:02<2:00:06, 15.33s/it]

{'loss': 0.0809, 'grad_norm': 0.5678751468658447, 'learning_rate': 4.5e-05, 'epoch': 0.49}


 54%|█████▍    | 500/920 [2:20:44<1:53:01, 16.15s/it]

{'loss': 0.0732, 'grad_norm': 0.7252897024154663, 'learning_rate': 5e-05, 'epoch': 0.54}


 60%|█████▉    | 550/920 [2:34:34<1:47:11, 17.38s/it]

{'loss': 0.0639, 'grad_norm': 0.8811004161834717, 'learning_rate': 4.404761904761905e-05, 'epoch': 0.6}


 65%|██████▌   | 600/920 [2:48:25<1:34:01, 17.63s/it]

{'loss': 0.0636, 'grad_norm': 0.6335009336471558, 'learning_rate': 3.809523809523809e-05, 'epoch': 0.65}


 71%|███████   | 650/920 [3:02:20<1:15:47, 16.84s/it]

{'loss': 0.0595, 'grad_norm': 0.5656167268753052, 'learning_rate': 3.2142857142857144e-05, 'epoch': 0.71}


 76%|███████▌  | 700/920 [3:16:04<1:04:04, 17.48s/it]

{'loss': 0.052, 'grad_norm': 0.6779705882072449, 'learning_rate': 2.6190476190476192e-05, 'epoch': 0.76}


 82%|████████▏ | 750/920 [3:29:39<46:02, 16.25s/it]  

{'loss': 0.048, 'grad_norm': 0.5826255083084106, 'learning_rate': 2.023809523809524e-05, 'epoch': 0.81}


 87%|████████▋ | 800/920 [3:43:18<32:18, 16.16s/it]

{'loss': 0.0508, 'grad_norm': 0.4350247383117676, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.87}


 92%|█████████▏| 850/920 [3:56:40<17:33, 15.05s/it]

{'loss': 0.0473, 'grad_norm': 0.35604918003082275, 'learning_rate': 8.333333333333334e-06, 'epoch': 0.92}


 98%|█████████▊| 900/920 [4:10:03<05:09, 15.50s/it]

{'loss': 0.0478, 'grad_norm': 0.5250070095062256, 'learning_rate': 2.3809523809523808e-06, 'epoch': 0.98}


100%|██████████| 920/920 [4:15:24<00:00, 16.66s/it]

{'train_runtime': 15324.4147, 'train_samples_per_second': 0.961, 'train_steps_per_second': 0.06, 'train_loss': 0.22678281185419663, 'epoch': 1.0}



