In [1]:
import os

In [2]:
%pwd

'/Users/sachinmishra/Desktop/AI-mock-interview/research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'/Users/sachinmishra/Desktop/AI-mock-interview'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    data_path: Path
    model_ckpt: Path
    tokenizer_name: Path
    output_dir: Path
    num_train_epochs: int
    max_steps: int
    learning_rate: float
    optim: str
    warmup_steps: int
    per_device_train_batch_size: int
    weight_decay: float
    logging_steps: int
    logging_dir: Path
    save_strategy: str
    save_steps: int
    evaluation_strategy: str
    eval_steps: int
    do_eval: bool
    report_to: None
    overwrite_output_dir: bool
    group_by_length: bool
    gradient_checkpointing: bool
    gradient_accumulation_steps: int

In [6]:
from interviewBot.constants import *
from interviewBot.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.TrainingArguments

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir = config.root_dir,
            data_path = config.data_path,
            model_ckpt = config.model_ckpt,
            tokenizer_name = config.tokenizer_name,
            output_dir = params.output_dir,
            num_train_epochs = params.num_train_epochs,
            max_steps = params.max_steps,
            learning_rate = params.learning_rate,
            optim = params.optim,
            warmup_steps = params.warmup_steps,
            per_device_train_batch_size = params.per_device_train_batch_size,
            weight_decay = params.weight_decay,
            logging_steps = params.logging_steps,
            logging_dir = params.logging_dir,
            save_strategy = params.save_strategy,
            save_steps = params.save_steps,
            evaluation_strategy = params.evaluation_strategy,
            eval_steps = params.eval_steps,
            do_eval = params.do_eval,
            report_to = params.report_to,
            overwrite_output_dir = params.overwrite_output_dir,
            group_by_length = params.group_by_length,
            gradient_checkpointing = params.gradient_checkpointing,
            gradient_accumulation_steps = params.gradient_accumulation_steps
        )
        return model_trainer_config

In [9]:
import transformers
from transformers import TrainingArguments, AutoModelForCausalLM
from transformers import DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_from_disk
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training


In [10]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config
        # Load the model before passing it to count_parameters
        self.original_model_name = AutoModelForCausalLM.from_pretrained(config.model_ckpt)

        self.tokenizer = AutoTokenizer.from_pretrained(config.tokenizer_name,trust_remote_code=True,padding_side="left",add_eos_token=True,add_bos_token=True,use_fast=False)
        self.tokenizer.pad_token = self.tokenizer.eos_token
    

    def model_quantization(self, original_model):
        lora_config = LoraConfig(
        r=32, #Rank
        lora_alpha=32,
        target_modules=[
            'q_proj',
            'k_proj',
            'v_proj',
            'dense'
        ],
        bias="none",
        lora_dropout=0.05,  # Conventional
        task_type="CAUSAL_LM")
        # 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
        original_model.gradient_checkpointing_enable()

        #Before applying LoRA, ensure the model is prepped for quantization-aware training:
        peft_model = prepare_model_for_kbit_training(original_model)  # Add this step
        peft_model = get_peft_model(original_model, lora_config)

        for param in peft_model.parameters():
            if param.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                param.requires_grad = True

        return peft_model
    
    # Function to count total and trainable parameters
    def count_parameters(self, model):
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        percent_trainable = (trainable_params / total_params) * 100
        return total_params, trainable_params, percent_trainable

    def load_dataset(self):
        #loading data 
        train_dataset = load_from_disk(os.path.join(self.config.data_path, "train"))
        eval_dataset = load_from_disk(os.path.join(self.config.data_path, "validation"))
        return train_dataset, eval_dataset
    def count_parameters(self, original_model):
        total_params = sum(p.numel() for p in original_model.parameters())
        trainable_params = sum(p.numel() for p in original_model.parameters() if p.requires_grad)
        percent_trainable = (trainable_params / total_params) * 100
        return total_params, trainable_params, percent_trainable

    def model_train(self):
        peft_training_args = TrainingArguments(
        output_dir = self.config.output_dir,
        num_train_epochs = self.config.num_train_epochs,
        max_steps = self.config.max_steps,
        learning_rate = self.config.learning_rate,
        optim = self.config.optim,
        warmup_steps = self.config.warmup_steps,
        per_device_train_batch_size = self.config.per_device_train_batch_size,
        weight_decay = self.config.weight_decay,
        logging_steps = self.config.logging_steps,
        logging_dir = self.config.logging_dir,
        save_strategy = self.config.save_strategy,
        save_steps = self.config.save_steps,
        evaluation_strategy = self.config.evaluation_strategy,
        eval_steps = self.config.eval_steps,
        do_eval = self.config.do_eval,
        report_to = self.config.report_to,
        overwrite_output_dir = self.config.overwrite_output_dir,
        group_by_length = self.config.group_by_length,
        gradient_checkpointing = self.config.gradient_checkpointing,
        gradient_accumulation_steps = self.config.gradient_accumulation_steps
        )

        
        ## Counting trainable and non trainable parameters
        total_params_before, trainable_params_before, percent_trainable_before= self.count_parameters(self.original_model_name)

        print(f"Before LoRA:")
        print(f"Total Parameters: {total_params_before:,}")
        print(f"Trainable Parameters: {trainable_params_before:,}")
        print(f"Percentage of Trainable Parameters: {percent_trainable_before:.4f}%\n")



        
        peft_model = self.model_quantization(self.original_model_name)
        peft_model.config.use_cache = False

        # After applying LoRA
        total_params_after, trainable_params_after, percent_trainable_after = self.count_parameters(peft_model)
        print(f"After LoRA:")
        print(f"Total Parameters: {total_params_after:,}")
        print(f"Trainable Parameters: {trainable_params_after:,}")
        print(f"Percentage of Trainable Parameters: {percent_trainable_after:.4f}%")

        
        
        ## load training & validation data
        train_dataset, eval_dataset = self.load_dataset()

        peft_trainer = transformers.Trainer(
        model=peft_model,
        train_dataset= eval_dataset,
        eval_dataset= eval_dataset,
        args=peft_training_args,
        data_collator=transformers.DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
        )

        peft_trainer.train()

            



In [11]:



try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.model_train()
except Exception as e:
    raise e


[2025-03-23 23:53:18,893: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-03-23 23:53:18,902: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-23 23:53:18,904: INFO: common: created directory at: artifacts]
[2025-03-23 23:53:18,906: INFO: common: created directory at: artifacts/model_trainer]


Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.13s/it]
  warn("The installed version of bitsandbytes was compiled without GPU support. "


Before LoRA:
Total Parameters: 2,779,683,840
Trainable Parameters: 2,779,683,840
Percentage of Trainable Parameters: 100.0000%

'NoneType' object has no attribute 'cadam32bit_grad_fp32'
After LoRA:
Total Parameters: 2,800,655,360
Trainable Parameters: 2,800,655,360
Percentage of Trainable Parameters: 100.0000%


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


KeyboardInterrupt: 