# Installations and imports

In [1]:
!pip3 install transformers
!pip3 install torch
!pip3 install datasets
!pip3 install sentencepiece
!pip3 install gdown
!pip3 install accelerate -U



In [2]:
import torch
from transformers import (
    LlamaForCausalLM, LlamaConfig, LlamaTokenizer,
    Trainer, TrainingArguments, DataCollatorForLanguageModeling,
    EarlyStoppingCallback
)
from datasets import load_dataset
import sentencepiece as spm
import os
import logging
import json
import sys
import argparse

  from .autonotebook import tqdm as notebook_tqdm


# Setting up the model with config
* `create_config_model` builds a LLaMA model from the configuration stored at `path`, moves it to the available device, and returns it

In [4]:
def create_config_model(path):
    """Build a LLaMA causal language model from a saved configuration.

    Only the configuration is read from ``path``; the model is constructed
    from that configuration and this function loads no checkpoint weights.
    The model is moved to a device chosen in the order MPS, CUDA, CPU, and
    its parameter count is printed.

    Args:
        path: Location handed to ``LlamaConfig.from_pretrained``.

    Returns:
        The constructed ``LlamaForCausalLM`` instance, already moved to the
        selected device.
    """
    llama_config = LlamaConfig.from_pretrained(path)
    model = LlamaForCausalLM(llama_config)

    # Device preference: Apple MPS first, then CUDA, falling back to CPU.
    if torch.backends.mps.is_available():
        device_name = "mps"
    elif torch.cuda.is_available():
        device_name = "cuda"
    else:
        device_name = "cpu"
    model.to(torch.device(device_name))

    # Total number of scalar parameters across all parameter tensors.
    param_count = 0
    for param in model.parameters():
        param_count += param.numel()

    print(f"GPT Model size: {param_count/1000**2:.1f}M parameters")

    return model

# Setting up model training
* `train_model` configures a `Trainer` with early stopping, runs training, and saves the model to the output folder

In [None]:
def train_model(model, tokenizer, train_dataset, test_dataset, out_folder_path):
    """Train ``model`` as a causal language model and save it.

    Builds a causal-LM data collator (``mlm=False``), configures a ``Trainer``
    that evaluates on ``test_dataset`` every 1000 steps, stops early when the
    eval loss fails to improve by more than 0.001 for 2 consecutive
    evaluations, and finally writes the model to ``out_folder_path``.

    Args:
        model: Model to train; must provide ``save_pretrained``.
        tokenizer: Tokenizer handed to ``DataCollatorForLanguageModeling``.
        train_dataset: Dataset passed to the trainer as ``train_dataset``.
        test_dataset: Dataset passed to the trainer as ``eval_dataset``.
        out_folder_path: Output directory for checkpoints, logs
            (``<out_folder_path>/logs``) and the saved model.

    Returns:
        None.
    """
    # Function-scope import keeps this cell runnable on its own.
    import inspect

    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # The evaluation-schedule keyword was renamed from `evaluation_strategy`
    # to `eval_strategy` in newer transformers releases. Use whichever name
    # the installed TrainingArguments accepts so the cell works on both.
    try:
        accepted = inspect.signature(TrainingArguments).parameters
    except (TypeError, ValueError):
        accepted = ()
    strategy_kwarg = "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"

    # NOTE(review): checkpoints are written every 10000 steps but evaluation
    # runs every 1000; presumably the "best" model reloaded at the end can
    # only be one that was actually checkpointed — confirm this is intended.
    training_args = TrainingArguments(
        output_dir=out_folder_path,
        overwrite_output_dir=True,
        num_train_epochs=100,
        per_device_train_batch_size=8,
        save_steps=10000,
        logging_steps=10,
        eval_steps=1000,
        logging_dir=f'{out_folder_path}/logs',
        load_best_model_at_end=True,
        metric_for_best_model="loss",
        greater_is_better=False,  # lower loss is better
        **{strategy_kwarg: "steps"},
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.001)]
    )

    trainer.train()
    model.save_pretrained(out_folder_path)