In [112]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset

In [113]:
## Load model

model_name = "sshleifer/tiny-gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  ## in case of tiny-gpt2

model = AutoModelForCausalLM.from_pretrained(model_name)
model.config.pad_token_id = tokenizer.eos_token_id   ## in case of tiny-gpt2

In [114]:
### Lora Configration

lora_config = LoraConfig(
    r = 2, 
    lora_alpha = 8,
    lora_dropout = 0.05
    task_type = TaskType.CAUSAL_LM,
)

model = get_peft_model(model, lora_config)



In [115]:
# Sample data
data = [
    {"text": "### Instruction: Say hello\n### Response: Hello there!"},
    {"text": "### Instruction: What's the capital of France?\n### Response: The capital of France is Paris."},
    {"text": "### Instruction: Translate 'Good morning' to Spanish\n### Response: Buenos días."},
    {"text": "### Instruction: Add 5 and 7\n### Response: The result is 12."},
    {"text": "### Instruction: List three colors\n### Response: Red, blue, and green."},
    {"text": "### Instruction: Who wrote 'Hamlet'?\n### Response: William Shakespeare wrote 'Hamlet'."},
    {"text": "### Instruction: Convert 100 Celsius to Fahrenheit\n### Response: 100°C is equal to 212°F."},
    {"text": "### Instruction: Name a programming language\n### Response: Python."},
    {"text": "### Instruction: What's the square root of 64?\n### Response: The square root of 64 is 8."},
    {"text": "### Instruction: Tell a joke\n### Response: Why did the computer go to therapy? Because it had too many bytes!"}
]

train_data = data[:-2]
eval_data = data[-2:]

train_data = Dataset.from_list(train_data)
eval_data = Dataset.from_list(eval_data)

train_dataset = train_data.map(lambda e: tokenizer(e["text"], truncation=True, padding="max_length", max_length=64))
eval_dataset = eval_data.map(lambda e: tokenizer(e["text"], truncation=True, padding="max_length", max_length=64))


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [116]:
### training Arguments and trainer

training_args = TrainingArguments(
    output_dir = "lora_out",
    per_device_train_batch_size=2, num_train_epochs=3,
    logging_strategy="epoch",
    save_strategy="epoch",       # save after each epoch (default)
    save_total_limit=3,          # keep only last 3 checkpoints to save disk space
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = dataset,
    eval_dataset = eval_dataset,
    tokenizer=tokenizer,
    data_collator= data_collator
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [117]:
trainer.train()
trainer.save_model("lora_out/custom-tiny-gpt2")
tokenizer.save_pretrained("lora_out/custom-tiny-gpt2")

Step,Training Loss
5,10.2402
10,10.252
15,10.2479




('lora_out/custom-tiny-gpt2/tokenizer_config.json',
 'lora_out/custom-tiny-gpt2/special_tokens_map.json',
 'lora_out/custom-tiny-gpt2/vocab.json',
 'lora_out/custom-tiny-gpt2/merges.txt',
 'lora_out/custom-tiny-gpt2/added_tokens.json',
 'lora_out/custom-tiny-gpt2/tokenizer.json')

In [118]:
### model evaluations
import math

### perplexity
eval_loss = trainer.evaluate()["eval_loss"]
perplexity = math.exp(eval_loss)
print("Perplexity:", perplexity)




Perplexity: 50122.37076443425


In [119]:
### inferance  

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import evaluate

### load model 

trained_model_path  = "lora_out/custom-tiny-gpt2"
model = AutoModelForCausalLM.from_pretrained(trained_model_path)
tokenizer = AutoTokenizer.from_pretrained(trained_model_path)

device = torch.device("cpu")
model.to(device)

# test prompts (instructions)
prompts = [
    "Say hello:",
    "Translate 'Good morning' to Spanish:",
    "Add 5 and 7:"
]

# Reference answers
references = [
    "Hello there!",
    "Buenos días.",
    "12"
]


predictions = []

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors= "pt").to(device)
    output = model.generate(
        **inputs,
        # inputs["input_ids"],
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1
        
        # max_new_tokens = 50,
        # do_sample = False  # deterministic output; set True for randomness
    )
    pred = tokenizer.decode(output[0], skip_special_tokens = True)
    pred_text = pred[len(prompt):].strip()
    predictions.append(pred_text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [120]:
# print("Predictions:", predictions)
# print("References:", references)

#### Evaluation Metrices
##### Bleu (Bilingual Evaluation Understudy):
> Measures how many n-grams in the generated text match the reference, focusing on precision.
##### ROUGE (Recall-Oriented Understudy for Gisting Evaluation):
> Measures how much of the reference text is captured in the generated output, focusing on recall.
##### METEOR (Metric for Evaluation of Translation with Explicit ORdering):
> Combines precision, recall, synonym matching, and word order into a harmonic score for better semantic evaluation.
##### Perplexity:
> Evaluates a language model’s performance by computing the inverse probability of the test set, normalized by length—lower values mean less uncertainty in predicting the next token.

In [122]:
### evaluation

bleu   = evaluate.load("bleu")
rouge  = evaluate.load("rouge")
meteor = evaluate.load("meteor")

bleu_result = bleu.compute(predictions=predictions, references=references)
rouge_result = rouge.compute(predictions=predictions, references=references)
meteor_result = meteor.compute(predictions=predictions, references=references)

print("BLEU:", bleu_result)
print("ROUGE:", rouge_result)
print("METEOR:", meteor_result)


Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /home/vishal/nltk_data...
[nltk_data] Downloading package punkt_tab to /home/vishal/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/vishal/nltk_data...


BLEU: {'bleu': 0.0, 'precisions': [0.0, 0.0, 0.0, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 15.0, 'translation_length': 105, 'reference_length': 7}
ROUGE: {'rouge1': np.float64(0.0), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.0), 'rougeLsum': np.float64(0.0)}
METEOR: {'meteor': np.float64(0.0)}
