<a href="https://colab.research.google.com/github/ahmedsa04/MIT-go.coll/blob/main/lab3/LLM_Finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<table align="center">
  <tr>
  <td align="center"><a target="_blank" href="http://introtodeeplearning.com">
        <img src="https://i.ibb.co/Jr88sn2/mit.png" style="padding-bottom:5px;" />
      Visit MIT Deep Learning</a></td>
  <td align="center"><a target="_blank" href="https://colab.research.google.com/github/MITDeepLearning/introtodeeplearning/blob/master/lab3/LLM_Finetuning.ipynb">
        <img src="https://i.ibb.co/2P3SLwK/colab.png"  style="padding-bottom:5px;" />Run in Google Colab</a></td>
  <td align="center"><a target="_blank" href="https://github.com/MITDeepLearning/introtodeeplearning/blob/master/lab3/LLM_Finetuning.ipynb">
        <img src="https://i.ibb.co/xfJbPmL/github.png"  height="70px" style="padding-bottom:5px;"  />View Source on GitHub</a></td>
  </tr>
</table>

# Copyright Information

In [1]:
!pip install torch transformers datasets peft accelerate bitsandbytes
!pip install wandb

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from 

In [40]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model
# The weights are loaded in float32 and then moved to `device` exactly once.
# device_map="auto" is intentionally NOT passed: it is a second, independent
# placement mechanism, and the rest of this notebook sends every batch to the
# single `device` chosen above, so the whole model must live there.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
).to(device)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Padding to a fixed length and any code reading tokenizer.pad_token_id need a
# pad token; fall back to EOS if the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Model and tokenizer loaded successfully!")

Model and tokenizer loaded successfully!


In [34]:
from datasets import load_dataset

# Read the local JSON file and expose its records as the "train" split.
dataset = load_dataset(
    "json",
    data_files="formatted_chat.json",
    split="train",
)

# Ensure correct format
def format_chat(example):
    """Rename a chat record's fields to input/target text.

    Args:
        example: Mapping with "instruction" and "response" keys.

    Returns:
        A dict with "input_text" set to the instruction and "target_text"
        set to the response.

    Raises:
        KeyError: If either expected key is missing from ``example``.
    """
    prompt = example["instruction"]
    reply = example["response"]
    return dict(input_text=prompt, target_text=reply)

# Show the first record followed by a confirmation line.
print(dataset[0], "Dataset loaded successfully!", sep="\n")


{'instruction': 'شكراً حبيبي عندها بعد صور هدى اخذتلي ماعندي التلي مالها', 'response': 'امممم هاج'}
Dataset loaded successfully!


In [36]:
def tokenize_function(example, tok=None, max_length=512):
    """Tokenize a batch of instruction/response pairs for causal-LM training.

    Each pair is joined as "<instruction> <response><eos>", truncated/padded
    to ``max_length`` tokens, and given a ``labels`` column for the loss.

    Args:
        example: Batch mapping with parallel lists under "instruction" and
            "response" (the shape ``Dataset.map(..., batched=True)`` passes).
        tok: Tokenizer callable to use. Defaults to the notebook-level
            ``tokenizer``.
        max_length: Fixed sequence length to truncate/pad to.

    Returns:
        The tokenizer output with an added "labels" entry. Labels equal
        ``input_ids`` on real tokens and -100 on padded positions.
    """
    if tok is None:
        tok = tokenizer

    # Append EOS so the model gets an explicit end-of-response target; without
    # it, masking the padding below would leave nothing that teaches stopping.
    eos = getattr(tok, "eos_token", None) or ""
    texts = [
        instr + " " + resp + eos
        for instr, resp in zip(example["instruction"], example["response"])
    ]

    tokenized = tok(
        texts,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

    # The model's built-in loss ignores label -100 only. Mask by attention_mask
    # rather than by token id, because the pad token may share its id with EOS.
    tokenized["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Tokenize the dataset in batches and drop all of its original columns,
# leaving only what tokenize_function returns.
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset.column_names,
)

# Return the three model-input columns as torch tensors when indexed.
tokenized_datasets.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)
print("Dataset tokenized successfully!")

Map:   0%|          | 0/11224 [00:00<?, ? examples/s]

Dataset tokenized successfully!


In [37]:
from transformers import TrainingArguments, Trainer

# Hyper-parameters for a Hugging Face training run, grouped by concern.
# NOTE(review): the manual training loop visible in this notebook does not
# reference training_args; confirm whether a Trainer is meant to consume it.
training_args = TrainingArguments(
    # --- output and checkpointing ---
    output_dir="./tinyllama_trained",
    save_steps=500,
    save_total_limit=2,
    push_to_hub=False,
    # --- batching and schedule ---
    per_device_train_batch_size=2,  # Adjust batch size based on memory
    gradient_accumulation_steps=1,
    num_train_epochs=3,
    warmup_steps=100,
    # --- optimisation ---
    optim="adamw_torch",
    learning_rate=2e-4,
    weight_decay=0.01,
    fp16=False,
    # --- logging and evaluation ---
    logging_steps=1,
    evaluation_strategy="steps",
    eval_steps=500,
)



In [38]:
import torch.nn as nn
from torch.optim import AdamW

# AdamW over every model parameter, with a lower learning rate for stability.
optimizer = AdamW(params=model.parameters(), lr=5e-5)

# Cross-entropy that ignores targets equal to the tokenizer's pad token id.
# NOTE(review): the training loop visible in this notebook reads outputs.loss
# and does not call loss_fn; confirm whether it is still needed.
loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)



In [39]:
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim import AdamW

# Use float32 to avoid precision issues

# Create DataLoader (Reduce batch size to prevent OOM)
train_dataloader = DataLoader(tokenized_datasets, batch_size=1, shuffle=True)  # Use batch_size=1 to save memory

# Sanity check: one forward pass on the first batch before training starts.
# Run under no_grad so the check does not build an autograd graph.
with torch.no_grad():
    for batch in train_dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        print(f"Loss: {loss.item()}")  # Should NOT be NaN
        print(f"Predicted Output: {tokenizer.decode(outputs.logits.argmax(-1)[0])}")
        break  # Only check first batch

NUM_EPOCHS = 3
LOG_EVERY = 50  # print the batch loss every N optimizer steps, not every batch

# Training loop
model.train()
for epoch in range(NUM_EPOCHS):
    total_loss = 0.0
    step = 0      # optimizer steps actually taken this epoch
    skipped = 0   # batches dropped because their loss was NaN/inf

    for batch in train_dataloader:
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Skip (do not stop on) a batch whose loss is NaN or infinite, so a
        # single bad batch cannot push non-finite values into the weights.
        if not torch.isfinite(loss):
            skipped += 1
            print(f"Skipping batch after step {step} due to non-finite loss")
            continue

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        step += 1

        if step % LOG_EVERY == 0:
            print(f"Epoch {epoch + 1}, Step {step}, Batch Loss: {batch_loss}")

    # Average over the steps that contributed to total_loss; skipped batches
    # added nothing to the sum, so they must not be counted in the divisor.
    avg_loss = total_loss / step if step else float("nan")
    print(f"Epoch {epoch + 1} completed. Average Loss: {avg_loss} ({skipped} batches skipped)")

# Save model
model.save_pretrained("tinyllama_finetuned")
tokenizer.save_pretrained("tinyllama_finetuned")
print("Model training complete and saved!")

KeyboardInterrupt: 

In [None]:
!pip install llama-cpp-python  # Install conversion tool

from llama_cpp import convert_hf_model

convert_hf_model("tinyllama_finetuned", "tinyllama.gguf", format="gguf")
