In [None]:
import os

# The working directory defaults to the location this notebook was written
# for; set BOOTCAMP_NOTEBOOK_DIR to run it from a different checkout.
# Later cells use paths relative to this directory (e.g. "../data").
NOTEBOOK_DIR = os.environ.get(
    "BOOTCAMP_NOTEBOOK_DIR", "/content/bootcamp_day2/notebooks"
)
os.chdir(NOTEBOOK_DIR)
print("Working directory:", os.getcwd())

import torch

# Query CUDA once and reuse the answer for both report lines.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
print("GPU name:", torch.cuda.get_device_name(0) if cuda_available else "No GPU")



Working directory: /content/bootcamp_day2/notebooks
CUDA available: True
GPU name: Tesla T4


In [None]:
!pip install --upgrade pip
!pip install transformers datasets peft bitsandbytes accelerate


Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 82.7 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.3


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import json


In [None]:
# JSON-lines splits, addressed relative to the current working directory.
DATA_FILES = {
    "train": "../data/train.jsonl",
    "validation": "../data/val.jsonl",
}
dataset = load_dataset("json", data_files=DATA_FILES)

# Report the size of each split as a quick sanity check.
train_count = len(dataset["train"])
validation_count = len(dataset["validation"])
print("Train samples:", train_count)
print("Validation samples:", validation_count)


Train samples: 1000
Validation samples: 100


In [None]:
def format_instruction(example):
    """Render one record as an instruction-style training prompt.

    Args:
        example: Mapping with "instruction" and "output" entries and an
            optional "input" entry.

    Returns:
        A string made of "### Instruction:", an optional "### Input:" and a
        "### Response:" section, separated by blank lines. The input section
        is emitted only when the record has a truthy "input" value.

    Raises:
        KeyError: If "instruction" or "output" is missing.
    """
    sections = [f"### Instruction:\n{example['instruction']}"]

    # .get() lets records without an "input" field fall through to the short
    # template instead of raising KeyError.
    context = example.get("input")
    if context:
        sections.append(f"### Input:\n{context}")

    sections.append(f"### Response:\n{example['output']}")
    return "\n\n".join(sections)


In [None]:
from transformers import BitsAndBytesConfig

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Quantization settings: 4-bit NF4 weights with double quantization and
# float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# The EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the quantization settings above; device placement
# is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)


In [None]:
from peft import prepare_model_for_kbit_training

# The base model was loaded in 4-bit, so run PEFT's k-bit preparation step
# before attaching adapters.
# NOTE(review): with default arguments this also turns on gradient
# checkpointing, which trades training speed for memory — confirm that is
# acceptable for this run.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


In [None]:
def tokenize(example, max_length=512):
    """Tokenize one record and build causal-LM labels that skip padding.

    Args:
        example: A dataset record accepted by ``format_instruction``.
        max_length: Length every sequence is truncated or padded to.
            Defaults to 512.

    Returns:
        The tokenizer output with an added "labels" entry: a copy of
        "input_ids" in which every padded position is replaced by -100.
    """
    text = format_instruction(example)
    tokenized = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
    # The pad token is the EOS token in this notebook, so padding cannot be
    # recognised by token id; use the attention mask instead. Label -100 is
    # the index the causal-LM loss ignores, which keeps padding from
    # contributing to the loss.
    tokenized["labels"] = [
        token_id if keep else -100
        for token_id, keep in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Tokenize both splits the same way, dropping the raw columns so that only
# the tokenizer outputs remain. The generator is consumed in order: train
# first, then validation.
tokenized_train, tokenized_val = (
    dataset[split_name].map(
        tokenize,
        remove_columns=dataset[split_name].column_names,
    )
    for split_name in ("train", "validation")
)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
def add_labels(batch):
    """Attach causal-LM labels to a batch, ignoring padded positions.

    Args:
        batch: Mapping with an "input_ids" list of token-id sequences and,
            optionally, a parallel "attention_mask" list.

    Returns:
        The same ``batch`` object with a "labels" entry added. When an
        attention mask is present, positions whose mask value is 0 are
        labelled -100 (the index the loss ignores); otherwise the labels are
        a copy of "input_ids".
    """
    if "attention_mask" not in batch:
        # No mask to tell padding apart from content: keep the plain copy.
        batch["labels"] = batch["input_ids"].copy()
        return batch

    # The mask, not the token id, decides what counts as padding, because a
    # pad token may share its id with a real token such as EOS.
    batch["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(batch["input_ids"], batch["attention_mask"])
    ]
    return batch

# Run add_labels over both tokenized splits in batched mode (train first,
# then validation) and rebind the results to the same names.
tokenized_train, tokenized_val = (
    split.map(add_labels, batched=True)
    for split in (tokenized_train, tokenized_val)
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
training_args = TrainingArguments(
    # Checkpointing: written under ../adapters once per epoch, with the
    # number of retained checkpoints capped at 1.
    output_dir="../adapters",
    save_strategy="epoch",
    save_total_limit=1,
    # Optimisation: 3 epochs, 4 samples per device per step, no gradient
    # accumulation.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    # Both mixed-precision modes are explicitly switched off.
    fp16=False,
    bf16=False,
    # Monitoring: evaluate every epoch, log every 50 steps, no reporting
    # integration.
    eval_strategy="epoch",
    logging_steps=50,
    report_to="none",
)


In [None]:
from transformers import Trainer, default_data_collator

# Every example is already padded to a fixed length, so the default collator
# is used as-is.
data_collator = default_data_collator

# Wire the PEFT-wrapped model, both tokenized splits and the training
# arguments into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
)

In [None]:
# Run fine-tuning with the Trainer built in the previous cell. The call is
# left as the cell's last expression so that its return value (a TrainOutput
# with the step count, loss and runtime metrics) is shown as the cell output.
trainer.train()


Epoch,Training Loss,Validation Loss
1,0.2172,0.21088
2,0.2292,0.209114
3,0.2134,0.208364


TrainOutput(global_step=750, training_loss=0.415849676767985, metrics={'train_runtime': 530.7972, 'train_samples_per_second': 5.652, 'train_steps_per_second': 1.413, 'total_flos': 9554827935744000.0, 'train_loss': 0.415849676767985, 'epoch': 3.0})

In [None]:
# Write the PEFT-wrapped model to a dedicated folder next to the training
# checkpoints, then confirm on stdout.
ADAPTER_DIR = "../adapters/adapter_model"
model.save_pretrained(ADAPTER_DIR)
print("Adapter weights saved successfully!")


Adapter weights saved successfully!
