In [1]:
# Cell 1: install libs (run once)

!pip install -q --upgrade pip
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate datasets bitsandbytes peft safetensors sentencepiece
!pip install -q trl


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m1.7/1.8 MB[0m [31m51.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Cell 2: Mount Google Drive for dataset & checkpoints
from google.colab import drive
drive.mount('/content/drive')
# Create an output folder in Drive
!mkdir -p /content/drive/MyDrive/llama_finetune


Mounted at /content/drive


In [3]:

!cp /content/drive/MyDrive/llama_finetune/dataset.jsonl /content/dataset.jsonl || true




In [4]:
# Cell 4: Quick dataset split & preview
import json, random, os
fn = "/content/dataset.jsonl"
if not os.path.exists(fn):
    raise FileNotFoundError(f"{fn} not found. Upload dataset.jsonl to Colab or copy from Drive.")
lines = [l for l in open(fn, encoding='utf-8').read().splitlines() if l.strip()]
random.seed(42)
random.shuffle(lines)
n = len(lines)
val_count = max(10, int(0.1 * n))  # ~10% or at least 10
train_lines = lines[val_count:]
valid_lines = lines[:val_count]
open("/content/train.jsonl","w",encoding="utf-8").write("\n".join(train_lines))
open("/content/valid.jsonl","w",encoding="utf-8").write("\n".join(valid_lines))
print("Total examples:", n, "-> train:", len(train_lines), "valid:", len(valid_lines))
print("\nOne train example preview (truncated):")
print(train_lines[0][:1000])


Total examples: 481 -> train: 433 valid: 48

One train example preview (truncated):
{"prompt": "Extract 'experience' and 'education' as strict JSON.\n\nResume:\nPratiksha Dhotre{new_line} Software Test Engineer{new_line} Pune * dhotrepratiksha01@gmail.com *{new_line} +91 9322425860{new_line}{new_line} CORE COMPETENCIES{new_line}Manual Testing{new_line} Test Planning and Documentation{new_line}Functional & Non - Functional{new_line} Testing{new_line}Regression Testing{new_line}Black - box testing{new_line}Integration Testing{new_line}SDLC and STLC{new_line}Test Automation{new_line}Bug Tracking and Defect{new_line} Management{new_line}API testing{new_line}Continuous Learning{new_line}{new_line}CAREER OBJECTIVE{new_line}To work towards achieving the greater success through hard work,\t \tconsistency and the ability to work with others to achieve\t \torganizational and personal goals.{new_line}{new_line} PROFILE SUMMARY{new_line}Having 3+ years of professional experience as a{new_line} Sof

In [5]:
# Cell 5
MODEL = "meta-llama/Llama-3.2-3B-Instruct"
OUTPUT_DIR = "/content/drive/MyDrive/llama_finetune/lora_llama3_3b"
TRAIN_FILE = "/content/train.jsonl"
VALID_FILE = "/content/valid.jsonl"

# Training hyperparams
EPOCHS = 3
PER_DEVICE_BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 8
LEARNING_RATE = 2e-4
MAX_LENGTH = 1024
LOGGING_STEPS = 50
SAVE_STEPS = 200

print('Configuration set. Remember to update MODEL if you do not have access to LLaMA-3-3b.')

Configuration set. Remember to update MODEL if you do not have access to LLaMA-3-3b.


In [6]:
# Cell 6: Tokenizer + create text list from jsonl
import json
from transformers import AutoTokenizer

print("Loading tokenizer for", MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=False)
# ensure pad token exists
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def build_prompt(resume):
    return ("Extract 'experience' and 'education' as strict JSON.\n\nResume:\n" + resume + "\n\nReturn only valid JSON.")

def preprocess_line(line):
    # each line is {"prompt": "...", "response": "..."} from your dataset
    obj = json.loads(line)
    prompt = obj.get("prompt") or build_prompt(obj.get("resume",""))
    response = obj.get("response","{}")
    # combine and let tokenizer create labels
    full = prompt + "\n\n" + response
    return full

def dataset_from_jsonl(path):
    lines = [l for l in open(path, encoding='utf-8').read().splitlines() if l.strip()]
    texts = [preprocess_line(l) for l in lines]
    return texts

train_texts = dataset_from_jsonl(TRAIN_FILE)
valid_texts = dataset_from_jsonl(VALID_FILE)
print("Train examples:", len(train_texts), "Valid examples:", len(valid_texts))


Loading tokenizer for meta-llama/Llama-3.2-3B-Instruct


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Train examples: 433 Valid examples: 48


In [None]:
# Cell 7: Tokenize & prepare HF datasets
from datasets import Dataset

def tokenize_function(examples):
    toks = tokenizer(examples, truncation=True, max_length=MAX_LENGTH, padding="max_length")
    toks["labels"] = toks["input_ids"].copy()
    return toks

train_ds = Dataset.from_dict({"text": train_texts})
valid_ds = Dataset.from_dict({"text": valid_texts})

train_ds = train_ds.map(lambda x: tokenize_function(x["text"]), batched=True, remove_columns=["text"])
valid_ds = valid_ds.map(lambda x: tokenize_function(x["text"]), batched=True, remove_columns=["text"])

print("Tokenized. Example keys:", train_ds.column_names)


Map:   0%|          | 0/433 [00:00<?, ? examples/s]

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Tokenized. Example keys: ['input_ids', 'attention_mask', 'labels']


In [None]:
# Cell 8: Load model in 4-bit and prepare for LoRA
from transformers import AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training

print("Loading model (4-bit) - this may take time and VRAM...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    load_in_4bit=True,
    device_map="auto",
    trust_remote_code=True
)

# Prepare for k-bit training
model = prepare_model_for_kbit_training(model)

# LoRA config (safe defaults)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
print("Model + LoRA ready. Trainable params:", sum(p.numel() for p in model.parameters() if p.requires_grad))


Loading model (4-bit) - this may take time and VRAM...


config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Model + LoRA ready. Trainable params: 4587520


In [None]:
# Cell 9: Training - Trainer + DataCollator
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
    per_device_eval_batch_size=PER_DEVICE_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,
    logging_steps=LOGGING_STEPS,

    eval_steps=LOGGING_STEPS*2,
    save_steps=SAVE_STEPS,
    save_total_limit=3,
    report_to="none",  # disable wandb
)

data_collator = DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8, return_tensors="pt")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=valid_ds,
    data_collator=data_collator,
)

trainer.train()
# Save LoRA adapter only
model.save_pretrained(OUTPUT_DIR)
print("Saved LoRA adapter to", OUTPUT_DIR)


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
50,2.185
100,1.953
150,1.9176


Saved LoRA adapter to /content/drive/MyDrive/llama_finetune/lora_llama3_3b
