In [None]:
!pip install -q transformers accelerate bitsandbytes peft
# Mount Google Drive so files under /content/drive are readable from this Colab session.
from google.colab import drive
drive.mount('/content/drive')

import json

# Load the training samples from the JSON file stored on Drive.
# The recorded output shows each sample as a dict with 'instruction', 'input' and 'output' keys.
with open("/content/drive/MyDrive/Apziva/train_dataset.json", "r", encoding="utf-8") as f:
    train_dataset = json.load(f)

print(f"Loaded {len(train_dataset)} training samples")
print(train_dataset[:3])  # Inspect the first 3 samples



[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
Loaded 1225 training samples
[{'instruction': 'Compare two job titles for HR role relevance and decide which candidate is more fit.', 'input': 'A: Admissions Representative at Community medical center long beach\nB: Student', 'output': 'Student'}, {'instruction': 'Compare two job titles for HR role relevance and decide which candidate is more fit.', 'input': 'A: Admissions Representative at Community medical center long beach\nB: Native English Teacher at EPIK (English Program in Korea)', 'output': 'Admissions Representative at Community medical center long beach'}, {'instruction': 'Compare two job titles for HR role relevance and decide which candidate is more fit.', 'input': 'A: Admissions Representative at Community medical center long beach\nB: Always set them up for Success', 'output': 'Admissions Representative at Community medical

In [None]:
# Authenticate with the Hugging Face Hub. login() is called without arguments,
# so no token is hardcoded in the notebook.
from huggingface_hub import login
login()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch

# Base checkpoint that the LoRA adapter is trained on top of.
model_name = "Qwen/Qwen2.5-7B-Instruct"

# 4-bit NF4 quantization with double quantization; bfloat16 is the compute dtype.
# NOTE(review): the training cell later in this notebook passes fp16=True and the
# inference cell uses torch.float16 as compute dtype -- confirm bfloat16 is intended
# for the GPU in use.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
# Reuse the EOS token as the padding token.
# NOTE(review): this overrides any pad token the checkpoint already defines -- confirm.
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the quantization config; device placement is left to "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# PEFT helper that readies the quantized model for training.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 8, alpha 32, dropout 0.05, no bias training.
# target_modules presumably names the attention projection layers -- verify against the model.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the model with the LoRA adapter and report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/339 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

trainable params: 5,046,272 || all params: 7,620,662,784 || trainable%: 0.0662


In [None]:
from torch.utils.data import Dataset

class PairwiseDataset(Dataset):
    """Causal-LM fine-tuning examples for pairwise job-title comparison.

    Each element of ``data`` is a dict with string fields ``instruction``,
    ``input`` and ``output``. An example is encoded as one sequence::

        <instruction>\\n<input>\\nAnswer: <output><eos> <pad> ...

    The answer tokens are part of ``input_ids`` so that the model is actually
    conditioned on the prompt when predicting them. ``labels`` is ``-100`` on
    the prompt and on padding, so the loss is computed only on the answer
    tokens and the terminating EOS.

    Args:
        data: Sequence of sample dicts as described above.
        tokenizer: Callable returning ``{"input_ids": [...]}`` for a string and
            exposing ``eos_token_id`` / ``pad_token_id``.
        max_length: Fixed length every returned tensor is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_length=256):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` as 1-D long tensors."""
        item = self.data[idx]
        prompt = f"{item['instruction']}\n{item['input']}\nAnswer:"
        # Leading space separates the answer from the "Answer:" cue.
        answer = f" {item['output']}"

        # Tokenize without automatic special tokens; EOS is appended explicitly below.
        prompt_ids = list(self.tokenizer(prompt, add_special_tokens=False)["input_ids"])
        answer_ids = list(self.tokenizer(answer, add_special_tokens=False)["input_ids"])

        eos_id = self.tokenizer.eos_token_id
        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = eos_id

        # Always keep room for EOS so the model learns where the answer stops.
        answer_ids = answer_ids[: max(self.max_length - 1, 0)] + [eos_id]

        # If the pair is too long, drop tokens from the START of the prompt so the
        # "Answer:" cue stays directly in front of the answer.
        budget = self.max_length - len(answer_ids)
        prompt_ids = prompt_ids[-budget:] if budget > 0 else []

        n_real = len(prompt_ids) + len(answer_ids)
        n_pad = max(self.max_length - n_real, 0)

        input_ids = prompt_ids + answer_ids + [pad_id] * n_pad
        attention_mask = [1] * n_real + [0] * n_pad
        # -100 is ignored by the loss: mask the prompt and the padding. Padding is
        # masked by position, so the real EOS stays supervised even when pad == eos.
        label_ids = [-100] * len(prompt_ids) + answer_ids + [-100] * n_pad

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "attention_mask": torch.tensor(attention_mask, dtype=torch.long),
            "labels": torch.tensor(label_ids, dtype=torch.long),
        }

train_data = PairwiseDataset(train_dataset, tokenizer)
# The default object repr carries no information; report the dataset size instead.
print(f"Built {len(train_data)} training examples")

<__main__.PairwiseDataset object at 0x7ca258b58f50>


In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/Apziva/qwen_lora_pairwise",
    per_device_train_batch_size=1,      # small batch to avoid OOM
    gradient_accumulation_steps=4,      # effective batch size of 1 x 4 = 4 per device
    learning_rate=2e-4,
    # NOTE(review): the base model in this notebook is loaded with
    # bnb_4bit_compute_dtype=torch.bfloat16 -- confirm mixing it with fp16 is intended.
    fp16=True,
    # A positive max_steps overrides num_train_epochs, so the epoch count that used
    # to be passed here (5) never took effect: the recorded run stopped at step 100,
    # about 0.33 of an epoch. To train for whole epochs instead, set max_steps=-1 and
    # pass num_train_epochs explicitly.
    max_steps=100,
    logging_steps=5,
    save_steps=50,
    save_total_limit=2,
    # Keep every key the dataset returns; it already yields model-ready tensors.
    remove_unused_columns=False,
)

# Batches the per-example dicts into padded tensors.
data_collator = DataCollatorForSeq2Seq(tokenizer, padding=True, return_tensors="pt")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    data_collator=data_collator,
)

trainer.train()


  return fn(*args, **kwargs)


Step,Training Loss
5,3.13324
10,2.738192
15,1.787891
20,1.596558
25,1.421964
30,1.445645
35,1.527629
40,1.323158
45,1.389888
50,1.508538


  return fn(*args, **kwargs)


TrainOutput(global_step=100, training_loss=1.4704117155075074, metrics={'train_runtime': 632.4862, 'train_samples_per_second': 0.632, 'train_steps_per_second': 0.158, 'total_flos': 4347288826675200.0, 'train_loss': 1.4704117155075074, 'epoch': 0.32653061224489793})

In [None]:
# Save the trained model and the tokenizer side by side so the inference cell can
# load both from the same directory.
# NOTE(review): this directory (qwen_qlora_pairwise) differs from the Trainer's
# output_dir used for checkpoints (qwen_lora_pairwise) -- confirm that is intended.
trainer.save_model("/content/drive/MyDrive/Apziva/qwen_qlora_pairwise")
tokenizer.save_pretrained("/content/drive/MyDrive/Apziva/qwen_qlora_pairwise")
print("QLoRA model saved successfully!")

QLoRA model saved successfully!


In [None]:
import gc

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

model_name = "Qwen/Qwen2.5-7B-Instruct"
lora_model_path = "/content/drive/MyDrive/Apziva/qwen_qlora_pairwise"

# Release any model left over from the training cells before loading a second copy.
# With the training model still resident there is presumably not enough free GPU
# memory, so device_map="auto" spills layers to CPU and the quantized load raises
# "Some modules are dispatched on the CPU or the disk".
# pop() with a default keeps this cell runnable on a fresh kernel as well.
for _stale_name in ("trainer", "model", "base_model"):
    globals().pop(_stale_name, None)
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # bfloat16 is not supported on some GPUs
    bnb_4bit_use_double_quant=True,
)

# Tokenizer saved next to the adapter
tokenizer = AutoTokenizer.from_pretrained(lora_model_path)
tokenizer.pad_token = tokenizer.eos_token

# Base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",        # automatic device placement
    trust_remote_code=True
)

# Attach the trained LoRA weights
model = PeftModel.from_pretrained(base_model, lora_model_path)
model.eval()

# -------------------------------
# Inference example
# -------------------------------
title_a = "Aspiring Human Resources Professional"
title_b = "Sales Associate"

# Use exactly the template the adapter was trained on
# ("<instruction>\n<input>\nAnswer:"); a different wording is out of distribution.
prompt = (
    "Compare two job titles for HR role relevance and decide which candidate is more fit.\n"
    f"A: {title_a}\n"
    f"B: {title_b}\n"
    "Answer:"
)

# Follow the model's device instead of hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=20,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens so the prompt is not echoed back.
prompt_len = inputs["input_ids"].shape[1]
result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
print("Model output:", result)


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
import pandas as pd
# Candidate table on Drive; rows sharing a job_title are collapsed to one so each
# distinct title is ranked once.
df_path = "/content/drive/MyDrive/Apziva/ProjectC.csv"
df = pd.read_csv(df_path).drop_duplicates(subset=["job_title"])
print("Loaded df shape:", df.shape)
print(df.head())

def _match_winner(answer, title_a, title_b):
    """Map generated text back to ``title_a``, ``title_b`` or ``"Tie"`` if unclear."""

    def _norm(text):
        # Case-insensitive comparison with whitespace collapsed.
        return " ".join(str(text).split()).casefold()

    ans, a, b = _norm(answer), _norm(title_a), _norm(title_b)
    if not ans or a == b:
        return "Tie"

    # "full": the answer begins with the whole title (trailing text is tolerated).
    a_full = bool(a) and ans.startswith(a)
    b_full = bool(b) and ans.startswith(b)
    # "hit": additionally accept an answer that is a truncated prefix of the title.
    a_hit = a_full or a.startswith(ans)
    b_hit = b_full or b.startswith(ans)

    if a_hit != b_hit:
        return title_a if a_hit else title_b
    if a_full and b_full:
        # One title is a prefix of the other; the longer match is the specific one.
        return title_a if len(a) > len(b) else title_b
    if a_hit and b_hit:
        if ans == a:
            return title_a
        if ans == b:
            return title_b
    return "Tie"


def lora_pairwise_winner(title_a, title_b, llm=None, tok=None):
    """Ask the fine-tuned model which of two job titles is the better HR fit.

    The prompt reproduces the training template, whose target is the winning
    title's text (not the letter 'A' or 'B'). The generated text is therefore
    matched against the two titles rather than against a letter.

    Args:
        title_a: First job title.
        title_b: Second job title.
        llm: Model exposing ``generate`` and ``device``; defaults to the
            notebook-level ``model``.
        tok: Tokenizer matching ``llm``; defaults to the notebook-level
            ``tokenizer``.

    Returns:
        ``title_a`` or ``title_b`` when the answer identifies one of them,
        otherwise the string ``"Tie"``.
    """
    llm = model if llm is None else llm
    tok = tokenizer if tok is None else tok

    prompt = (
        "Compare two job titles for HR role relevance and decide which candidate is more fit.\n"
        f"A: {title_a}\n"
        f"B: {title_b}\n"
        "Answer:"
    )
    inputs = tok(prompt, return_tensors="pt").to(llm.device)
    outputs = llm.generate(
        **inputs,
        max_new_tokens=32,   # enough room for a full title; EOS ends generation earlier
        do_sample=False,     # greedy decoding keeps comparisons deterministic
        pad_token_id=tok.eos_token_id,
    )
    # Decode only the continuation; splitting on "Answer:" breaks if a title contains it.
    prompt_len = len(inputs["input_ids"][0])
    answer = tok.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return _match_winner(answer, title_a, title_b)


def pairwise_sort(df, title_col="job_title", compare=None):
    """Rank titles by round-robin pairwise comparison.

    Every unordered pair ``(i, j)`` with ``i < j`` is compared once. The winner
    earns 1 point; any result that is neither of the two titles (e.g. ``"Tie"``)
    gives 0.5 points to each side.

    Args:
        df: DataFrame holding the titles.
        title_col: Name of the column in ``df`` that contains the titles.
        compare: Callable ``(title_a, title_b) -> winner``. Defaults to
            ``lora_pairwise_winner``; pass another callable to rank without
            the model.

    Returns:
        ``(sorted_df, pairwise_df)``: ``sorted_df`` has columns ``job_title``
        and ``score`` ordered by descending score; ``pairwise_df`` has one row
        per comparison with columns ``A``, ``B`` and ``winner``.
    """
    # Resolve the default lazily so the function can be defined before the comparator.
    if compare is None:
        compare = lora_pairwise_winner

    titles = df[title_col].tolist()
    n = len(titles)
    scores = [0] * n
    pairwise_results = []

    for i in range(n):
        print(f"Processing {i+1}/{n} titles...")
        for j in range(i + 1, n):
            winner = compare(titles[i], titles[j])

            pairwise_results.append({
                "A": titles[i],
                "B": titles[j],
                "winner": winner
            })

            if winner == titles[i]:
                scores[i] += 1
            elif winner == titles[j]:
                scores[j] += 1
            else:  # Tie, or an answer matching neither title
                scores[i] += 0.5
                scores[j] += 0.5

    sorted_df = pd.DataFrame({
        "job_title": titles,
        "score": scores
    }).sort_values(by="score", ascending=False).reset_index(drop=True)

    # Explicit columns keep the frame's schema stable when there are no pairs.
    pairwise_df = pd.DataFrame(pairwise_results, columns=["A", "B", "winner"])

    return sorted_df, pairwise_df

sorted_df, pairwise_df = pairwise_sort(df, title_col="job_title")

# Show only the ten best-ranked titles, matching the heading.
print("Top 10 sorted job titles:")
print(sorted_df.head(10))


In [None]:
'''
Last step in the project: build a RAG system.
RAG stands for retrieval-augmented generation. It is a smart technique for
using LLMs in industry (companies benefit from pretrained models): normally, if you want these models to have access to the company's data, you have to modify the models.

Another way to do it:
Take a pretrained model and build the system around it, with the data embedded in vector form and stored in a vector database.
Vector index: FAISS


Models such as Grok can hardly be trained by us, so we can use RAG instead.

When we have time, we can fine-tune the model.



My own readings and questions:
clawdbot
'''