In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig, pipeline, AutoModelForQuestionAnswering
import pandas as pd
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Finetune Llama 3.2 with 1B params

load in the base model

In [2]:
quantization_config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="question-answering"
)

In [3]:
checkpoint = "meta-llama/Llama-3.2-1B"


left_model = AutoModelForQuestionAnswering.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    quantization_config=quantization_config
)

right_model = AutoModelForQuestionAnswering.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    quantization_config=quantization_config
)

left_model = get_peft_model(left_model, peft_config)
right_model = get_peft_model(right_model, peft_config)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of LlamaForQuestionAnswering were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10.mlp.up

load in the combined dataset and convert it to text

In [4]:
left_dataset = pd.read_csv("data/combined_left.csv")
right_dataset = pd.read_csv("data/combined_right.csv")

left_dataset = left_dataset["text"]
right_dataset = right_dataset["text"]

with open("data/left.txt", "w") as f:
    for text in left_dataset:
        f.write(text + "\n")
        
with open("data/right.txt", "w") as f:
    for text in right_dataset:
        f.write(text + "\n")

In [5]:
left_dataset = load_dataset("text", data_files="data/left.txt")
right_dataset = load_dataset("text", data_files="data/right.txt")

left_dataset = left_dataset["train"]
right_dataset = right_dataset["train"]

left_dataset = left_dataset.train_test_split(test_size=0.1)
right_dataset = right_dataset.train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Fine-tune the model on the left and right context

In [6]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

tokenizer.pad_token = tokenizer.eos_token

left_dataset = left_dataset.map(tokenize, batched=True, batch_size=4)
right_dataset = right_dataset.map(tokenize, batched=True, batch_size=4)

Map:   0%|          | 0/1286 [00:00<?, ? examples/s]

Map:   0%|          | 0/143 [00:00<?, ? examples/s]

Map:   0%|          | 0/1285 [00:00<?, ? examples/s]

Map:   0%|          | 0/143 [00:00<?, ? examples/s]

In [7]:
LR = 5e-5
EPOCHS = 3
BATCH_SIZE = 2
WEIGHT_DECAY = 0.01


left_args = TrainingArguments(
    output_dir="models/Llama-3.2-1B-left",
    eval_strategy="epoch",
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=WEIGHT_DECAY,
    fp16=True,
)

right_args = TrainingArguments(
    output_dir="models/Llama-3.2-1B-right",
    eval_strategy="epoch",
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=WEIGHT_DECAY,
    fp16=True,
)

In [8]:
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

left_trainer = Trainer(
    model=left_model,
    args=left_args,
    data_collator=data_collator,
    train_dataset=left_dataset["train"],
    eval_dataset=left_dataset["test"]
)

right_trainer = Trainer(
    model=right_model,
    args=right_args,
    data_collator=data_collator,
    train_dataset=right_dataset["train"],
    eval_dataset=right_dataset["test"]
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [9]:
left_trainer.train()
left_model.save_pretrained("models/Llama-3.2-1B-left")
tokenizer.save_pretrained("models/Llama-3.2-1B-left")

  0%|          | 0/1929 [00:00<?, ?it/s]

TypeError: LlamaForQuestionAnswering.forward() got an unexpected keyword argument 'labels'

In [None]:
right_trainer.train()
right_model.save_pretrained("models/Llama-3.2-1B-right")
tokenizer.save_pretrained("models/Llama-3.2-1B-right")

  0%|          | 0/1929 [00:00<?, ?it/s]

{'loss': 3.2251, 'grad_norm': 5.1984453201293945, 'learning_rate': 3.706583722135822e-05, 'epoch': 0.78}


  0%|          | 0/72 [00:00<?, ?it/s]

{'eval_runtime': 2.8752, 'eval_samples_per_second': 49.736, 'eval_steps_per_second': 25.042, 'epoch': 1.0}
{'loss': 3.053, 'grad_norm': 8.332867622375488, 'learning_rate': 2.4105754276827372e-05, 'epoch': 1.56}


  0%|          | 0/72 [00:00<?, ?it/s]

{'eval_runtime': 2.8909, 'eval_samples_per_second': 49.466, 'eval_steps_per_second': 24.906, 'epoch': 2.0}
{'loss': 2.9613, 'grad_norm': 8.098320007324219, 'learning_rate': 1.1145671332296527e-05, 'epoch': 2.33}


  0%|          | 0/72 [00:00<?, ?it/s]

{'eval_runtime': 2.9293, 'eval_samples_per_second': 48.817, 'eval_steps_per_second': 24.579, 'epoch': 3.0}
{'train_runtime': 173.7421, 'train_samples_per_second': 22.188, 'train_steps_per_second': 11.103, 'train_loss': 3.0475286362145217, 'epoch': 3.0}


('models/Llama-3.2-1B-right\\tokenizer_config.json',
 'models/Llama-3.2-1B-right\\special_tokens_map.json',
 'models/Llama-3.2-1B-right\\tokenizer.json')

In [None]:
left_pipeline = pipeline("question-answering", model="models/Llama-3.2-1B-left", tokenizer="models/Llama-3.2-1B-left")
right_pipeline = pipeline("question-answering", model="models/Llama-3.2-1B-right", tokenizer="models/Llama-3.2-1B-right")

Some weights of LlamaForQuestionAnswering were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10.mlp.up_proj.weight', 'layers.10.post_attention_layernorm.weight', 'layers.10.s

In [None]:
topic = "gun control"
context = "I am a moderator in a political debate and you are a candidate. You will resopond to my questions about your stance on issues and policies. You will try to convince me and the audience that your stance is the best one."
question = f"Q: What is your stance on {topic}?"

right_answer = right_pipeline(question=question, context=context, temperature=0.7, max_length=512, truncation=True)
left_answer = left_pipeline(question=question, context=context, temperature=0.7, max_length=512, truncation=True)

print(right_answer)

print()

print(left_answer)

{'score': 0.006474463269114494, 'start': 97, 'end': 139, 'answer': ' about your stance on issues and policies.'}

{'score': 0.013316722586750984, 'start': 164, 'end': 184, 'answer': ' me and the audience'}
