In [17]:
import pandas as pd
import torch
import datasets
from datasets import Dataset, DatasetDict, load_dataset
from trl import KTOConfig, KTOTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer

In [13]:
train_dataset = load_dataset("trl-lib/kto-mix-14k", split="train")

In [19]:
train_dataset

Dataset({
    features: ['prompt', 'completion', 'label'],
    num_rows: 13500
})

In [3]:
cache_dir = "../assets/pretrained-models"
model_path = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_path, cache_dir=cache_dir, use_safetensors=True
)

tokenizer = AutoTokenizer.from_pretrained(
    model_path, cache_dir=cache_dir, use_safetensors=True, padding_side="left"
)

tokenizer.pad_token = tokenizer.eos_token

In [4]:
messages = [
    {"role": "system", "content": "You are an AI agent used to determine whether or not a sentence is a factual claim. Only respond with Yes or No",},
    {"role": "user", "content": "Is the following sentence a factual claim? __SENTENCE__"},
    {"role": "assistant", "content": ""}
]

In [5]:
# https://hippocampus-garden.com/tiny_llama_kto_lora/
# https://huggingface.co/docs/trl/v0.8.1/en/kto_trainer
Claimbuster = pd.read_json('../data/Claimbuster/train.json')

In [29]:
# KTOTrainer expects a training dataset with prompt, completion, and label
# PROMPT FORMAT: [ { "content": "Q:Question: how old julio cesar chavez when he fought de la hoya I found the following answer on Google: He holds records for most successful consecutive defenses of world titles (27), most title fights (37), most title-fight victories (31) and he is after Joe Louis with (23) for most title defenses won by knockout (21). Is that a correct answer? Yes or no.\nA:", "role": "user" } ]
# COMPLETION FORMAT: [ { "content": "The best answer for the entity related to 'James G. Roudebush' with the relationship of 'occupation' is: surgeon.", "role": "assistant" } ]
train_dataset = Claimbuster.copy(deep=True)
train_dataset['completion'] = train_dataset['label'].apply(lambda x: [{"role": "assistant", "content": "Yes"}] if x == 1 else [{"role": "assistant", "content": "No"}])
train_dataset['prompt'] = train_dataset['text'].apply(lambda x: [{"role": "user", "content": f"Is the following sentence a factual claim? {x}"}])
train_dataset['label'] = True
train_dataset = train_dataset.filter(items=['prompt','completion','label']).head(2)
train_dataset.head()

Unnamed: 0,prompt,completion,label
0,"[{'role': 'user', 'content': 'Is the following...","[{'role': 'assistant', 'content': 'Yes'}]",True
1,"[{'role': 'user', 'content': 'Is the following...","[{'role': 'assistant', 'content': 'Yes'}]",True


In [31]:
td = Dataset.from_pandas(train_dataset)
td

Dataset({
    features: ['prompt', 'completion', 'label'],
    num_rows: 2
})

In [32]:
training_args = KTOConfig(output_dir="..assets/finetuned-models/Llama-3.2-1B-Instruct-KTO", logging_steps=10)
trainer = KTOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=td)
trainer.train()

Extracting prompt from train dataset: 100%|██████████| 2/2 [00:00<00:00, 230.49 examples/s]
Applying chat template to train dataset: 100%|██████████| 2/2 [00:00<00:00, 57.56 examples/s]
Tokenizing train dataset: 100%|██████████| 2/2 [00:00<00:00, 216.45 examples/s]
Processing tokenized train dataset: 100%|██████████| 2/2 [00:00<00:00, 253.72 examples/s]
Extracting KL train dataset: 100%|██████████| 2/2 [00:00<00:00, 328.99 examples/s]
Processing tokenized train KL dataset: 100%|██████████| 2/2 [00:00<00:00, 225.51 examples/s]


Step,Training Loss


TrainOutput(global_step=3, training_loss=0.5, metrics={'train_runtime': 271.1789, 'train_samples_per_second': 0.022, 'train_steps_per_second': 0.011, 'total_flos': 0.0, 'train_loss': 0.5, 'epoch': 3.0})

In [34]:
# How to load a pre-trained model

adapter_path = "..assets/finetuned-models/Llama-3.2-1B-Instruct-KTO"
model_trained = AutoModelForCausalLM.from_pretrained(
    adapter_path, use_safetensors=True
)

messages = [
    {"role": "system", "content": "You are a yes/no answering bot. Only respond to questions with Yes or No",},
    {"role": "user", "content": "Is the capital of New York state New York City?"},
    {"role": "assistant", "content": ""}
]
chat_template_input_ids = tokenizer.apply_chat_template(messages, tokenize=True, continue_final_message=True, add_generation_prompt=False, return_tensors="pt")
chat_template_input_ids = chat_template_input_ids[0, :-1].reshape(1,-1)

print(tokenizer.batch_decode(model_trained.generate(chat_template_input_ids, max_new_tokens = 10))[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 06 Apr 2025

You are a yes/no answering bot. Only respond to questions with Yes or No<|eot_id|><|start_header_id|>user<|end_header_id|>

Is the capital of New York state New York City?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

No<|eot_id|>
