In [None]:
from time import sleep
from typing import Dict, Optional, List

import torch
from datasets import load_dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import TrainingArguments, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from trl import DPOTrainer, SFTTrainer

from lightning.pytorch import Trainer
from lightning.pytorch.loggers import TensorBoardLogger

## Loading and preparing a dataset 
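We are using a dataset hosted on the Hugging Face Hub. The notebook currently loads `truthful_qa` (question/answer pairs); earlier experiments used a dataset of Stack Overflow questions.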

In [None]:
# Root folder prefix for the dataset cache, model checkpoints, and logs.
# Paths are built by plain string concatenation (e.g. folder+"data"), so a
# non-empty value must end with a trailing slash.
folder = "llm-finetune/"   # When running in lightning root

In [None]:
# folder = ""   # when running from llm-finetune folder

In [None]:
# Dataset arguments: name of the Hugging Face dataset handed to load_dataset.

# Alternative dataset (Stack Exchange paired answers), currently disabled:
# ds_name = "MaestroDmitry/stack-exchange-paired-shorted"

# Dataset card: https://huggingface.co/datasets/truthful_qa
ds_name = "truthful_qa"

In [None]:
# Huggingface DPO trainer needs a dataset containing prompts, chosen, and rejected

def return_prompts_and_responses(sample: Dict[str, List[str]|str], index) -> Dict[str, str]:
    # prompts = [f"Question: {question} \n\nAnswer: " for question in sample["question"]]
    # prompt = f"Question: {sample["question"]} \n\nAnswer: "
    prompt = sample["question"]

    chosen = sample["correct_answers"][min(index, len(sample["correct_answers"])-1)]  # response_j
    rejected = sample["incorrect_answers"][min(index, len(sample["incorrect_answers"])-1)]  # response_k

    return {
        'prompt': prompt,
        'chosen': chosen,
        'rejected': rejected
    }


In [None]:
# Loading the dataset from Huggingface
dataset = load_dataset(
    ds_name, "generation",
    cache_dir=folder+"data"
)
# Only the 'validation' split is used as the source for every derived dataset below.
dataset = dataset['validation']


def build_preference_split(source, index: int):
    """Map `source` to prompt/chosen/rejected rows using the answer at `index`.

    Every original column is dropped, so the result holds only the keys
    returned by `return_prompts_and_responses`.

    Args:
        source: Dataset to convert (here: the 'validation' split loaded above).
        index: Answer position forwarded to `return_prompts_and_responses`.

    Returns:
        The mapped dataset.
    """
    return source.map(
        function=return_prompts_and_responses,
        batched=False,
        with_indices=False,
        remove_columns=source.column_names,
        fn_kwargs={"index": index},
    )


# NOTE(review): train/test/eval are all derived from the same questions and only
# differ in which answer pair is selected, so evaluation numbers are not measured
# on held-out prompts.
train_dataset = build_preference_split(dataset, 0)
print("Train Dataset:")
print(train_dataset)
print(train_dataset[0])

test_dataset = build_preference_split(dataset, 1)
print("Test Dataset:")
print(test_dataset)
print(test_dataset[0])

eval_dataset = build_preference_split(dataset, 2)
print("Eval Dataset:")
print(eval_dataset)
print(eval_dataset[0])



## Loading the base model for SFT

In [79]:
# Model selection. Disabled alternatives are kept for reference.
#model_path = "EleutherAI/gpt-neo-1.3B"
#lora_params = ["q_proj", "v_proj"]
#batch_size = 2
#m_name = "neo-1.3B"

model_path = "EleutherAI/gpt-neo-125m"
lora_params = ["q_proj", "v_proj"]   # module names targeted by LoRA
batch_size = 12
m_name = "neo-125m"                  # short name used in checkpoint/cache paths

# model_path = "ComCom/gpt2-small"
# lora_params = ["c_proj"]

tokenizer = AutoTokenizer.from_pretrained(model_path)

# load the base model in 4-bit quantization
# TODO this only works on cuda
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# NOTE(review): trust_remote_code=True allows code shipped with the model repo to
# run locally; it is probably unnecessary for GPT-Neo -- confirm before removing.
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map='auto',  # {"": 0},
    trust_remote_code=True,
    # use_auth_token=True,
    # `folder` already carries its trailing slash; a leading "/" here would turn
    # the path into an absolute "/model/..." whenever `folder` is "".
    cache_dir=folder+"model/"+m_name+"/base"
)

base_model.config.use_cache = False


print(tokenizer.eos_token_id)
print(tokenizer.decode(tokenizer.eos_token_id))

#if tokenizer.pad_token is None:
#    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
#    base_model.resize_token_embeddings(len(tokenizer))

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(tokenizer.pad_token)

print(base_model)

50256
<|endoftext|>
<|endoftext|>
GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear4bit(in_features=768, out_features=768, bias=False)
            (v_proj): Linear4bit(in_features=768, out_features=768, bias=False)
            (q_proj): Linear4bit(in_features=768, out_features=768, bias=False)
            (out_proj): Linear4bit(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear4bit(in_features=768, out_feat

In [75]:
def qa_string(q:str, a:str):
    """Render a question and an answer in the '### Question / ### Answer' text layout.

    Args:
        q: Question text.
        a: Answer text (may be empty to leave the answer open).

    Returns:
        The two labelled parts joined by a newline; the answer line starts with a space.
    """
    question_line = f"### Question: {q}"
    answer_line = f" ### Answer: {a}"
    return "\n".join((question_line, answer_line))

def sft_formatting_func(example):
    """Build the supervised training texts for one preference example.

    Both the chosen and the rejected answer are turned into training text, each
    paired with the example's prompt via `qa_string`.

    Args:
        example: Mapping with the keys 'prompt', 'chosen' and 'rejected'.

    Returns:
        List of two formatted strings: chosen first, rejected second.
    """
    question = example['prompt']
    return [qa_string(question, example[answer_key]) for answer_key in ('chosen', 'rejected')]

In [76]:
# Define list of examples
def print_some_samples(model, use_qa_format: bool = False):
    """Generate and print a completion for a fixed list of example questions.

    Relies on the notebook-level `tokenizer` and `qa_string`.

    Args:
        model: Causal LM exposing `generate`. Inputs are moved to `model.device`
            when the model reports one, otherwise to 'cuda'.
        use_qa_format: When True, each question is wrapped with
            `qa_string(question, "")` before tokenization, i.e. the same layout
            `sft_formatting_func` uses for training text. Defaults to False,
            which feeds the bare question to the model.
    """
    text_list = [
        "what is the sun?",
        "whats the difference between bash and zash?",
        "How old i the city of Gothenburg?",
        "When will i get to go home?"
    ]

    # Follow the model's own placement instead of assuming a CUDA device.
    device = getattr(model, "device", None)
    if device is None:
        device = 'cuda'

    print("model outputs:")
    print("----------------------------")
    for text in text_list:
        # Tokenize text
        prompt = qa_string(text, "") if use_qa_format else text
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)

        # Run model (greedy decoding; pass do_sample=True to generate() for sampling)
        outputs = model.generate(
            input_ids=inputs,
            max_length=200,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            early_stopping=True,
            num_beams=1,
        )
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        print(f"{decoded_output} \n\n")

In [77]:
# Baseline: sample generations from the quantized base model.
print_some_samples(base_model)

model outputs:
----------------------------




what is the sun?

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The sun is a star.

The 


whats the difference between bash and zash?

A:

zsh is a shell, bash is a shell interpreter.
zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

zsh is a shell, bash is a shell interpreter.

A:

z

In [None]:
# LoRA arguments, forwarded to LoraConfig below.
lora_r = 8
lora_alpha = 8
lora_dropout = 0.0

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_params,  # depends on the choice of model; see model_path / the printed model structure
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Hugging Face Trainer arguments for the supervised fine-tuning (SFT) stage.
sft_training_args: TrainingArguments = TrainingArguments(
    # `folder` already ends with "/"; a leading "/" here would produce an absolute
    # "/model/..." path whenever `folder` is "".
    output_dir=folder+"model/"+m_name+"/sft_train",
    num_train_epochs=500,
    # logging_steps = 20,
    # save_steps=10,  # defaults to 500
    # use_cpu=True,
    per_device_train_batch_size=batch_size,
    # per_gpu_eval_batch_size is deprecated; per_device_eval_batch_size is the supported name.
    per_device_eval_batch_size=batch_size,
    logging_dir=folder+"logs/sft_train"
)

# SFT trainer: applies the LoRA config to the base model and trains on the texts
# produced by sft_formatting_func.
trainer = SFTTrainer(
    model=base_model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    peft_config=peft_config,
    packing=True,  # Per the TRL docs: used by the `ConstantLengthDataset` to pack the sequences of the dataset.
    max_seq_length=512,  # The maximum sequence length to use for the `ConstantLengthDataset` and for automatically creating the Dataset. Defaults to `512`.
    formatting_func=sft_formatting_func,
    tokenizer=tokenizer,
    args=sft_training_args,  # HF Trainer arguments
)

In [None]:
# Run supervised fine-tuning.
trainer.train()

# Observed training time with gpt-neo-125m and the QA dataset:
#   single A10-G: 25 min
#
# Older timings from earlier experiments with the Stack Overflow dataset
# (ONLY 3 EPOCHS were run for these):
#
#   gpt-neo-1.3B
#     single A10-G:  15 h
#     dual RTX 3090:  8 h
#
#   gpt-neo-125m
#     single A10-G:  TODO
#     dual RTX 3090:  2 h

In [80]:
# Reload the SFT result from a saved checkpoint (unquantized) for inspection.
# `folder` already ends with "/"; a leading "/" here would produce an absolute
# "/model/..." path whenever `folder` is "".
sft_model_path = folder+"model/"+m_name+"/sft_train/checkpoint-4000"  # <- TODO
sft_model = AutoModelForCausalLM.from_pretrained(
    sft_model_path,
    # quantization_config=bnb_config,
    device_map='auto',  # {"": 0},
)
print(sft_model)

GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): lora.Linear(
              (base_layer): Linear(in_features=768, out_features=768, bias=False)
              (lora_dropout): ModuleDict(
                (default): Identity()
              )
              (lora_A): ModuleDict(
                (default): Linear(in_features=768, out_features=8, bias=False)
              )
              (lora_B): ModuleDict(
                (default): Linear(in_features=8, out_features=768, bias

In [81]:
# Sample generations from the model loaded from the SFT checkpoint.
print_some_samples(sft_model)

model outputs:
----------------------------
what is the sun?
- 0.00003
 






whats the difference between bash and zash?
<jrib> zash: yes, but you can't say that bash is better
<jrib> zash: but you can say that zash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you can say that bash is better
<jrib> zash: but you 


How old i the city of Gothenburg?

I am a city of Gothenburg, Sweden. I live in Gothenburg, Sweden. I have a house in Gothenburg, Sweden. I have a car in Gothenburg, Sweden. I have a house in Gothenburg, Sweden. I have a car in Gothenburg, Sweden. I have a house in Gothenburg, Sweden. I have a house in Gothenburg, Sweden. I have a house in Gothenburg, Sweden. I have a house in Gothen

## DPO Training

In [None]:
# DPO hyperparameter and Hugging Face Trainer arguments for the DPO stage.
dpo_beta: float = 0.1
dpo_training_args: Optional[TrainingArguments] = TrainingArguments(
    # `folder` already ends with "/"; a leading "/" here would produce an absolute
    # "/model/..." path whenever `folder` is "".
    output_dir=folder+"model/"+m_name+"/dpo_train",
    # use_cpu=True,
    per_device_train_batch_size=batch_size,
    # per_gpu_eval_batch_size is deprecated; per_device_eval_batch_size is the supported name.
    per_device_eval_batch_size=batch_size,
    # presumably required so the prompt/chosen/rejected columns reach the DPO collator -- confirm against TRL docs
    remove_unused_columns=False,
    num_train_epochs=500,
    # logging_steps = 200,
)

# Policy model: the SFT checkpoint, loaded 4-bit quantized with trainable adapters.
dpo_model = folder+"model/"+m_name+"/sft_train/checkpoint-4000"  # <- TODO
model = AutoPeftModelForCausalLM.from_pretrained(
    dpo_model,  # location of saved SFT model
    device_map='auto',  # {"": 0},
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    is_trainable=True,
)

# Reference model: the same SFT checkpoint, loaded without is_trainable.
dpo_model_ref = folder+"model/"+m_name+"/sft_train/checkpoint-4000"  # <- TODO
model_ref = AutoPeftModelForCausalLM.from_pretrained(
    dpo_model_ref,  # same model as the main one
    device_map='auto',  # {"": 0},
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
)


dpo_trainer = DPOTrainer(
    model,
    model_ref,
    args=dpo_training_args,
    beta=dpo_beta,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    max_length=512,
    max_prompt_length=256,
)



In [None]:
# Run DPO training.
dpo_trainer.train()
# dpo_trainer.save_model()

# Observed training time with gpt-neo-125m and the QA dataset:
#   single A10-G: 50 min


In [82]:
# Reload the DPO result from a saved checkpoint (unquantized) for inspection.
# `folder` already ends with "/"; a leading "/" here would produce an absolute
# "/model/..." path whenever `folder` is "".
dpo_model_path = folder+"model/"+m_name+"/dpo_train/checkpoint-500"  # <- TODO
dpo_model = AutoModelForCausalLM.from_pretrained(
    dpo_model_path,
    # quantization_config=bnb_config,
    device_map='auto'  # {"": 0},
)
# print(dpo_model)

In [83]:
# Sample generations from the model loaded from the DPO checkpoint.
print_some_samples(dpo_model)

model outputs:
----------------------------




what is the sun?
True No.
# The sun is not a planet?
# question#
 No# The sun is not a planet?
# The question is not the sun?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not a planet?
# The only planet in the solar system that has no moon is not 


whats the difference between bash and zash?
True No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No N