In [1]:
from src.dataset.feedback_utils_v2 import Feedback
from src.dataset.format_v2 import to_dpo, to_sft, to_full, to_distill_sft
import json

feedback = Feedback(content = "Do not talk about elephant")
# sft_dataset = to_sft(feedback)
dataset = to_distill_sft(feedback)

Loaded 201 prompts
Loaded 201 search infos


In [4]:
from peft import LoraConfig, PeftModel
from datasets import Dataset, concatenate_datasets
from trl import DPOTrainer, SFTTrainer, DataCollatorForCompletionOnlyLM

# Once Again, I feel the possibility of intense simplification: 
# LLM predicts an entire vector | not a single token
# Supervision with a one-hot vector is less effective and less efficient for the model
# Distillation loss makes more sense and is more effective, as per experiment result from this work


# Why don't we few-shot prompt the model, and then fine-tune it with distillation loss?
# The model will learn to generate the entire vector, not just a single token
# -- Note that this is a specific case for our steering adaptation equation (!)

# Case 1: Loss(pred, one-hot(target))
# Case 2: Loss(pred, pred(one-shot(target)))
# We use distillation loss to mimic the representation, and not the token itself | Different model has different understanding of the new token combination | Adaptive training makes more sense here



# Load model directly
from src.sft_distill import SelfDistillTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM
from src.utils import find_all_linear_names, TrainingArguments, PeftSavingCallback
from transformers import HfArgumentParser

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")


In [11]:
training_args = TrainingArguments(output_dir = "")

peft_config = LoraConfig(
    r=training_args.lora_r, 
    lora_alpha=training_args.lora_alpha, 
    target_modules = find_all_linear_names(model.model, training_args.lora_exclude),
    lora_dropout=training_args.lora_dropout, 
    bias=training_args.lora_bias,
    task_type="CAUSAL_LM"
)

tokenizer.padding_side = 'left'
response_template = "[/INST]"

training_args.packing = True

In [12]:
from src.sft_distill import SelfDistillTrainer
from trl import DataCollatorForCompletionOnlyLM


tokenizer.padding_side = 'right'
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)
trainer = SFTTrainer(
    model=model.model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    tokenizer=tokenizer,
    data_collator=collator,
    max_seq_length=2048,
    peft_config=peft_config,
    callbacks=[PeftSavingCallback] if training_args.lora_enable else None
)

ValueError: Target module Dropout(p=0.05, inplace=False) is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `torch.nn.Embedding`, `torch.nn.Conv2d`, `transformers.pytorch_utils.Conv1D`.

In [10]:
training_args

