In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, pipeline
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, AutoPeftModelForCausalLM
from trl import SFTTrainer
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
    PyTorch 2.1.2+cu121 with CUDA 1201 (you have 2.2.2+cu121)
    Python  3.9.18 (you have 3.9.16)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


In [2]:
# Restore the tokenizer and the fine-tuned weights from the saved training checkpoint.
tokenizer = AutoTokenizer.from_pretrained("./rg_output/checkpoint-24825/")
model = AutoModelForCausalLM.from_pretrained(
    "./rg_output/checkpoint-24825/",
    torch_dtype=torch.float32,
)
# 'cuda:0' is the first *visible* GPU (visibility is restricted via CUDA_VISIBLE_DEVICES
# in the import cell). Kept as the final expression so the notebook echoes the module tree.
model.to('cuda:0')

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=64, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=64, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): lora.Linear(
            (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): Module

In [3]:
def form_func(sample, token_in, with_guideline):
    """Build the inference prompt asking the model for the next dialogue turn.

    Args:
        sample: mapping with 'dialogue' and 'response' entries (plus 'guideline'
            when ``with_guideline`` is truthy). 'response' is only inspected to
            decide which agent speaks next.
        token_in: tokenizer object exposing ``apply_chat_template``.
        with_guideline: when truthy, the request also asks the model to comply
            with ``sample['guideline']``.

    Returns:
        Whatever ``token_in.apply_chat_template`` returns for the system + user
        messages with ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    # The agent named in the reference response is the one the model must play.
    speaker, listener = (
        ('agent_1', 'agent_2') if 'agent_1' in sample['response'] else ('agent_2', 'agent_1')
    )

    # Normalise "person N" speaker tags to the "agent_N" naming used in the prompts.
    history = sample['dialogue'].replace('person 1', 'agent_1').replace('person 2', 'agent_2')

    # The guideline is only looked up when it was requested.
    if with_guideline:
        request = f"Generate the next conversation turn for {speaker} responding to {listener} in this conversation: {history} Limit the generated response to 1-2 sentences and compliant with this guideline: {sample['guideline']}"
    else:
        request = f"Generate the next conversation turn for {speaker} responding to {listener} in this conversation: {history} Limit the generated response to 1-2 sentences."

    messages = [
        {
            "role": "system",
            "content": f"You are participating in the conversation. You are specifically {speaker}.",
        },
        {"role": "user", "content": request},
    ]
    return token_in.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [4]:
# Tab-separated train/test splits; column names are supplied explicitly to the CSV loader.
data_files = {
    'train': '../data/lora_ft_train2_copy.csv',
    'test': '../data/lora_ft_test2_copy.csv',
}
dataset = load_dataset(
    'csv',
    data_files=data_files,
    delimiter='\t',
    column_names=['dialogue', 'response', 'guideline'],
)

In [5]:
# Peek at one training row to check the fields that were loaded.
dataset['train'][1]

{'dialogue': 'agent_1:hi, do you like netflix?',
 'response': "agent_2:Hello!  I'm a big fan and have had it for years.  What about you?",
 'guideline': "agent_1 likes hobby. agent_2's response should fall into one of the following 3 topics: ['hobby', 'TV', 'Netflix'].",
 '__index_level_0__': 8610.0}

In [10]:
# Build an inference prompt from one training row and generate a reply with the loaded model.
example = dataset['train'][1]
user_in = example['dialogue']
guideline = example['guideline']
# The reference response is only used by form_func to decide which agent speaks next.
current_person_predict = example['response']

llama_in = form_func(
    {'dialogue': user_in, 'response': current_person_predict, 'guideline': guideline},
    tokenizer,
    True,
)

# NOTE(review): the templated prompt may already contain a BOS token and the tokenizer
# may add another here; left unchanged in case training used the same tokenization — confirm.
blend_in_ids = tokenizer(llama_in, max_length=1024, return_tensors='pt', truncation=True).to('cuda:0')

blend_example = model.generate(
    input_ids=blend_in_ids.input_ids,
    # Forward the mask explicitly instead of letting generate() infer it.
    attention_mask=blend_in_ids.attention_mask,
    max_new_tokens=100,
    # temperature / top_k / top_p only apply when sampling is enabled.
    do_sample=True,
    temperature=0.8,
    top_k=50,
    top_p=0.85,
)

# Keep only the text after the last instruction marker, then drop the speaker tags.
our_response = tokenizer.batch_decode(blend_example, skip_special_tokens=True)[0].split('[/INST]')[-1]
our_response = our_response.replace('agent_1:', '').replace('agent_2:', '')
print(our_response)

 Hello!  I'm a big fan and have had it for years.  What about you? 


# Llama 2: LoRA fine-tuning

In [None]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
# print(os.environ)

In [None]:
# Load the base Llama 2 chat model and its tokenizer for fine-tuning.
# The Hugging Face access token is read from the HF_TOKEN environment variable instead of
# being hardcoded in the notebook. When the variable is unset, token=None is passed and the
# library falls back to its default authentication.
# NOTE: the token that was previously committed in this cell should be revoked.
hf_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    token=hf_token,
    torch_dtype=torch.float32,
)
model.to('cuda:0')

# Makes training faster but a little less accurate
model.config.pretraining_tp = 1

# setting padding instructions for tokenizer
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

In [None]:
def form_func(sample):
    """Format one dataset row as a complete chat-template training example.

    Args:
        sample: mapping with 'dialogue' and 'response' entries. The agent named
            in 'response' is treated as the speaker the model should imitate.

    Returns:
        The result of ``tokenizer.apply_chat_template`` (module-level
        ``tokenizer``) for a system / user / assistant conversation, rendered
        with ``tokenize=False`` and ``add_generation_prompt=False``.
    """
    if 'agent_1' in sample['response']:
        p_in, not_p_in = 'agent_1', 'agent_2'
    else:
        p_in, not_p_in = 'agent_2', 'agent_1'

    messages = [
        {
            "role": "system",
            "content": f"You are participating in the conversation. You are specifically {p_in}.",
        },
        {
            "role": "user",
            "content": f"Generate the next conversation turn for {p_in} responding to {not_p_in} in this conversation: {sample['dialogue']} Limit the generated response to 1-2 sentences.",
        },
        {
            # The reply the model should learn to produce is the assistant turn.
            # It was previously tagged "system", which chat templates do not render
            # as the model's own answer.
            "role": "assistant",
            # NOTE(review): when sample['response'] already starts with the agent tag,
            # this prefix repeats it — confirm against the data before changing.
            "content": f"{p_in}:{sample['response']}",
        },
    ]

    # No generation prompt: the example already ends with the target reply.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)

In [None]:
# LoRA adapter settings handed to the trainer.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Run settings: five epochs, one sample per device, a checkpoint saved after each epoch.
# NOTE(review): learning_rate=2e-3 looks high for LoRA fine-tuning — confirm it is intended.
trainingArgs = TrainingArguments(
    output_dir='rg_output',
    num_train_epochs=5,
    per_device_train_batch_size=1,
    learning_rate=2e-3,
    save_strategy="epoch",
)

# Supervised fine-tuning trainer; form_func turns each row into the training text.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=trainingArgs,
    peft_config=peft_config,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    formatting_func=form_func,
    packing=True,
)

In [None]:
# Run fine-tuning with the trainer configured in the previous cell.
trainer.train()

# Vicuna: LoRA fine-tuning

In [None]:
# Load the Vicuna base model and its tokenizer, then move the model to the first visible GPU.
tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
model = AutoModelForCausalLM.from_pretrained(
    "lmsys/vicuna-7b-v1.5",
    torch_dtype=torch.float32,
)
model.to('cuda:0')

# Author's note: this setting makes training faster but a little less accurate.
model.config.pretraining_tp = 1

# Padding setup: pad on the right, reusing the EOS token as the pad token.
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Tab-separated train/test splits with two explicitly named columns.
data_files = {
    'train': '../data/lora_ft_train.csv',
    'test': '../data/lora_ft_test.csv',
}
dataset = load_dataset(
    'csv',
    data_files=data_files,
    delimiter='\t',
    column_names=['dialogue', 'response'],
)

In [None]:
def form_func(sample):
    """Join a row's dialogue and response into one training text.

    The dialogue and the response are separated by a newline, and every
    'agent_1' / 'agent_2' tag is rewritten to 'person 1' / 'person 2'.

    Args:
        sample: mapping with string 'dialogue' and 'response' entries.

    Returns:
        The combined, re-tagged text.
    """
    text = '\n'.join([sample['dialogue'], sample['response']])
    for old_tag, new_tag in (('agent_1', 'person 1'), ('agent_2', 'person 2')):
        text = text.replace(old_tag, new_tag)
    return text
    

In [None]:
# LoRA adapter settings handed to the trainer.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Run settings: five epochs, one sample per device, a checkpoint saved after each epoch.
# NOTE(review): learning_rate=2e-3 looks high for LoRA fine-tuning — confirm it is intended.
trainingArgs = TrainingArguments(
    output_dir='rg_output_vicuna',
    num_train_epochs=5,
    per_device_train_batch_size=1,
    learning_rate=2e-3,
    save_strategy="epoch",
)

# Supervised fine-tuning trainer; form_func turns each row into the training text.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=trainingArgs,
    peft_config=peft_config,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    formatting_func=form_func,
    packing=True,
)

In [None]:
# Run fine-tuning with the Vicuna trainer configured in the previous cell.
trainer.train()