In [1]:
import pandas as pd 
import torch
import torch.nn as nn
import transformers
import json
from datasets import load_dataset
import datasets

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging,Trainer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
from trl import SFTTrainer



In [3]:
model_name = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)
model.config.use_cache = False # silence the warnings
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.39s/it]


In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = 'left'

tokenizer.add_eos_token = True
tokenizer.add_bos_token=True
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_bos_token, tokenizer.add_eos_token

(True, True)

In [3]:
def formatting_text(data):
    prompt = '''Below is an English context with a question, situational background information, and a response. Your task is to judge whether the response to the question is appropriate or not according to the situational background information. The question, situational background information and response are specified with [Question], [Background] and [Response] in the context respectively. If you think the response is appropriate, please say "Yes"; if you think the response is inappropriate, please say "No".'''

    background = "".join(data['s'])
    question = data['u']
    response = data['r']
    # label = "yes "if data['r.label'] ==1 else "no"
    text= f"{prompt} \n### Context:\n[Question]: {question} \n[Background]: {background} \n[Response]: {response}\n### Answer:"
    return text
def formatting_label(data):
    label = "yes"if data['r.label'] ==1 else "no"
    if label=="yes":
        label_text = f"{label.capitalize()}, the response is appropriate."
    else:
        label_text = f"{label.capitalize()}, the response is inappropriate."
    return label_text
def format_prompt(data):
    prompt = f"{data['text']}\n{data['label']}"
    return prompt
def read_json(mode):
    with open(f"dataset/{mode}.json", 'r') as file:
        data = json.load(file)
    data = pd.DataFrame(data)
    return data
train_data = read_json("train")
valid_data = read_json("val")
test_data = read_json("test")
train_data['text'] = train_data.apply(formatting_text,axis=1)
train_data['label'] = train_data.apply(formatting_label,axis=1)
train_data['prompt'] = train_data.apply(format_prompt,axis=1)
train_data = train_data[['text','label','prompt']]
valid_data['text'] = valid_data.apply(formatting_text,axis=1)
valid_data['label'] = valid_data.apply(formatting_label,axis=1)
valid_data['prompt'] = valid_data.apply(format_prompt,axis=1)
valid_data = valid_data[['text','label','prompt']]

In [9]:
print(train_data['prompt'][1],train_data['label'][1]) 

Below is an English context with a question, situational background information, and a response. Your task is to judge whether the response to the question is appropriate or not according to the situational background information. The question, situational background information and response are specified with [Question], [Background] and [Response] in the context respectively. If you think the response is appropriate, please say "Yes"; if you think the response is inappropriate, please say "No". 
### Context:
[Question]: Can you bring my bag? 
[Background]: [user] has a car.[user] has a cup.There is a place where [user] can put up an ad.There is coffee in the kitchen.[user] is home.[user] has a Canadian travel book.[user] is dressed up for work.[user]'s bag is at the front door.It's morning now.[user] has bags outside.[user]'s keys are in the bag.It is morning now. 
[Response]: Ok, would you like help with your bags?
### Answer:
No, the response is inappropriate. No, the response is i

In [11]:
def preprocess(data): #full prompt
    model_inputs = tokenizer(data['prompt'], padding="max_length", max_length=1150,truncation=True, return_tensors='pt')
    model_inputs["labels"] = model_inputs["input_ids"].clone()
    return model_inputs
# def preprocess(data):
#     model_inputs = tokenizer(data['text'],padding="max_length", max_length=1124,truncation=True, return_tensors='pt')

#     labels = tokenizer(data["label"], padding="max_length", max_length=20, truncation=True, return_tensors='pt')

#     model_inputs["labels"] = labels["input_ids"]
#     return model_inputs

new_train_data = datasets.Dataset.from_pandas(train_data,split='train')
new_valid_data = datasets.Dataset.from_pandas(valid_data,split='train')
tokenized_train_data = new_train_data.map(preprocess,remove_columns = new_train_data.column_names,batched=True)
tokenized_valid_data = new_valid_data.map(preprocess,remove_columns = new_valid_data.column_names,batched=True)

Map:   0%|          | 0/3696 [00:00<?, ? examples/s]

Map: 100%|██████████| 3696/3696 [00:00<00:00, 4428.94 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 4702.28 examples/s]


In [21]:
len(tokenized_train_data),len(tokenized_valid_data)

(3696, 792)

In [22]:
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [23]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[ "q_proj",
        "k_proj",""
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",]
)
model = get_peft_model(model, peft_config)

In [24]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
print_trainable_parameters(model)

trainable params: 85041152 || all params: 3837112320 || trainable%: 2.2162799758751914


In [25]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
model = accelerator.prepare_model(model)

In [26]:
training_arguments = TrainingArguments(
    output_dir="./modes/baseline_with_complete_prompt", #baseline_with_complete_prompt
    logging_dir="./logs/logs_baseline_with_complete_prompt",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=100,
    save_total_limit = 2,
    logging_steps=100,
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=2e-5,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    do_eval=True,
    lr_scheduler_type="constant",
    metric_for_best_model = "eval_loss",
    
)
trainer = Trainer(
    model=model,
    train_dataset=tokenized_train_data,
    eval_dataset = tokenized_valid_data,
    tokenizer=tokenizer,
    args=training_arguments,
    data_collator=data_collator,
)

In [27]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
100,0.8295,0.683165
200,0.6317,0.603795
300,0.5576,0.562482
400,0.5101,0.537568
500,0.4825,0.525927
600,0.4597,0.512168
700,0.4312,0.50477
800,0.4126,0.498978
900,0.3905,0.509928
1000,0.3332,0.52182




KeyboardInterrupt: 

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"
ft_model = PeftModel.from_pretrained(base_model, "./models/baseline_with_complete_prompt2/checkpoint-1800")


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.39s/it]


In [7]:
import json
import pandas as pd
from datasets import load_dataset
import torch
def formatting_text(data):
    prompt = '''Below is an English context with a question, situational background information, and a response. Your task is to judge whether the response to the question is appropriate or not according to the situational background information. The question, situational background information and response are specified with [Question], [Background] and [Response] in the context respectively. If you think the response is appropriate, please say "Yes"; if you think the response is inappropriate, please say "No".'''

    background = "".join(data['s'])
    question = data['u']
    response = data['r']
    # label = "yes "if data['r.label'] ==1 else "no"
    text= f"{prompt} \n### Context:\n[Question]: {question} \n[Background]: {background} \n[Response]: {response}\n### Answer:"
    return text
def read_json(mode):
    with open(f"dataset/{mode}.json", 'r') as file:
        data = json.load(file)
    data = pd.DataFrame(data)
    return data

In [10]:
from peft import PeftModel
import re
from tqdm import tqdm, trange
test_data = read_json("test")
test_data['text'] = test_data.apply(formatting_text,axis=1)
# ft_model = PeftModel.from_pretrained(base_model, "baseline_with_complete_prompt/checkpoint-1000")
ft_model.eval()
predictions = []
with torch.no_grad():
    for i in trange(len(test_data)):
        eval_prompt = test_data['text'][i]
        model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
        with torch.no_grad():
            pred = tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=10, repetition_penalty=1.15)[0], skip_special_tokens=True)
            predictions.append(pred)
        # break
ans = []
for pred in predictions:
    pattern = r"### Answer:\s*(.*)"
    # Search for the pattern in the text
    match = re.search(pattern, pred)
    # Check if a match was found
    if match:
        # Extract the matched text
        found_text = match.group(1)
        ans.append(found_text)
    else:
        ans.append("No answer")
        # print("Found text:", found_text)
    # break

  0%|          | 0/792 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 1/792 [00:00<06:32,  2.01it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 2/792 [00:00<06:19,  2.08it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  0%|          | 3/792 [00:01<06:48,  1.93it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 4/792 [00:02<06:34,  2.00it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 5/792 [00:02<06:33,  2.00it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 6/792 [00:02<06:26,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 7/792 [00:03<06:20,  2.06it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 8/792 [00:03<06:17,  2.08it/s]Setting `pad_token_id` to `eos_token_id`:

In [16]:
len([1 for a in ans if a.split(",")[0]=='Yes']),len([1 for a in ans if a.split(",")[0]=='No'])

(249, 543)

In [18]:
final_ans = [1 if a.split(",")[0]=='Yes' else 0 for a in ans]
pd.DataFrame({"response_quality":final_ans}).to_csv("submission.csv",index_label="index")