In [None]:
# Install Dependencies
%pip install -q -U bitsandbytes wandb datasets sentence_transformers faiss-gpu
%pip install -q -U git+https://github.com/huggingface/transformers.git
%pip install -q -U git+https://github.com/huggingface/peft.git
%pip install -q -U git+https://github.com/huggingface/accelerate.git
%pip install -q -U git+https://github.com/huggingface/trl.git

# Dependencies and Environment setting

In [1]:
import torch
import transformers
from peft import LoraConfig, get_peft_model, PeftModel

from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM
from tqdm import tqdm
import os
import gc
import re
import json
from random import sample


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
# Check if a GPU is available
device = "cuda:0" if torch.cuda.is_available() else 'cpu'
print(torch.cuda.device_count()) 
# Set the device to given device
device = torch.device(device)


1


In [10]:
from huggingface_hub import notebook_login
from getpass import getpass
os.environ["HF_TOKEN"] = getpass("Enter your Hugging Face token: ")


# 1. Finetune the llama2 7B model on the Guanaco dataset


In [3]:
# Load LLaMA2-7B weight and LoRA config
model_name = "meta-llama/Llama-2-7b-hf"
if 'model' in locals():
    del model
    gc.collect()
    torch.cuda.empty_cache()
if 'tokenizer' in locals():
    del tokenizer
    gc.collect()
if 'lora_config' in locals():
    del lora_config
    gc.collect()
    torch.cuda.empty_cache()
gc.collect()
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)
EOS_token = tokenizer.eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token
    #see https://github.com/meta-llama/llama-cookbook/blob/main/src/llama_recipes/utils/train_utils.py#L28C3-L28C3
    # or see https://fancyerii.github.io/2024/01/04/padding/
print(f"the tokenizer's pad token is {tokenizer.pad_token}")
lora_config = LoraConfig(
    r=8,  # LoRA 的秩
    lora_alpha=32,  # LoRA 的缩放因子
    target_modules=["q_proj", "v_proj"],  # 目标模块
    lora_dropout=0.05,  # Dropout 概率
    bias="none",  # 是否添加偏置
    task_type="CAUSAL_LM"  # 任务类型
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)
print(model.hf_device_map)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

the tokenizer's pad token is <unk>
{'': 0}


## 1.1. Load and Preprocess the data

In [5]:
#最难的难点在这，解决方法：https://huggingface.co/docs/trl/en/sft_trainer#using-tokenids-directly-for-responsetemplate

def print_tokens_with_ids(txt):
    tokens = tokenizer.tokenize(txt, add_special_tokens=False)
    token_ids = tokenizer.encode(txt, add_special_tokens=False)
    print(list(zip(tokens, token_ids)))

prompt = """### Assistant: Hi, how can I help you? ### Human: Hello\n\n """
print_tokens_with_ids(prompt)  # [..., ('▁Hello', 15043), ('<0x0A>', 13), ('<0x0A>', 13), ('##', 2277), ('#', 29937), ('▁Ass', 4007), ('istant', 22137), (':', 29901), ...]

response_template = "### Human:"
print_tokens_with_ids(response_template)  # [('▁###', 835), ('▁Ass', 4007), ('istant', 22137), (':', 29901)]


[('▁###', 835), ('▁Ass', 4007), ('istant', 22137), (':', 29901), ('▁Hi', 6324), (',', 29892), ('▁how', 920), ('▁can', 508), ('▁I', 306), ('▁help', 1371), ('▁you', 366), ('?', 29973), ('▁###', 835), ('▁Human', 12968), (':', 29901), ('▁Hello', 15043), ('<0x0A>', 13), ('<0x0A>', 13), ('▁', 29871)]
[('▁###', 835), ('▁Human', 12968), (':', 29901)]


In [6]:
# 加载 OpenAssistant-Guanaco 训练数据
guanaco_dataset_train = load_dataset("timdettmers/openassistant-guanaco", split="train")
# guanaco_dataset_valid = load_dataset("timdettmers/openassistant-guanaco", split="test")

print("Preprocessing Guanaco dataset...")
def filter_and_reverse_dialogue(example):
    text = example["text"]
    # 找到第二次出现 "### Human:" 的位置
    first_human_idx = text.find("### Human:")
    second_human_idx = text.find("### Human:", first_human_idx + 1)
    
    # 如果没有第二次出现 "### Human:"，保留原文本；否则截断到第二次出现之前
    if second_human_idx != -1:
        text = text[:second_human_idx]
    
    # 分割对话为 Human 和 Assistant 的部分
    dialogues = text.split("### Human:")
    reversed_dialogues = []
    
    for dialogue in dialogues[1:]:  # 跳过第一个空白部分
        if "### Assistant:" in dialogue:
            human_part, assistant_part = dialogue.split("### Assistant:", 1)
            # 反转 Human 和 Assistant 的顺序，保留空格
            reversed_dialogues.append(f"### Assistant: {assistant_part.strip()} ### Human: {human_part.strip()} {EOS_token}")
    
    # 将反转后的对话重新拼接
    reversed_text = " ".join(reversed_dialogues)
    return {"text": reversed_text}
guanaco_dataset_train = guanaco_dataset_train.map(filter_and_reverse_dialogue)
print(guanaco_dataset_train["text"][0])
instruction_template = '### Assistant:'
response_template = '### Human:'
response_template_with_context = ' ### Human:'
response_template_ids = tokenizer.encode(response_template_with_context, add_special_tokens=False)[1:]
collator = DataCollatorForCompletionOnlyLM(instruction_template=instruction_template, response_template=response_template, tokenizer=tokenizer)


Repo card metadata block was not found. Setting CardData to empty.


Preprocessing Guanaco dataset...


Map:   0%|          | 0/9846 [00:00<?, ? examples/s]

### Assistant: "Monopsony" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.

Recent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.

Overall, the concept of monop

In [7]:
# 选取数据集中的一个样本
sample = guanaco_dataset_train[8888]["text"]

# **先进行 tokenization**
inputs = tokenizer(sample, return_tensors="pt", padding=False, truncation=True, max_length=1024)

# `collator` 期望 `input_ids` 结构的数据，而不是 `text`
batch = collator([{"input_ids": inputs["input_ids"].squeeze(0), "attention_mask": inputs["attention_mask"].squeeze(0)}])

#查看batch的结构
print(batch.keys())
# print(batch["input_ids"].shape)
print(batch)
print(batch["attention_mask"])
print(batch["labels"])


dict_keys(['input_ids', 'attention_mask', 'labels'])
{'input_ids': tensor([[    1,   835,  4007, 22137, 29901,   997, 27073,   868, 15198, 19426,
         14411,   707, 25651, 29899, 29903, 19992,   313,  8463,  2108,   882,
         16794,  1318, 10186,  1648, 13742,   835, 12968, 29901,   751,  1808,
           707,   425, 27073,   868,   413,   834, 19426, 14411, 29973, 29871,
             2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), 'labels': tensor([[ -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,   751,  1808,
           707,   425, 27073,   868,   413,   834, 19426, 14411, 29973, 29871,
             2]])}
tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1

## 1.2. Initialize the Trainer


In [None]:
training_args = SFTConfig(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    save_steps=500,
    save_total_limit=4,
    eval_strategy="no",  # 每个 epoch 评估一次
    logging_dir="./logs",
    logging_steps=100,
    learning_rate=2e-5,
    max_seq_length=1024,
    dataset_text_field="text",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=guanaco_dataset_train,
    data_collator=collator,
)


## 1.3. Finetune the model

In [None]:
print("Start training the model...")
trainer.train()
model.save_pretrained("./finetuned-backward-model")
tokenizer.save_pretrained("./finetuned-backward-model")
print("Model saved successfully!")

In [8]:
# 加载反向模型和 LoRA 配置
if 'model' in locals():
    del model
    gc.collect()
    torch.cuda.empty_cache()
if 'tokenizer' in locals():
    del tokenizer
    gc.collect()
if 'lora_config' in locals():
    del lora_config
    gc.collect()
    torch.cuda.empty_cache()
gc.collect()
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", 
    torch_dtype=torch.bfloat16,
    device_map="auto")


# 加载分词器
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
EOS_token = tokenizer.eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token
# 加载 LoRA 适配器权重
model = PeftModel.from_pretrained(model=model, model_id="./finetuned-backward-model", is_trainable=True,device_map="auto")
print("LoRA adapter weights loaded successfully!")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

LoRA adapter weights loaded successfully!


## Push the instruction fine tuned model to HF hub and paste the url here


In [5]:
# 将反向模型推送到 HF
model.push_to_hub("YipKo/DSAA6000Q_Assignment3_backwards_model")
tokenizer.push_to_hub("YipKo/DSAA6000Q_Assignment3_backwards_model")
print("Model uploaded successfully!")

adapter_model.safetensors:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.81k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Model uploaded successfully!


# 2. Self-Augmentation

In [None]:

def is_single_turn(example): # check if the example is single-turn
    return (len(example["conversations"]) == 2) # or (example["source"] != "multi_turn")

lima_dataset = load_dataset("GAIR/LIMA", split="train").shuffle(seed=3407) # Load GAIR/LIMA dataset and Randomly shuffle the dataset
generated_instructions = []
sample_size = 150  # Sample 150 generated instructions

with tqdm(total=sample_size, desc="Generating instructions") as pbar:
    for example in lima_dataset:
        if not is_single_turn(example):
            continue  # skip multi-turn examples

        instruction_gt = example["conversations"][0]  # the first one is human's instruction
        response_gt = example["conversations"][1]  # the second one is assistant's response
        response_gt = "### Assistant: " + response_gt + " ### Human: " 
        try:
            output = model.generate(tokenizer.encode(response_gt, return_tensors="pt").to(device),max_new_tokens=1024) # generate instruction from response using the backward model
            ouput = tokenizer.decode(output[0], skip_special_tokens=True).split("### Human: ", 1)[-1].strip()
            generated_instructions.append((ouput, example["conversations"][0], example["conversations"][1]))
        except Exception as e:
            print(f"Error: {e}")
            continue
        
        pbar.update(1)
        
        # make sure just sampling 'sample_size' of instructions
        if len(generated_instructions) >= sample_size:
            break  

print("Sample generated instructions:")
for i in range(1):
    print(f"Output: {generated_instructions[i][0]}")
    print("-" * 50)

the structure of generated instructions:

[[instruction_output, instruction_gt, response]*n]

In [None]:
# save generated instructions; Format: (generated_instruction, instruction_gt, response_gt)
file_path = "./generated_instructions.json"
#如果文件已经存在，删除
if os.path.exists(file_path):
    os.remove(file_path)
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(generated_instructions, f, ensure_ascii=False, indent=4)

print(f"Generated instructions saved to {file_path}")

In [4]:
# read generated instructions
file_path = "./generated_instructions.json"
generated_instructions = []
with open(file_path, "r", encoding="utf-8") as f:
    generated_instructions = json.load(f)
# 查看generated_instructions的长度
print(len(generated_instructions))
# . Print out 5 examples of generated instructions.
# print(generated_instructions[0])
for i in range(5):
    print(f"Output: {generated_instructions[i][0]}")
    print("-" * 50)
    print(f"GT: {generated_instructions[i][1]}")
    print("-" * 50)
    print(f"Response: {generated_instructions[i][2]}")
    print("=" * 100)

150
Output: What are the traditional branches of philosophy?
What are some contemporary branches of philosophy?
--------------------------------------------------
GT: What are the major branches of philosophy?
--------------------------------------------------
Response: The traditional branches of philosophy generally include
* Aesthetics
* Epistemology 
* Ethics
* Logic
* Metaphysics / Ontology

We can go ahead and add a few contemporary branches on to this (more examples could certainly be adduced):
* Philosophy of Science [referring to the hard sciences] (Mathematics, Technology, etc.)
* Philosophy of Politics [referring to the social sciences] (Society, etc.)
* Philosophy of Religion [actually two things -- question related to the existence of God or gods AND questions relating to the sociological phenomenon of religion. Generally, referring to the former]
* Philosophy of Value (Axiology)
* Philosophy of Language
* Philosophy of Mind 

Philosophy of language and philosophy of mind 

# 3. Self-Curation：filter out high-quality and low-quality instructions

In [5]:
# Use LLaMA 2-7B Chat as the evaluation model
eval_model_name = "meta-llama/Llama-2-7b-chat-hf"
if 'model' in locals():
    del model
    gc.collect()
    torch.cuda.empty_cache()
if 'tokenizer' in locals():
    del tokenizer
    gc.collect()
if 'lora_config' in locals():
    del lora_config
    gc.collect()
    torch.cuda.empty_cache()
if 'eval_model' in locals():
    del eval_model
    gc.collect()
    torch.cuda.empty_cache()
if 'eval_tokenizer' in locals():
    del eval_tokenizer
    gc.collect()
    torch.cuda.empty_cache()
torch.cuda.empty_cache()
eval_tokenizer = AutoTokenizer.from_pretrained(eval_model_name)
eval_EOS_token = eval_tokenizer.eos_token
eval_model = AutoModelForCausalLM.from_pretrained(eval_model_name, torch_dtype=torch.bfloat16,device_map="auto")

few_shot_examples = """
Below is an instruction from an user and a candidate answer. Evaluate whether or
not the answer is a good example of how AI Assistant should respond to the user’s
instruction. Please assign a score using the following 5-point scale:
1: It means the answer is incomplete, vague, off-topic, controversial, or not
exactly what the user asked for. For example, some content seems missing, numbered
list does not start from the beginning, the opening sentence repeats user’s question.
Or the response is from another person’s perspective with their personal experience
(e.g. taken from blog posts), or looks like an answer from a forum. Or it contains
promotional text, navigation text, or other irrelevant information.
2: It means the answer addresses most of the asks from the user. It does not
directly address the user’s question. For example, it only provides a high-level
methodology instead of the exact solution to user’s question.
3: It means the answer is helpful but not written by an AI Assistant. It addresses
all the basic asks from the user. It is complete and self contained with the
drawback that the response is not written from an AI assistant’s perspective, but
from other people’s perspective. The content looks like an excerpt from a blog post,
web page, or web search results. For example, it contains personal experience or
opinion, mentions comments section, or share on social media, etc.
4: It means the answer is written from an AI assistant’s perspective with a
clear focus of addressing the instruction. It provide a complete, clear, and
comprehensive response to user’s question or instruction without missing or
irrelevant information. It is well organized, self-contained, and written in a
helpful tone. It has minor room for improvement, e.g. more concise and focused.
5: It means it is a perfect answer from an AI Assistant. It has a clear focus on
being a helpful AI Assistant, where the response looks like intentionally written
to address the user’s question or instruction without any irrelevant sentences. The
answer provides high quality content, demonstrating expert knowledge in the area, is
very well written, logical, easy-to-follow, engaging and insightful.
Please first provide a brief reasoning you used to derive the rating score, and
then write "Score: <rating>" in the last line.
"""

prompt_template = few_shot_examples + "\n{instruction}\n{response}\n"

rated_examples = []
with tqdm(total=len(generated_instructions), desc="Filtering out high-quality and low-quality instructions") as pbar:
    for instruction_output, instruction_gt, response in generated_instructions:
        prompt = prompt_template.format(instruction=instruction_output, response=response)
        # print(f"Prompt: {prompt}")
        inputs = eval_tokenizer(prompt, return_tensors="pt").to(device)
        pbar.update(1)
        # generate rating
        try:
            rating_output = eval_model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.1,  # lower temperature for more confident predictions
                do_sample=False  # greedy decoding
            )

            rating_text = eval_tokenizer.decode(rating_output[0], skip_special_tokens=True).strip()
            # print(f"rating_text: {rating_text}")
        except Exception as e:
            print(f"Error: {e}")
            continue
        
        # extract rating (make sure it works!!!!)
        try:
            match = re.search(r"Score:\s*(\d+)", rating_text)
            if match:
                rating = int(match.group(1))  # 提取并转换为整数
                if 1 <= rating <= 5:  # 确保评分在有效范围内
                    rated_examples.append((instruction_output, response, rating))
                else:
                    raise ValueError("Rating is not in the valid range [1, 5]")
            else:
                raise ValueError("Rating not found")
        except ValueError:
            continue  # if not a valid rating, skip the example

# Save rated_examples
file_path = "./rated_examples.json"
if os.path.exists(file_path):
    os.remove(file_path)
with open(file_path, "w", encoding="utf-8") as f:
    json.dump(rated_examples, f, ensure_ascii=False, indent=4)
print(f"There are {len(rated_examples)} rated examples, and it was saved to {file_path}")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Filtering out high-quality and low-quality instructions:  87%|████████▋ | 130/150 [02:28<00:11,  1.71it/s]This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
Filtering out high-quality and low-quality instructions: 100%|██████████| 150/150 [03:18<00:00,  1.32s/it]

There are 129 rated examples, and it was saved to ./rated_examples.json





In [6]:
# Print out 5 examples of high quality examples and 5 examples of low quality examples. 
# Filter out high-quality and low-quality examples
high_quality = [ex for ex in rated_examples if ex[2] > 4]
low_quality = [ex for ex in rated_examples if ex[2] <= 4]

# Some examples
if len(high_quality) > 0:
    print("High-quality examples:")
    for i in range(min(5, len(high_quality))):
        print(f"Instruction: {high_quality[i][0]}")
        print(f"Response: {high_quality[i][1]}")
        print(f"Rating: {high_quality[i][2]}")
        print("-" * 50)
if len(low_quality) > 0:
    print("Low-quality examples:")
    for i in range(min(5, len(low_quality))):
        print(f"Instruction: {low_quality[i][0]}")
        print(f"Response: {low_quality[i][1]}")
        print(f"Rating: {low_quality[i][2]}")
        print("-" * 50)

High-quality examples:
Instruction: 7 words that rhyme with the word light
Response: Here are 7 words that rhyme with the word "light", sorted in alphabetical order:
* bite
* height
* quite
* right
* site
* white
* write
Rating: 5
--------------------------------------------------
Instruction: > I had a call with Nespresso today where the support agent said that the machines should not be connected to a GFCI outlet. I was confused and asked why not. He said that the machines could be damaged by the GFCI. I told him that this is not true and he agreed. 
> This call with Nespresso was the first time I'd ever seen someone claim that GFCI outlets can potentially damage coffee machines. 
> How would you respond to this call?
Response: 
> the Nespresso support agent said that the machines should not be connected to a GFCI outlet

Where do they think Nespresso machines are used? Bedrooms? No, they're kitchen appliances and GFCIs are widely used in kitchens. GFCIs are code requirements and hav

# 4. Fine-tune the base model with the rated examples (forward model)

In [3]:
# Load base model and apply LoRA
if 'model' in locals():
    del model
    gc.collect()
    torch.cuda.empty_cache()
if 'tokenizer' in locals():
    del tokenizer
    gc.collect()
if 'lora_config' in locals():
    del lora_config
    gc.collect()
    torch.cuda.empty_cache()
if 'eval_model' in locals():
    del eval_model
    gc.collect()
    torch.cuda.empty_cache()
if 'eval_tokenizer' in locals():
    del eval_tokenizer
    gc.collect()
    torch.cuda.empty_cache()
torch.cuda.empty_cache()
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.unk_token
EOS_token = tokenizer.eos_token
final_lora_config = LoraConfig(
    r=8,  # LoRA 的秩
    lora_alpha=32,  # LoRA 的缩放因子
    target_modules=["q_proj", "v_proj"],  # 目标模块
    lora_dropout=0.05,  # Dropout 概率
    bias="none",  # 是否添加偏置
    task_type="CAUSAL_LM"  # 任务类型
)
model = get_peft_model(model, final_lora_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Load rated_examples
file_path = "./rated_examples.json"
rated_examples = []
with open(file_path, "r", encoding="utf-8") as f:
    rated_examples = json.load(f)

# Format the dataset
def format_dataset(rated_examples,rating=4):
    formatted_datasets = {"text": []}  # 用字典存储数据，符合 Dataset 格式
    for example in rated_examples:
        instruction_output = example[0]
        response = example[1]
        rate = int(example[2])
        text = f"### Human: {instruction_output} ### Assistant: {response}{EOS_token}"
        if rate >= rating:
            formatted_datasets["text"].append(text)  # 追加到列表中
    return Dataset.from_dict(formatted_datasets)  # 转换为 Transformer 库的 Dataset 格式

final_fine_tune_dataset = format_dataset(rated_examples,rating=4)

## Push the high quality dataset to HF hub and paste the url here

In [11]:
# Push the dataset to HF
final_fine_tune_dataset.push_to_hub("YipKo/DSAA6000Q_Assignment3_self-curation_dataset")
print("Final fine-tune dataset uploaded successfully!")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/266 [00:00<?, ?B/s]

Final fine-tune dataset uploaded successfully!


In [5]:
# Data collator
forward_instruction_template = '### Human:'
forward_response_template = '### Assistant:'
forward_response_template_with_context = ' ### Assistant:'
forward_response_template_ids = tokenizer.encode(forward_response_template_with_context, add_special_tokens=False)[1:]
forward_collator = DataCollatorForCompletionOnlyLM(instruction_template=forward_instruction_template, response_template=forward_response_template_ids, tokenizer=tokenizer)


In [6]:
print(final_fine_tune_dataset["text"][1])

### Human: What is the difference between an NPC and a bot? ### Assistant: The key distinction is that a Bot represents an automated player; an NPC, by contrast, isn't playing the game at all.

In general, an NPC is a part of the game; a placed object which is designed by the creator and exists to interact with the player. Examples would include vendors, quest givers, or enemies. In some games, (particularly RPG's), the term is also used to refer to characters under the control of, but not generated by the player. They are often distinguished by having distinct personalities and dialog, whereas a ""Player Character"" is meant as more of an extension of the player themselves. Think of the companion party members in the Mass Effect series for an example.

A Bot, by contrast, is essentially a player of the game controlled by a computer. This can be populated within a single instance, as a feature in some games (i.e. AI opponents in a normally multiplayer game), or, in some cases, actually

In [7]:
# 选取数据集中的一个样本
sample = final_fine_tune_dataset["text"][1]

# **先进行 tokenization**
inputs = tokenizer(sample, return_tensors="pt", padding=False, truncation=True, max_length=1024)

# `collator` 期望 `input_ids` 结构的数据，而不是 `text`
batch = forward_collator([{"input_ids": inputs["input_ids"].squeeze(0), "attention_mask": inputs["attention_mask"].squeeze(0)}])

#查看batch的结构
print(batch.keys())
# print(batch["input_ids"].shape)
print(batch)
print(batch["attention_mask"])
print(batch["labels"])

dict_keys(['input_ids', 'attention_mask', 'labels'])
{'input_ids': tensor([[    1,   835, 12968, 29901,  1724,   338,   278,  4328,  1546,   385,
           405,  9026,   322,   263,  9225, 29973,   835,  4007, 22137, 29901,
           450,  1820, 21578,   338,   393,   263, 11273, 11524,   385,  3345,
           630,  4847, 29936,   385,   405,  9026, 29892,   491, 12814, 29892,
          3508, 29915, 29873,  8743,   278,  3748,   472,   599, 29889,    13,
            13,   797,  2498, 29892,   385,   405,  9026,   338,   263,   760,
           310,   278,  3748, 29936,   263,  7180,  1203,   607,   338,  8688,
           491,   278,   907,  1061,   322,  4864,   304, 16254,   411,   278,
          4847, 29889,  1222,  9422,   723,  3160,  9691,   943, 29892, 21126,
           330,  1536, 29892,   470, 22595, 29889,   512,   777,  8090, 29892,
           313,  1595, 16311,   368,   390, 16903, 29915, 29879,   511,   278,
          1840,   338,   884,  1304,   304,  2737,   304,  4890,

In [8]:
# Fine-tune the model on high-quality examples

training_args = SFTConfig(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    save_steps=10,
    save_total_limit=4,
    eval_strategy="no",  # 每个 epoch 评估一次
    logging_dir="./logs",
    logging_steps=100,
    learning_rate=2e-6,
    max_seq_length=1024,
    dataset_text_field="text",
)
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=final_fine_tune_dataset,
    data_collator=forward_collator,
)

print("Start fine-tuning the model...")
trainer.train()
model.save_pretrained("./final-finetuned-model")
tokenizer.save_pretrained("./final-finetuned-model")
print("Final fine-tuned model saved successfully!")

Converting train dataset to ChatML:   0%|          | 0/76 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/76 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/76 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/76 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Start fine-tuning the model...


Step,Training Loss




Final fine-tuned model saved successfully!


In [10]:
# 加载正向模型和 LoRA 配置
if 'model' in locals():
    del model
    gc.collect()
    torch.cuda.empty_cache()
if 'tokenizer' in locals():
    del tokenizer
    gc.collect()
if 'lora_config' in locals():
    del lora_config
    gc.collect()
    torch.cuda.empty_cache()
gc.collect()
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", 
    torch_dtype=torch.bfloat16,
    device_map="auto")


# 加载分词器
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
EOS_token = tokenizer.eos_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token
# 加载 LoRA 适配器权重
model = PeftModel.from_pretrained(model=model, model_id="./final-finetuned-model", is_trainable=True,device_map="auto")
print("LoRA adapter weights loaded successfully!")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

LoRA adapter weights loaded successfully!


## Generate 5 responses using the final fine-tuned model


In [9]:
temp = []
with tqdm(total=5, desc="Generate 5 responses using the final fine-tuned model") as pbar:
    for sample in final_fine_tune_dataset["text"]:
        text = sample  # the first one is human's instruction
        #删除### Assistant: 后面的字符
        if "### Assistant: " in text:
            text = text.split("### Assistant: ", 1)[0] + "### Assistant: "  # retain "### Assistant: " and remove the rest
        else:
            continue
        try:
            output = model.generate(tokenizer.encode(text, return_tensors="pt").to(device),max_new_tokens=256) # generate instruction from response using the backward model
            output = tokenizer.decode(output[0], skip_special_tokens=True)
            temp.append(output)
        except Exception as e:
            print(f"Error: {e}")
            continue
        # print(f"output: {output}")
        pbar.update(1)
        if len(temp) >= 5:
            break
for i in range(5):
    print(f"Output: {temp[i]}")
    print("-" * 50)

Generate 5 responses using the final fine-tuned model: 100%|██████████| 5/5 [00:49<00:00,  9.82s/it]

Output: ### Human: What are the traditional branches of philosophy?
What are some contemporary branches of philosophy? ### Assistant: 1. The Philosophy of Science 2. The Philosophy of Mind 3. The Philosophy of Language 4. The Philosophy of Logic 5. The Philosophy of Mathematics 6. The Philosophy of Art 7. The Philosophy of Law 8. The Philosophy of History 9. The Philosophy of Religion 10. The Philosophy of Politics 11. The Philosophy of Ethics 12. The Philosophy of Education 13. The Philosophy of Aesthetics 14. The Philosophy of Science 15. The Philosophy of Logic 16. The Philosophy of Mathematics 17. The Philosophy of Art 18. The Philosophy of Law 19. The Philosophy of History 20. The Philosophy of Religion 21. The Philosophy of Politics 22. The Philosophy of Ethics 23. The Philosophy of Education 24. The Philosophy of Aesthetics 25. The Philosophy of Science 26. The Philosophy of Logic 27. The Philosophy of Mathematics 28. The Philosophy
----------------------------------------------




## Push the instruction fine tuned model to HF hub and paste the url here

In [11]:
# 将反向模型推送到 HF
model.push_to_hub("YipKo/DSAA6000Q_Assignment3_forward_model")
tokenizer.push_to_hub("YipKo/DSAA6000Q_Assignment3_forward_model")
print("Model uploaded successfully!")

README.md:   0%|          | 0.00/1.81k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Model uploaded successfully!
