In [1]:
import random, os, tqdm, time, json
import numpy as np
import pandas as pd
from matplotlib.font_manager import FontProperties
import matplotlib.pyplot as plt

import sys
sys.path.append("../../../../")

random.seed(618)
np.random.seed(907)

new_base_path = os.path.join(
    "/Users/minkexiu/Downloads/",
    "/".join(
        os.getcwd().split("/")[-1*(len(sys.path[-1].split("/")) - 1):]
    ),
)
print("storage dir:", new_base_path)
print("code dir:", os.getcwd())

## 创建文件夹。
if not os.path.exists(new_base_path):
    os.makedirs(
        new_base_path
    )
if not os.path.exists(os.path.join(new_base_path, "preprocessedData")):
    os.makedirs(
        os.path.join(new_base_path, "preprocessedData")
    )
if not os.path.exists(os.path.join(new_base_path, "originalData")):
    os.makedirs(
        os.path.join(new_base_path, "originalData")
    )
if not os.path.exists(os.path.join(new_base_path, "trained_models")):
    os.makedirs(
        os.path.join(new_base_path, "trained_models")
    )

def create_originalData_path(filename_or_path):
    return os.path.join(new_base_path, "originalData", filename_or_path)
def create_preprocessedData_path(filename_or_path):
    return os.path.join(new_base_path, "preprocessedData", filename_or_path)
def create_trained_models_path(filename_or_path):
    return os.path.join(new_base_path, "trained_models", filename_or_path)

def millisec2datetime(timestamp):
    time_local = time.localtime(timestamp/1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time_local)
    
def run_finish():
    # 假设你的字体文件是 'myfont.ttf' 并且位于当前目录下  
    font = FontProperties(fname="/Users/minkexiu/Documents/GitHub/ML_Tryout/SimHei.ttf", size=24)  
    # 创建一个空白的图形  
    fig, ax = plt.subplots()  
    ax.imshow(
        plt.imread("/Users/minkexiu/Downloads/wallhaven-dgxpyg.jpg")
    )
    # 在图形中添加文字  
    ax.text(
        ax.get_xlim()[1] * 0.5, 
        ax.get_ylim()[0] * 0.5, 
        f"程序于这个点跑完：\n{millisec2datetime(time.time()*1000)}", fontproperties=font, ha="center", va="center", color="red"
    )  
    # 设置图形的布局  
    # ax.set_xlim(0, 1)  
    # ax.set_ylim(0, 1)  
    ax.set_xticks([])  
    ax.set_yticks([])  
    ax.patch.set_color("blue")
    # 显示图形  
    plt.show()
        
tqdm.tqdm.pandas() ## 引入这个，就可以在apply的时候用progress_apply了。

import IPython
def kill_current_kernel():
    '''杀死当前的kernel释放内存空间。'''
    IPython.Application.instance().kernel.do_shutdown(True) 
    
def simply_show_data(df1):
    print(df1.shape)
    display(df1.head())
    
def wait_flag(saved_flag_path, time_interval_sec=10):
    print("waiting for", saved_flag_path)
    time_count = 0
    while True:
        if os.path.exists(saved_flag_path):
            break
        time.sleep(time_interval_sec)
        time_count+=time_interval_sec
        print(time_count, end=" ")
    print("finish!!")

storage dir: /Users/minkexiu/Downloads/GitHub/ML_runCodeFromBook/大规模语言模型：从理论到实践/RLHF_PPO
code dir: /Users/minkexiu/Documents/GitHub/ML_runCodeFromBook/大规模语言模型：从理论到实践/RLHF_PPO


In [2]:
from datetime import datetime
class TimerContext:  
    def __enter__(self):  
        self.start_time = str(datetime.now())
        print("start time:", self.start_time)
        return self  
    def __exit__(self, exc_type, exc_val, exc_tb):  
        print("start time:", self.start_time)
        print("end time", str(datetime.now()))

def read_feaList_from_file(fpath, do_lowering = True):
    with open(fpath, "r") as f:
        if do_lowering:
            feas = [i.strip().lower() for i in f.readlines()] #  if i.strip() != ""
        else:
            feas = [i.strip() for i in f.readlines()]
    print(len(feas), fpath)
    return feas

def save_feaList_to_file(feas, fpath, mode = "w"):
    if len(feas) == 0:
        print(f"Finished writing file: {fpath}. Wrote nothing.")
        return
    dir_path = os.path.split(fpath)[0]
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    with open(fpath, mode) as f:
        f.write("\n".join(feas) + "\n")
    print(f"Finished writing file: {fpath}")

In [4]:
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [5]:
## 尝试初始化一下原始的文本生成模型.
## 这个模型是一个生成模型哦。
model = AutoModelForCausalLM.from_pretrained(
    create_trained_models_path("Qwen1.5-0.5B-Chat")
).to("cpu").eval()

In [6]:
tokenizer = AutoTokenizer.from_pretrained(create_trained_models_path("Qwen1.5-0.5B-Chat"))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
## 准备数据吧。
messages = [
    {"role": "system", "content": "你是一个有文化的文明人"},
    {"role": "user", "content": "饭店服务员的态度太差，使用委婉积极的态度投诉"},
]

In [8]:
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

In [9]:
model_inputs = tokenizer([text], return_tensors="pt")
model_inputs

{'input_ids': tensor([[151644,   8948,    198,  56568, 101909,  18830, 107705, 100704,  17340,
         151645,    198, 151644,    872,    198, 107514, 112822, 105421,  99222,
          99572,   3837,  37029,  99199, 106783,  99666, 105421, 104943, 151645,
            198, 151644,  77091,    198]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1]])}

In [35]:
model_inputs.input_ids.shape

torch.Size([1, 31])

In [48]:
model(
    model_inputs.input_ids, model_inputs.attention_mask, 
    output_hidden_states=True ## 这个是个什么玩意儿呢？就是hiddenstate。这个东西经过一个 hiddenstate -> token数 的映射之后，变成了logits。
).hidden_states[0].shape

torch.Size([1, 31, 1024])

In [23]:
torch.argmax(
    model(model_inputs.input_ids, model_inputs.attention_mask).logits, 
    dim=2
)

tensor([[ 39349,  39349, 151644, 151645, 151645, 151645, 151645, 151645, 151645,
            198, 151644,  20412, 151645, 151644, 151645, 151645, 151645, 151645,
         151645, 151645, 151645, 106783, 151645, 151645, 151645, 151645,    198,
         151644, 102056, 109723, 109723]])

In [24]:
tokenizer.batch_decode([ 39349,  39349, 151644, 151645, 151645, 151645, 151645, 151645, 151645,
            198, 151644,  20412, 151645, 151644, 151645, 151645, 151645, 151645,
         151645, 151645, 151645, 106783, 151645, 151645, 151645, 151645,    198,
         151644, 102056, 109723, 109723])

['.mybatis',
 '.mybatis',
 '<|im_start|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '\n',
 '<|im_start|>',
 '是',
 '<|im_end|>',
 '<|im_start|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '婉',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '<|im_end|>',
 '\n',
 '<|im_start|>',
 '如果你',
 '尊敬',
 '尊敬']

In [32]:
model.generate(
    model_inputs.input_ids, 
    max_length = 50
)

tensor([[151644,   8948,    198,  56568, 101909,  18830, 107705, 100704,  17340,
         151645,    198, 151644,    872,    198, 107514, 112822, 105421,  99222,
          99572,   3837,  37029,  99199, 106783,  99666, 105421, 104943, 151645,
            198, 151644,  77091,    198,  99491, 115546, 104188,  87026,  32664,
         112822,  47874, 102316,   9370, 106974,   1773,  43288,  87267, 104404,
         101214, 106076,  33108, 106076,  80443]])

In [33]:
tokenizer.batch_decode([151644,   8948,    198,  56568, 101909,  18830, 107705, 100704,  17340,
         151645,    198, 151644,    872,    198, 107514, 112822, 105421,  99222,
          99572,   3837,  37029,  99199, 106783,  99666, 105421, 104943, 151645,
            198, 151644,  77091,    198,  99491, 115546, 104188,  87026,  32664,
         112822,  47874, 102316,   9370, 106974,   1773,  43288,  87267, 104404,
         101214, 106076,  33108, 106076,  80443])

['<|im_start|>',
 'system',
 '\n',
 '你',
 '是一个',
 '有',
 '文化的',
 '文明',
 '人',
 '<|im_end|>',
 '\n',
 '<|im_start|>',
 'user',
 '\n',
 '饭店',
 '服务员',
 '的态度',
 '太',
 '差',
 '，',
 '使用',
 '委',
 '婉',
 '积极',
 '的态度',
 '投诉',
 '<|im_end|>',
 '\n',
 '<|im_start|>',
 'assistant',
 '\n',
 '非常',
 '抱歉',
 '听到',
 '您',
 '对',
 '服务员',
 '服务',
 '态度',
 '的',
 '不满',
 '。',
 '这',
 '可能',
 '是因为',
 '您的',
 '期望',
 '和',
 '期望',
 '没有']

In [34]:
len([151644,   8948,    198,  56568, 101909,  18830, 107705, 100704,  17340,
         151645,    198, 151644,    872,    198, 107514, 112822, 105421,  99222,
          99572,   3837,  37029,  99199, 106783,  99666, 105421, 104943, 151645,
            198, 151644,  77091,    198,  99491, 115546, 104188,  87026,  32664,
         112822,  47874, 102316,   9370, 106974,   1773,  43288,  87267, 104404,
         101214, 106076,  33108, 106076,  80443])

50

## Lora

In [10]:
from dataclasses import dataclass, field

In [11]:
lora_config = LoraConfig(
    r=2, ## 把秩降到这个数。
    lora_alpha=8, ## 这个是一个扩张系数。
    target_modules=['k_proj',  'v_proj'],
    lora_dropout=0,
    task_type="CAUSAL_LM",
)

In [12]:
lora_model = PeftModel(model, lora_config)
v_head = torch.nn.Linear(1024, 1, bias=False).to("cpu")
for name, module in lora_model.named_modules():
    if 'lora_' in name:
        for param in module.parameters():
            param.requires_grad = True

In [13]:
generated_ids = lora_model.generate(
    model_inputs.input_ids, 
    max_new_tokens=512, 
    top_p=1.0,
    num_beams=1,
    do_sample=False
)

In [14]:
generated_ids.shape

torch.Size([1, 224])

In [38]:
AutoModelForCausalLM.from_pretrained(
    create_trained_models_path("Qwen1.5-0.5B-Chat")
)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
          (o_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=1024, out_features=2816, bias=False)
          (up_proj): Linear(in_features=1024, out_features=2816, bias=False)
          (down_proj): Linear(in_features=2816, out_features=1024, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm()
        (post_attention_layernorm): Qwen2RMSNorm()
      )
    )
    (norm): Qwen2RMSNorm()
  )
  (lm_head): Line

In [40]:
lora_model.super()#.base_model

AttributeError: 'Qwen2ForCausalLM' object has no attribute 'super'