In [None]:
from transformers import AutoModel
import torch


# torch.set_default_tensor_type(torch.cuda.HalfTensor)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code = True, load_in_8bit=True, device_map='auto')

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.


In [24]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [None]:
print(model)

In [33]:
from peft import get_peft_model, LoraConfig, TaskType

# peft_path = "output/adapter_model.bin"
# peft_path = "output/adapter_model_4x2.bin"
peft_path = "output/adapter_model_r16.bin"

# 注意 r(attention dimension) 需要根据lora model 不同进行设置，如 8 或者 16
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, inference_mode=False,
    r=16,
    lora_alpha=32, lora_dropout=0.1 # lora scaling parameter and the dropout probability for Lora layers
)

model = get_peft_model(model, peft_config)
model.load_state_dict(torch.load(peft_path), strict=False)
torch.set_default_tensor_type(torch.cuda.FloatTensor)

In [34]:
print(model)


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): PeftModelForCausalLM(
          (base_model): LoraModel(
            (model): ChatGLMForConditionalGeneration(
              (transformer): ChatGLMModel(
                (word_embeddings): Embedding(150528, 4096)
                (layers): ModuleList(
                  (0): GLMBlock(
                    (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
                    (attention): SelfAttention(
                      (rotary_emb): RotaryEmbedding()
                      (query_key_value): MergedLinear8bitLt(
                        in_features=4096, out_features=12288, bias=True
                        (lora_dropout): Dropout(p=0.1, inplace=False)
                        (lora_A): Linear(in_features=4096, out_features=32, bias=False)
                        (lora_B): Conv1d(32, 8192, kernel_size=(1,), stride=(1,), groups=2, bias=

In [27]:
print(tokenizer)

ChatGLMTokenizer(name_or_path='THUDM/chatglm-6b', vocab_size=150344, model_max_length=2048, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<sop>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'mask_token': '[MASK]'})


In [28]:
import json

# instructions = json.load(open("data/valc/valc_data.json", encoding='utf-8'))
instructions = json.load(open("data/valc/valc_data.json", encoding='utf-8'))

In [29]:
answers = []
from convert_json import format_example

def generate(input_text, temperature):
    ids = tokenizer.encode(input_text)
    input_ids = torch.LongTensor([ids])
    out = model.generate(
        input_ids=input_ids,
        max_length=300,
        do_sample=False,
        temperature=temperature
    )
    out_text = tokenizer.decode(out[0])
    return out_text

with torch.no_grad():
    for idx, item in enumerate(instructions[:3]):
        feature = format_example(item)
        input_text = feature['context']
        out_text = generate(input_text, 0)
        answer = out_text.replace(input_text, "").replace("\nEND", "").strip()
        item['infer_answer'] = answer
        print(out_text)
        print(f"### {idx+1}.Instruct-Answer:\n", item.get('output'), '\n\n')
        answers.append({'index': idx, **item})

The dtype of attention mask (torch.int64) is not bool


Instruction: 请给出下面的网络词汇的含义
Input: 打工人
Answer: 打工人指的是在职场中从事体力劳动的人,通常是一些服务员、工厂工人、快递小哥等。
### 1.Answer:
 字面意义，给别人打工的人，调侃中带着心酸，只要不是自己当老板或是做股东，本质上都是在给别人卖命。 


Instruction: 请给出下面的网络词汇的含义
Input: 干饭人
Answer: 指那些只吃饭不做事的人。
### 2.Answer:
 努力吃饭的人，自我调侃，没什么远大的目标，只在乎美食。 


Instruction: 请给出下面的网络词汇的含义
Input: 凡尔赛文学
Answer: 指代一种以欧洲贵族社会为背景,描写贵族们的生活、爱情、婚姻、战争、政治斗争等的文学作品,因凡尔赛宫的原型就是当时的凡尔赛宫,因此这种文学也被称为“凡尔赛文学”。
### 3.Answer:
 通过反向表述，来刻意地“不经意”透露出自己生活优越的表达方式。 




In [30]:
generate('你是谁', 0)



'你是谁 我是一个名为 ChatGLM-6B 的人工智能助手,是基于清华大学 KEG 实验室和智谱 AI 公司于 2023 年共同训练的语言模型开发的。我的任务是针对用户的问题和要求提供适当的答复和支持。'

In [31]:
generate('写一句土味情话', 0)



'写一句土味情话 你是我的小甜心。'