In [84]:
import torch
from peft import PeftModel, PeftConfig
from transformers import LlamaForCausalLM, LlamaTokenizer
from datasets import load_dataset
import os
import sys
from functools import partial
from torch.utils.data import Dataset
import json
import time
from torch.utils.data import DataLoader

## 加载tokenizer 和 model
base_model = '/home/ldn/.cache/huggingface/hub/models--lmsys--vicuna-7b-v1.3/snapshots/ac066c83424c4a7221aa10c0ebe074b24d3bcdb6'
tokenizer = LlamaTokenizer.from_pretrained(base_model)
tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
tokenizer.padding_side = "left"  # Allow batched inference

print(tokenizer)


device = "cuda:2"
device_map = {"":2}

model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        # device_map="auto",
        device_map=device_map
    )

peft_model_id = '/home/public/ldn/zpLLM/output/checkpoint-5400'
model = PeftModel.from_pretrained(model, peft_model_id, device_map=device_map)

print(model)

LlamaTokenizer(name_or_path='/home/ldn/.cache/huggingface/hub/models--lmsys--vicuna-7b-v1.3/snapshots/ac066c83424c4a7221aa10c0ebe074b24d3bcdb6', vocab_size=32000, model_max_length=2048, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': '<unk>'}, clean_up_tokenization_spaces=False)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
              (v_proj)

In [85]:
## 预处理数据集
from torch.utils.data import Dataset
import json

## 读取数据集
class ZpData(Dataset):
    def __init__(self, data_file):
        self.data = self.load_data(data_file)
    
    def load_data(self, data_file):
        with open(data_file, mode='r') as f:
            data = f.read()
        samples = json.loads(data)
        
        Data = {}
        for idx, sample in enumerate(samples):
            formatted_prompt = self.create_prompt_formats(sample)
            Data[idx] = {'text': formatted_prompt, 'label': sample['output']}
        
        return Data

    def create_prompt_formats(self, sample):
        INTRO_BLURB = "以下是描述一个任务的指示，请编写一个适当的回答，完成该任务。"
        INSTRUCTION_KEY = "### Instruction:"
        INPUT_KEY = "Input:"
        RESPONSE_KEY = "### Response:"
        
        blurb = f"{INTRO_BLURB}"
        instruction = f"{INSTRUCTION_KEY}\n{sample['instruction']}"
        input_context = f"{INPUT_KEY}\n{sample['input']}" if sample["input"] else None
        response = f"{RESPONSE_KEY}"
        parts = [part for part in [blurb, instruction, input_context, response] if part]
        formatted_prompt = "\n\n".join(parts)
        return formatted_prompt

    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

test_data = ZpData('/home/public/ldn/zpLLM/test/testdata.json')
print(test_data[0],test_data[1],test_data[2])
print(len(test_data))

{'text': '以下是描述一个任务的指示，请编写一个适当的回答，完成该任务。\n\n### Instruction:\n判断一段话是否为诈骗话术，输出0或1，这段话为-->\n\nInput:\n你的支付宝是以学生注册，与支付宝相关的所有贷款以及消费都是高利息，影响个人征信，你需要关闭吗，你添加qq，昵称：K10297，账号：2669737518，你在应用商店下载投屏软件\n\n### Response:', 'label': '1'} {'text': '以下是描述一个任务的指示，请编写一个适当的回答，完成该任务。\n\n### Instruction:\n判断一段话是否为诈骗话术，输出0或1，这段话为-->\n\nInput:\n我们有专业的指导老师陈家琳带队，指导散户入市，如果你有意愿，可以添加老师QQ\n\n### Response:', 'label': '1'} {'text': '以下是描述一个任务的指示，请编写一个适当的回答，完成该任务。\n\n### Instruction:\n判断一段话是否为诈骗话术，输出0或1，这段话为-->\n\nInput:\n你在京东上购买的一件快递丢失，圆通快递要给你退赔80元钱，你下载会讯通app，方便视频沟通\n\n### Response:', 'label': '1'}
9604


In [86]:
from torch.utils.data import DataLoader
## 将数据分批处理
def collote_fn(batch_samples):
    batch_sentence_1 = []
    batch_label = []
    for sample in batch_samples:
        batch_sentence_1.append(sample['text'])
        batch_label.append(int(sample['label']))
    X = tokenizer(
        batch_sentence_1, 
        padding=True, 
        truncation=True, 
        return_tensors="pt"
    )
    y = torch.tensor(batch_label)
    return X, y

# test_dataloader = DataLoader(test_data, batch_size=32, shuffle=True, collate_fn=collote_fn)
test_dataloader = DataLoader(test_data, batch_size=32, collate_fn=collote_fn)
size = len(test_dataloader.dataset)
batches = size/32
print(size)
print(batches)

9604
300.125


In [87]:
## 处理一个批次
import numpy as np
correct = 0
total_true = 0
total_false = 0
confidence = []
time_arr = []
predictions = []
labels_arr = []
model.eval()
with torch.no_grad():
    start_time = time.time()
    
    X, y = next(iter(test_dataloader))
    X, y = X.to(device), y.to(device)
    print(f'------------------------------X shape and y shape------------------------------')
    print('batch_X shape:', {k: v.shape for k, v in X.items()})
    print('batch_y shape:', y.shape)
    #如果shuffle的话 获取该批次的原始文本
    # Xtext = tokenizer.batch_decode(X.input_ids,skip_special_tokens=True)
    labels_arr += np.array(y.cpu()).tolist()

    
    output = model.generate(**X, max_new_tokens=10,return_dict_in_generate=True, output_scores=True)

    ## 统计输出概率
    transition_scores = model.compute_transition_scores(
            output.sequences, output.scores, normalize_logits=True)
    input_length = X.input_ids.shape[1]
    generated_tokens = output.sequences[:, input_length:]
    n = generated_tokens.shape[0]
    for idx in range(n):
        # for tok, score in zip(generated_tokens[idx], transition_scores[idx]):
        #     print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.cpu().numpy():.3f} | {np.exp(score.cpu().numpy()):.2%}")
        # print("==========================================================================")
        print(f"| {generated_tokens[idx][1]:5d} | {tokenizer.decode(generated_tokens[idx][1]):8s} | {np.exp(transition_scores[idx][1].cpu().numpy()):.2f}")
        confidence.append(np.exp(transition_scores[idx][1].cpu().numpy()))
        
    
    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    result = []
    for str in output:
        predictions.append(int(str[1]))
        result.append(int(str[1]))
    preds = torch.tensor(result, device=device)
    print("------------------------------predictions------------------------------")
    print(f"{preds}\n")
    print("------------------------------labels------------------------------")
    print(f"{y}\n")
    correct += (preds == y).sum().item()
    # 计算真正例（True Positives）
    true_positives = ((preds == 1) & (y == 1)).sum().item()
    # 计算假负例（False Negatives）
    false_negatives = ((preds == 0) & (y == 1)).sum().item()
    total_true += true_positives
    total_false += false_negatives
    print("----------------------------------total correct---------------------------------")
    print(correct)
    print(total_true)
    print(total_false)

    end_time = time.time()
    execution_time = end_time - start_time
    print(f"推理时间为：{execution_time} 秒")
    time_arr.append(execution_time)
    print(time_arr,sum(time_arr))

print(f"total_correct: {correct}")
print(f"true_positives: {total_true}")
print(f"false_positives: {total_false}")
# 准确率
correct /= 32
# 召回率
recall = total_true / (total_true + total_false)

print(f"test Accuracy: {(100*correct):>0.1f}%")
print(f"recall:{recall}")
print(f"f1:{2 *recall*correct /(recall + correct)}")

samples = []
for i in range(32):
    samples.append(test_dataloader.dataset[i])


print(samples,len(samples))
print(labels_arr,len(labels_arr))
print(predictions,len(predictions))
print(confidence,len(confidence))

logs = zip(samples,labels_arr,predictions,confidence)

result_arr = []
for sample,label,pred,conf in logs:
    result_arr.append({"sample":sample,"lable":label,"pred":pred,"conf":conf})
print(result_arr,len(result_arr))


------------------------------X shape and y shape------------------------------
batch_X shape: {'input_ids': torch.Size([32, 378]), 'attention_mask': torch.Size([32, 378])}
batch_y shape: torch.Size([32])
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1    

In [70]:
## 评估整个data_loader
## 处理一个批次
import numpy as np
correct = 0
total_true = 0
total_false = 0
confidence = []
time_arr = []
predictions = []
labels_arr = []
model.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        start_time = time.time()
        X, y = X.to(device), y.to(device)
        print(f'------------------------------X shape and y shape------------------------------')
        print('batch_X shape:', {k: v.shape for k, v in X.items()})
        print('batch_y shape:', y.shape)
        #如果shuffle的话 获取该批次的原始文本
        # Xtext = tokenizer.batch_decode(X.input_ids,skip_special_tokens=True)
        labels_arr += np.array(y.cpu()).tolist()

    
        output = model.generate(**X, max_new_tokens=10,return_dict_in_generate=True, output_scores=True)
    
        ## 统计输出概率
        transition_scores = model.compute_transition_scores(
                output.sequences, output.scores, normalize_logits=True)
        input_length = X.input_ids.shape[1]
        generated_tokens = output.sequences[:, input_length:]
        n = generated_tokens.shape[0]
        for idx in range(n):
            # for tok, score in zip(generated_tokens[idx], transition_scores[idx]):
            #     print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.cpu().numpy():.3f} | {np.exp(score.cpu().numpy()):.2%}")
            # print("==========================================================================")
            print(f"| {generated_tokens[idx][1]:5d} | {tokenizer.decode(generated_tokens[idx][1]):8s} | {np.exp(transition_scores[idx][1].cpu().numpy()):.2f}")
            confidence.append(np.exp(transition_scores[idx][1].cpu().numpy()))
        
    
        output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    
        result = []
        for str1 in output:
            predictions.append(int(str1[1]))
            result.append(int(str1[1]))
        preds = torch.tensor(result, device=device)
        print("------------------------------predictions------------------------------")
        print(f"{preds}\n")
        print("------------------------------labels------------------------------")
        print(f"{y}\n")
        correct += (preds == y).sum().item()
        # 计算真正例（True Positives）
        true_positives = ((preds == 1) & (y == 1)).sum().item()
        # 计算假负例（False Negatives）
        false_negatives = ((preds == 0) & (y == 1)).sum().item()
        total_true += true_positives
        total_false += false_negatives
        print("----------------------------------total correct---------------------------------")
        print(correct)
        print(total_true)
        print(total_false)
    
        end_time = time.time()
        execution_time = end_time - start_time
        print(f"推理时间为：{execution_time} 秒")
        time_arr.append(execution_time)
        print(time_arr,sum(time_arr))


print(f"total_correct: {correct}")
print(f"true_positives: {total_true}")
print(f"false_positives: {total_false}")
# 准确率
correct /= size
# 召回率
recall = total_true / (total_true + total_false)

print(f"test Accuracy: {(100*correct):>0.1f}%")
print(f"recall:{recall}")
print(f"f1:{2 *recall*correct /(recall + correct)}")

samples = []
for i in range(size):
    samples.append(test_dataloader.dataset[i])


print(samples,len(samples))
print(labels_arr,len(labels_arr))
print(predictions,len(predictions))
print(confidence,len(confidence))
print(time_arr,sum(time_arr))

logs = zip(samples,labels_arr,predictions,confidence)

result_arr = []
for sample,label,pred,conf in logs:
    conf = format(conf, ".7f")
    result_arr.append({"sample":sample,"lable":label,"pred":pred,"conf":conf})
print(result_arr,len(result_arr))


import json
filepath = "log.json"
with open(filepath, 'w') as file:
    # 将数组写入文件
    json.dump(result_arr, file,ensure_ascii=False,indent=4)
    file.write("\n")

------------------------------X shape and y shape------------------------------
batch_X shape: {'input_ids': torch.Size([32, 378]), 'attention_mask': torch.Size([32, 378])}
batch_y shape: torch.Size([32])
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1        | 1.00
| 29896 | 1    

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [71]:
# 准确率 
print(f"test Accuracy: {(100*correct):>0.1f}%")
# 召回率
print(f"recall:{recall}")
# F1值
print(f"f1:{2 *recall*correct /(recall + correct)}")

test Accuracy: 98.1%
recall:0.9924242424242424
f1:0.9869147315569807


In [82]:
# 时间信息
print(len(time_arr))
import numpy as np
time_mean = np.mean(time_arr)
time_standard_deviation = np.std(time_arr)
print("一批次样本检测时间平均值：", time_mean)
print("一批次样本检测时间标准差：", time_standard_deviation)

301
一批次样本检测时间平均值： 6.8790952192984545
一批次样本检测时间标准差： 2.108515250699051


In [83]:
# 正样本检测置信度的平均值和标准差，负样本检测置信度的平均值和标准差
positive_confidence = [confidence[idx] for idx, pred in enumerate(predictions) if pred == 1]
negtive_confidence = [confidence[idx] for idx, pred in enumerate(predictions) if pred == 0]
# print(len(positive_confidence))
# print(len(negtive_confidence))
# print(len(positive_confidence) + len(negtive_confidence))

positive_mean = np.mean(positive_confidence)
positive_standard_deviation = np.std(positive_confidence)
print("正平均值：", positive_mean)
print("正标准差：", positive_standard_deviation)

negtive_mean = np.mean(negtive_confidence)
negtive_standard_deviation = np.std(negtive_confidence)
print("负平均值：", negtive_mean)
print("负标准差：", negtive_standard_deviation)

正平均值： 0.9853559
正标准差： 0.06417833
负平均值： 0.97711635
负标准差： 0.07341936
