In [7]:
import gradio as gr
from typing import Optional, List, Any
import torch
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TextIteratorStreamer, AutoProcessor
# from modelscope import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.outputs.generation import GenerationChunk
from torch import device
import time
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.chains.history_aware_retriever import create_history_aware_retriever

from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.runnables import RunnableWithMessageHistory
from langchain.chains.combine_documents import create_stuff_documents_chain

from torch.nn.parallel import DistributedDataParallel as DDP
from threading import Thread
import warnings
warnings.filterwarnings("ignore")

In [8]:
device = torch.device("cuda")

In [9]:
# 构建模型
class QianWenLLM(LLM):
    # 基于本地的QianWen7B-Chat模型自定义LLM类
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None
    processor: AutoProcessor = None
    
    def __init__(self, model_dir: str):
        # 从本地加载模型
        super().__init__()
        print('正从本地加载模型。。。。。')

        self.tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=model_dir,
            trust_remote_code=True,
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=model_dir,
            # device_map='balanced_low_0',
            trust_remote_code=True,
            # torch_dtype=torch.bfloat16,
            # temperature=0.1
            ).to(device)
        self.model = DDP(self.model.eval())
        self.model.generation_config = GenerationConfig.from_pretrained(
            model_dir,
            trust_remote_code=True
        )
        # 可指定不同的生成长度、top_p等相关超参
        self.processor = AutoProcessor.from_pretrained(model_dir)
        print('模型加载完成！')

    def _call(self, question,
              stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None,
              **kwargs: Any):
        # response, history = self.model.chat(self.tokenizer, prompt, history=[])
        template = f"""
        你是一名实体提取和意图识别分类领域专家，请严格遵循以下任务和工作流程的指示输出结果。
        
        任务：
        1-判断用户问题是否存在实体。
        2-抽取用户问题所有实体。
        3-根据实体与给出的意图标签进行判定该用户问题的意图。
        
        意图标签：政策知识，日常知识，招中标知识
        
        工作流程：
        1.-先判断是否存在实体，不存在实体则直接根据不存在实体输出格式输出，存在实体则继续以下工作流程，并通过存在实体输出格式输出。
        2-实体提取：请从用户问题中提取出所有实体。
        3-意图分类：请根据第2点提取的实体以及意图标签，进行意图识别并分类用户问题。
        
        不存在实体输出格式：
            实体提取:[]
            意图分类:日常知识
            
        存在实体输出格式：
            实体提取：[实体1, 实体2, ...]
            意图分类：意图标签
            
        有用的回答：
       """
        messages = [
                    {'role':'system', 'content': template},
                    {'role':'user', 'content': question}
                    ]
        # print('打印messages内容：', messages)
        # print('打印messages类型：', type(messages))
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.tokenizer([text], return_tensors='pt').to(device)
        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        target = response.split('意图分类')[-1]
        def isChinese(target):
            for ch in target: 
                if not '\u4e00' <= ch <= '\u9fff':
                    return False
            return True 
        if not isChinese(target):
            target=re.sub('[^\u4e00-\u9fa5]+','',target)
            # print('意图识别存在非中文字符，调整后：', target)
        # print('意图分类:', target)
        # print('response, messages', response, '\n', messages)
        return target
        
    @property
    def _llm_type(self) -> str:
        return "QwenLM"

In [10]:
# 加载模型
def init_model():
    # 初始化模型
    # model_cache_path = r'autodl-tmp/Qwen/Qwen-7B-Chat'
    model_cache_path = r'autodl-tmp/Qwen/Qwen2.5-7B-Instruct'
    llm = QianWenLLM(model_dir=model_cache_path)
    return llm

In [11]:
# 加载数据
import pandas as pd
import csv
import os
import re
from tqdm import tqdm
# 加载数据文本，提取问题和真实数据源
test_res = []
qa_data = pd.read_excel('问答对.xlsx').iloc[:, 2:]
length = len(qa_data.iloc[0, :])
history = []

# 初始化模型
model = init_model()

file = 'qwen2.5模型意图测试结果.csv'
# data_res = pd.DataFrame([], columns=['Question','Answer', 'reference', 'Metadata', 'Text', '意图分类','问题改写', '回答', 'RAG召回'])
if os.path.exists(file):
    data_tested = pd.read_csv(file, encoding='utf-8')['Question'].tolist()
else:
    data_tested = []
for q in tqdm(range(len(qa_data))):
    question = qa_data.iloc[q, 0]
    
    if question not in data_tested:
        # print('打印问题:', question)
        target = model._call(question)
        data_ = qa_data.iloc[q].values.tolist() + [target]
        with open(file, 'a', newline='', encoding='utf-8') as f:
            fw = csv.writer(f)
            if not os.path.getsize(file):
                header = ['Question', 'Answer', 'reference', 'Metadata', 'Text', '意图分类']
                fw.writerow(header)
            fw.writerow(data_)
    #         # 写入标题行
    # writer.writerow(headers)
    # data_.to_excel('qwen2.5模型测试结果.xlsx', index=False)



正从本地加载模型。。。。。


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.03 GiB (GPU 0; 23.68 GiB total capacity; 23.10 GiB already allocated; 190.69 MiB free; 23.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF