In [1]:
import os
import re
# os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import torch
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html
import platform
from transformers import AutoTokenizer, AutoModel
from utility.utils import config_dict
from utility.loggers import logger
from sentence_transformers import util
from local_database import db_operate
from utils import obtain_sql, retrieval_related_table, execute_sql
from prompt import query_template, chatbot_prompt

# RAG
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from transformers import AutoTokenizer, AutoModel, AutoConfig
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
from torch.mps import empty_cache
import torch
from langchain.chains import RetrievalQA


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class GLM(LLM):
    """LangChain ``LLM`` adapter around a locally loaded chat model.

    The tokenizer/model pair is populated by :meth:`load_model`; afterwards
    :meth:`_call` forwards each prompt to ``model.chat`` together with the
    generation settings stored on the instance.
    """

    # Generation settings forwarded to ``model.chat`` on every call.
    max_token: int = 2048
    temperature: float = 0.8
    # Annotated like its sibling fields; the original left this one un-annotated,
    # which pydantic-based base classes may reject or infer differently.
    top_p: float = 0.9
    # Filled in by ``load_model``; ``None`` until then.
    tokenizer: object = None
    model: object = None
    # Upper bound on the number of trailing history entries passed per call.
    history_len: int = 1024

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        """Identifier string reported for this LLM implementation."""
        return "GLM"

    def load_model(self, llm_device="gpu", model_name_or_path=None, device="cuda:5"):
        """Load the config, tokenizer and half-precision model from a checkpoint.

        Args:
            llm_device: Kept for backward compatibility; it is not consulted
                when placing the model (it was unused in the original too).
            model_name_or_path: Checkpoint directory or model id handed to the
                ``from_pretrained`` loaders. Required.
            device: Device string forwarded to ``AutoModel.from_pretrained``.
                Defaults to ``"cuda:5"``, the value previously hard-coded.
                NOTE(review): the original inline note said the GLM module sits
                on "gpu: 6" -- presumably 1-based numbering; confirm.

        Raises:
            ValueError: If ``model_name_or_path`` is not provided.
        """
        if model_name_or_path is None:
            raise ValueError("model_name_or_path must be provided to load_model()")

        model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
        self.model = AutoModel.from_pretrained(
            model_name_or_path,
            config=model_config,
            trust_remote_code=True,
            device=device,
        ).half()

    def _call(self, prompt: str, history: Optional[List[str]] = None, stop: Optional[List[str]] = None):
        """Send ``prompt`` to the loaded model and return its text response.

        Args:
            prompt: Text passed to ``model.chat``.
            history: Prior conversation entries; only the last ``history_len``
                entries are forwarded. ``None`` means no history.
            stop: Accepted for signature compatibility; not applied here.

        Returns:
            The first element of the tuple returned by ``model.chat``.

        Raises:
            RuntimeError: If ``load_model`` has not been called yet.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("GLM model is not loaded; call load_model() first.")

        # A fresh list per call avoids sharing one default list across calls.
        if history is None:
            history = []
        recent_history = history[-self.history_len:] if self.history_len > 0 else []

        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=recent_history,
            max_length=self.max_token,
            temperature=self.temperature,
            top_p=self.top_p,
        )
        return response

In [3]:

# Read the UTF-8 reference text that is later chunked and indexed for retrieval.
loader = TextLoader(
    "/data1/dxw_data/SAFE/future_media_llm/GLMSQL/data/daily.txt",
    encoding="utf-8",
)
documents = loader.load()


In [4]:
# Chunk the loaded documents (target chunk size 100, no overlap between chunks).
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
docs = text_splitter.split_documents(documents)

# Build the open-source embedding function.
# NOTE(review): the original note said the embedding module sits on "gpu: 7"
# while the device string is cuda:6 -- presumably 1-based numbering; confirm.
model_kwargs = dict(device='cuda:6')
# Loading logs "No sentence-transformers model found"; per the original note
# this does not affect usage and is only a HuggingFace detection message.
embedding_function = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name='/data1/dxw_data/llm/text2vec-large-chinese',
)

# Index the chunks in a Chroma vector store.
db = Chroma.from_documents(docs, embedding_function)



Created a chunk of size 104, which is longer than the specified 100
Created a chunk of size 107, which is longer than the specified 100
Created a chunk of size 142, which is longer than the specified 100
Created a chunk of size 135, which is longer than the specified 100
No sentence-transformers model found with name /data1/dxw_data/llm/text2vec-large-chinese. Creating a new one with MEAN pooling.


In [5]:
import sys

# Checkpoint directory; it is also appended to sys.path before loading.
modelpath = "/data1/dxw_data/llm/chatglm3-6b"
sys.path.append(modelpath)

# Instantiate the LangChain wrapper and load tokenizer + model from the checkpoint.
llm = GLM()
llm.load_model(model_name_or_path=modelpath)

# Expose the raw tokenizer/model as notebook globals for direct model.chat calls.
tokenizer, model = llm.tokenizer, llm.model
# --------------------------- model successfully loaded at this point

  return self.fget.__get__(instance, owner)()
Loading checkpoint shards: 100%|██████████| 7/7 [00:10<00:00,  1.43s/it]


In [6]:

def parse_text(text):
    """Convert text containing Markdown code fences into a single HTML string.

    Adapted from https://github.com/GaiZhenbiao/ChuanhuChatGPT/.

    Empty lines are dropped. A line containing a triple backtick toggles
    "inside code" state: an opening fence becomes
    ``<pre><code class="language-...">`` (the language is whatever follows the
    last backtick) and a closing fence becomes ``<br></code></pre>``. Every
    other line except the very first is prefixed with ``<br>``; while inside a
    fence, such lines also have Markdown/HTML-sensitive characters escaped.

    Args:
        text: Raw text to convert.

    Returns:
        The converted lines concatenated without separators. The result is
        also printed to stdout, as in the original implementation.
    """
    # Applied in this exact order to lines inside a code fence. The backtick
    # replacement is spelled "\\`" (backslash + backtick) because "\`" is an
    # invalid escape sequence; the runtime value is the same.
    code_escapes = (
        ("`", "\\`"),
        ("<", "&lt;"),
        (">", "&gt;"),
        (" ", "&nbsp;"),
        ("*", "&ast;"),
        ("_", "&lowbar;"),
        ("-", "&#45;"),
        (".", "&#46;"),
        ("!", "&#33;"),
        ("(", "&#40;"),
        (")", "&#41;"),
        ("$", "&#36;"),
    )

    lines = [line for line in text.split("\n") if line != ""]
    count = 0  # number of fence lines seen so far; odd means "inside a fence"
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f'<pre><code class="language-{items[-1]}">'
            else:
                lines[i] = '<br></code></pre>'
        elif i > 0:
            # The first line (i == 0) is intentionally left untouched.
            if count % 2 == 1:
                for raw, escaped in code_escapes:
                    line = line.replace(raw, escaped)
            lines[i] = "<br>" + line
    text = "".join(lines)
    print(text)
    return text

def predict(input, history):
    """Ask the chat model for SQL answering ``input`` and hand it to ``execute_sql``.

    Args:
        input: The user's request text; substituted for ``<user_input>`` in the
            assembled prompt.
        history: Conversation history forwarded to ``retrieval_related_table``
            and ``model.chat``.

    Returns:
        A ``(response, history)`` tuple: ``response`` is the result of
        ``obtain_sql`` applied to the HTML-converted model output, and
        ``history`` is the history returned by ``model.chat``.
    """
    # Database handle is created first, before any prompt work.
    dboperate = db_operate(config_dict['db_path'])

    # Prompt = chatbot prompt enriched by retrieval_related_table (top 3),
    # followed by the query template with the user's text filled in.
    prompt_with_tables = retrieval_related_table(chatbot_prompt, input, history, top_k=3)
    query = (prompt_with_tables + query_template).replace("<user_input>", input)

    response, history = model.chat(
        tokenizer,
        query,
        history=history,
        max_length=2048,
        top_p=0.7,
        temperature=0.2,
    )

    # NOTE(review): parse_text HTML-escapes fenced code before obtain_sql sees
    # it; presumably obtain_sql copes with that -- verify.
    response = obtain_sql(parse_text(response))

    # Start from a chatbot list holding one empty entry, then pass it along
    # with the SQL to execute_sql; its return value is not used afterwards.
    chatbot = [("", "")]
    print(f"Debug: Initial chatbot: {chatbot}")  # show the initial chatbot list
    execute_sql(response, chatbot, dboperate)

    return response, history



In [17]:
user_input = "请帮我查询是否有人群斗殴事件"

# Business-specific instruction: ask for synonyms of the terms in the question.
query = f"根据文档内容,找到问题({user_input})中的同义词。如果没有同义词，就概括类似的事件，提炼出几个同义词。直接回复同义词"

# Use RAG retrieval over the VUCA database to add expert knowledge.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)
result = qa({"query": query})
print(result["result"])

问题中的同义词有：激烈的打斗、混战、激烈的争斗、拳脚相向。类似的事件包括：在小巷子里，小明和一群人扭打在一起，场面变得混乱而激烈；小明与一群人发生激烈的争斗，场面变得混乱；小明与一群人激烈的打斗，场面变得混乱。


In [26]:

# Ask the model to condense the retrieved answer into a few keywords.
context = "".join(("请精简下列的回答，仅仅输出几个词语总结：", result["result"]))
response1, history = model.chat(tokenizer, context, history=[])
print(response1)

激烈的打斗、混战、激烈的争斗、拳脚相向。混乱场面:小明与一群人扭打、发生激烈的争斗、激烈的打斗。


In [31]:
# Combine the original question with the extracted synonyms and ask the model
# to rewrite them into one broader question.
context = "".join([
    "请你根据问题：",
    user_input,
    f"。  并根据同义词：{response1}",
    "  重新归纳出一个全面的问题，覆盖原问题核心词和同义词。请直接输出最终问题",
])
response, history = model.chat(tokenizer, context, history=[])
print(response)

请问是否有人群斗殴事件发生，包括激烈的打斗、混战、激烈的争斗和拳脚相向等混乱场面，如小明与一群人扭打、发生激烈的争斗和激烈的打斗等。


In [30]:

# Simulate the user input (the rewritten question) with an empty history.
# NOTE(review): this overwrites `user_input` and, below, `response`, so
# re-running this cell feeds predict()'s previous result back in as the
# question; the execution counts also suggest out-of-order runs -- confirm
# with Restart & Run All.
user_input, history = response, []

# Call predict with the rewritten question.
response, history = predict(user_input, history)

# Show the value returned by predict.
print(response)

SELECT COUNT(DISTINCT event_type) FROM event_record WHERE glm_output LIKE '%激烈的打斗%' OR glm_output LIKE '%混战%' OR glm_output LIKE '%激烈的争斗%' OR glm_output LIKE '%拳脚相向%';
Debug: Initial chatbot: [('', '')]
Debug: Response in execute_sql: SELECT COUNT(DISTINCT event_type) FROM event_record WHERE glm_output LIKE '%激烈的打斗%' OR glm_output LIKE '%混战%' OR glm_output LIKE '%激烈的争斗%' OR glm_output LIKE '%拳脚相向%';
Debug: Chatbot before SQL execution: [('', '')]
Debug: Chatbot before updating: [('', '')]
SELECT COUNT(DISTINCT event_type) FROM event_record WHERE glm_output LIKE '%激烈的打斗%' OR glm_output LIKE '%混战%' OR glm_output LIKE '%激烈的争斗%' OR glm_output LIKE '%拳脚相向%';
