In [1]:

# import
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings

from langchain import LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from transformers import AutoTokenizer, AutoModel, AutoConfig
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
from torch.mps import empty_cache
import torch
from langchain.chains import RetrievalQA


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class GLM(LLM):
    max_token: int = 2048
    temperature: float = 0.8
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 1024
    
    def __init__(self):
        super().__init__()
        
    @property
    def _llm_type(self) -> str:
        return "GLM"
            
    def load_model(self, llm_device="gpu",model_name_or_path=None):
        model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True)
        self.model = AutoModel.from_pretrained(model_name_or_path, config=model_config, trust_remote_code=True, device='cuda:5').half() # GLM模块装在gpu: 6



    def _call(self,prompt:str,history:List[str] = [],stop: Optional[List[str]] = None):
        response, _ = self.model.chat(
                    self.tokenizer,prompt,
                    history=history[-self.history_len:] if self.history_len > 0 else [],
                    max_length=self.max_token,temperature=self.temperature,
                    top_p=self.top_p)
        return response

In [3]:
# 访谈记录的原始文本加载

loader = TextLoader("/data1/dxw_data/llm/mkt_llm/mkt_medicine/input.txt", encoding="utf-8")
documents = loader.load()


In [5]:
# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# create the open-source embedding function
model_kwargs = {'device': 'cuda:6'}  # embedding模块装在gpu: 7
embedding_function = HuggingFaceEmbeddings(model_name='/data1/dxw_data/llm/text2vec-large-chinese',model_kwargs=model_kwargs) # 会报错“No sentence-transformers model found”但是不影响使用,这只是huggingface的检测问题。

# load it into Chroma
db = Chroma.from_documents(docs, embedding_function)



Created a chunk of size 109, which is longer than the specified 100
Created a chunk of size 336, which is longer than the specified 100
Created a chunk of size 450, which is longer than the specified 100
Created a chunk of size 419, which is longer than the specified 100
Created a chunk of size 248, which is longer than the specified 100
Created a chunk of size 103, which is longer than the specified 100
Created a chunk of size 105, which is longer than the specified 100
Created a chunk of size 179, which is longer than the specified 100
Created a chunk of size 161, which is longer than the specified 100
Created a chunk of size 229, which is longer than the specified 100
Created a chunk of size 396, which is longer than the specified 100
Created a chunk of size 385, which is longer than the specified 100
Created a chunk of size 192, which is longer than the specified 100
Created a chunk of size 777, which is longer than the specified 100
Created a chunk of size 168, which is longer tha

In [6]:
import sys
modelpath = "/data1/dxw_data/llm/chatglm3-6b"
sys.path.append(modelpath)
llm = GLM()
llm.load_model(model_name_or_path = modelpath)
#---------------------------至此, 成功加载模型

Loading checkpoint shards: 100%|██████████| 7/7 [00:54<00:00,  7.79s/it]


In [6]:
# ----------------------------直接提问，不使用数据库------------------------ #

In [7]:
from langchain.chains import LLMChain
prompt = PromptTemplate(
    input_variables=["env"],
    template="在{env}疾病，心情如何?",
)

chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run("肺癌"))
 

  warn_deprecated(
  warn_deprecated(


肺癌是一种严重的疾病,可能会对患者的心情产生负面影响。患者可能会感到焦虑、恐惧、抑郁、沮丧、愤怒等情绪。这些情绪可能会因为肺癌的诊断、治疗过程和预后等因素而变得更加严重。

在治疗肺癌时,患者可能会经历手术、化疗、放疗、靶向治疗等不同的治疗方式。这些治疗方式可能会对患者的身体和心理健康产生影响,例如恶心、呕吐、脱发、疲劳、疼痛等。这些症状可能会影响患者的心情,使得他们更加容易感到沮丧、焦虑和沮丧。

肺癌的预后可能会对患者的情绪产生影响。如果患者被诊断出晚期肺癌,他们可能会感到绝望和无助,难以面对死亡的威胁。如果患者被诊断出早期肺癌,他们可能会感到欣喜和希望,但也可能会担心复发和治疗后的副作用。

因此,对于肺癌患者来说,保持积极的心情和态度是非常重要的。他们应该接受治疗,与医生和家庭成员保持良好的沟通,积极参与康复和治疗计划。他们还应该寻求专业心理治疗,以帮助应对情绪问题。


In [None]:
# ----------------------------使用RAG利用数据库检索增强------------------------ #

In [8]:
# 使用RAG检索VUCA数据库增强专家知识
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
query = "根据文档内容,请说明靶向药具体被如何使用?" # 根据业务逻辑
qa.run(query)

'靶向药被具体使用如下：\n\n1. 2023年11月-12月，患者使用埃克替尼进行靶向治疗。\n2. 2023年12月-2024年5月，患者使用贝福替尼进行靶向治疗。'