In [None]:
!pip install langchain 
!pip install openai 
!pip install cohere 
!pip install faiss-cpu 
!pip install tiktoken 
!pip install pypdf 
!pip install sentence_transformers

In [None]:
import os
from getpass import getpass
#
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

#### 文档加载和分割

In [None]:
# Load every PDF found in the folder; the loader returns one Document per page.
# NOTE(review): "Documenation" looks like a misspelling of "Documentation" —
# confirm the folder name on disk before changing the path.
pdf_folder_path = "./Documenation"
loader = PyPDFDirectoryLoader(pdf_folder_path)
docs = loader.load()

# Fail with a clear message instead of an IndexError on the preview below
# (and a less obvious failure later when the vector index is built).
if not docs:
    raise FileNotFoundError(f"No PDF pages were loaded from {pdf_folder_path!r}")
print(len(docs))

# Preview the second page when there is one, otherwise fall back to the first.
preview_index = min(1, len(docs) - 1)
print(docs[preview_index].page_content)

# Split pages into chunks of at most 500 characters, with 100 characters of
# overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(docs)
print(len(texts))

#### 加载向量化模型，本地m3e-base模型

In [None]:
# Local checkpoint directory of the m3e-base sentence-embedding model.
model_name = "/home/work/var/data/ssr-share-data/m3e-base/"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

# HuggingFaceBgeEmbeddings prepends a BGE-specific instruction to every query
# before encoding it. m3e-base is not a BGE model and is presumably trained
# without that prefix, so the instruction is disabled here: queries and
# document chunks are then embedded the same way.
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs,
    query_instruction="",
)

#### 利用FAISS构建向量检索器

In [None]:
def pretty_print_docs(docs):
    """Print the page content of each document, numbered and separated by a dashed rule."""
    separator = f"\n{'-' * 100}\n"
    print(
        separator.join(
            f"Document {i + 1}:\n\n{d.page_content}" for i, d in enumerate(docs)
        )
    )


# Build the FAISS index from the chunked documents and expose it as a
# retriever that returns the 20 nearest chunks per query.
vectorstore = FAISS.from_documents(texts, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})

query = "what is Controlled Text Generation?"
# Note: this rebinds `docs` (previously the loaded PDF pages) to the retrieved chunks.
docs = retriever.get_relevant_documents(query)
pretty_print_docs(docs)

#### 本地加载模型，构建推理llm

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Local checkpoint directory of the ChatGLM3-6B model.
model_name_or_path = "/home/work/var/data/ssr-share-data/chatglm3-6b"

# Device placement is delegated to device_map="auto".
# trust_remote_code=True allows the modelling code shipped with the checkpoint to run.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=True,
)
model.eval()  # switch to inference mode
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
    trust_remote_code=True,
)

# Text-generation pipeline: sampling is enabled, with the temperature, top-p,
# top-k and repetition-penalty settings below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.1,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

# Wrap the pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

#### 直接进行检索生成的结果和耗时情况

In [None]:
qa = RetrievalQA.from_chain_type(llm=llm,
                                 chain_type="stuff",
                                 retriever=retriever)

%%time
print(qa.run(query=query))

#### 进行检索重排、压缩后的结果和耗时情况

In [None]:
os.environ["COHERE_API_KEY"] = getpass("6seYkmQCuJGxLiUZzEnPVXgLi118QvPGxbjw1tfL")
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.get_relevant_documents(query)
print(compressed_docs)

qa = RetrievalQA.from_chain_type(llm=llm,
                                 chain_type="stuff",
                                 retriever=compression_retriever )

%%time
print(qa.run(query=query))