In [1]:
# !pip install -qU langchain langchain-community langchain_chroma langchain_openai langchain_cohere langchain-text-splitters pypdf chromadb
# !pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
# !pip install -U 'optimum[openvino,nncf]'
# !pip install sentence-transformers langchain-huggingface


In [2]:
from langchain_community.document_loaders import PyPDFLoader
# from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate


In [3]:
# Embedding model, loaded through HuggingFaceEmbeddings. The Ollama-served
# alternative (OllamaEmbeddings with model 'mxbai-embed-large') is not used.
model_name = "mixedbread-ai/mxbai-embed-large-v1"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Chat model served through Ollama; "llama3.2" is the other model that was tried.
llm = ChatOllama(model="qwen2.5")

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Recursive character splitter used to break long documents into smaller chunks.
#   chunk_size=3000          -> upper bound on the length of each chunk
#   chunk_overlap=200        -> text shared by neighbouring chunks to keep context
#   length_function=len      -> lengths are measured in characters
#   is_separator_regex=False -> separators are not interpreted as regular expressions
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000, chunk_overlap=200, length_function=len, is_separator_regex=False
)

In [5]:
# Load the PDF, then hand the loaded documents to the splitter explicitly
# (the same two steps the load_and_split convenience wrapper performs).
loader = PyPDFLoader("./books/nio-q2-2024.pdf")
split_documents = text_splitter.split_documents(loader.load())

In [6]:
# Number of chunks produced by the splitter.
len(split_documents)

21

In [7]:

# System-side template: the full document text, wrapped in <document> tags so
# the model can tell exactly where the reference material starts and ends.
DOCUMENT_CONTEXT_PROMPT = """
<document>
{doc_content}
</document>
"""

# Human-side template: the single chunk to situate, wrapped in <chunk> tags so
# it cannot be confused with the instructions that follow it.
CHUNK_CONTEXT_PROMPT = """
Here is the chunk we want to situate within the whole document
<chunk>
{chunk_content}
</chunk>

Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk.
Answer only with the succinct context and nothing else.
"""

In [8]:
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    SystemMessage,
    merge_message_runs,
)

In [9]:
def process_documents(split_documents):
    """Prepend an LLM-written situating context to every chunk, in place.

    For each chunk the model receives the concatenated text of all chunks
    (system message) and the chunk itself (human message). Its answer is
    printed and prepended to the chunk's ``page_content``.

    The generated context is also stored in ``doc.metadata["chunk_context"]``.
    That makes the function safe to re-run: chunks that already carry a
    context are skipped, and the whole-document text is rebuilt from the
    original chunk text instead of from already-prefixed content.

    Args:
        split_documents: Sequence of documents exposing ``page_content`` (str)
            and ``metadata`` (dict). The documents are mutated in place.

    Returns:
        None.
    """
    context_key = "chunk_context"
    separator = "\n\n"

    def original_text(doc):
        # Strip a prefix added by an earlier run so contexts never stack up.
        previous_context = doc.metadata.get(context_key)
        if previous_context:
            prefix = previous_context + separator
            if doc.page_content.startswith(prefix):
                return doc.page_content[len(prefix):]
        return doc.page_content

    pending = [doc for doc in split_documents if not doc.metadata.get(context_key)]
    if not pending:
        # Nothing left to contextualize (empty input or everything processed).
        return

    all_content_in_batch = "\n".join(original_text(doc) for doc in split_documents)

    # Create the system message.
    # cache_control is a prompt-caching hint in Anthropic's message format.
    # NOTE(review): presumably ignored by ChatOllama - confirm before relying on it.
    system_message = SystemMessage([
        {"type": "text", "text": DOCUMENT_CONTEXT_PROMPT.format(doc_content=all_content_in_batch), "cache_control": {"type": "ephemeral"}}
    ])

    for current_doc in pending:
        chunk_text = current_doc.page_content  # pending chunks carry no prefix yet
        messages = [
            system_message,
            HumanMessage([
                {"type": "text", "text": CHUNK_CONTEXT_PROMPT.format(chunk_content=chunk_text)}
            ])
        ]

        # Invoke the LLM
        response = llm.invoke(messages)
        print(response.content)

        # Record the context, then update the document's content
        current_doc.metadata[context_key] = response.content
        current_doc.page_content = response.content + separator + chunk_text

In [10]:

# Usage: makes one LLM call per chunk, prints each generated context, and
# rewrites split_documents in place (every page_content gains the context as a prefix).
process_documents(split_documents)

This chunk provides NIO Inc.'s unaudited financial results for the second quarter of 2024, including vehicle deliveries, revenues, margins, and other key financial metrics. The report compares these figures to those from previous quarters and years.
This financial data pertains to the second quarter of 2024, comparing it with the first quarter of 2024 and the second quarter of 2023. It includes vehicle sales and margin figures, as well as cash reserves.
This financial and operational update covers NIO's revenues, profit margins, and losses from operations for a specific period, along with details on recent deliveries, strategic initiatives like the "Power Up Counties" plan, share issuance for incentive plans, and comments from the CEO regarding market performance and technological advancements.
This financial report excerpt discusses NIO's performance for the second quarter of 2024, focusing on revenues, vehicle sales, and cost optimizations. It highlights William Bin Li's emphasis on 

In [11]:
# Embed the chunks with `embeddings` and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=split_documents, embedding=embeddings)
     

In [12]:

# Over-fetch candidates for the reranker. The default retriever returns only
# 4 documents, which equals the reranker's top_n=4, so reranking could reorder
# the results but never discard a weak one. Fetching 10 lets it actually filter.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [13]:
from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker





In [14]:
# Hugging Face model id of the reranker.
rerank_model = "mixedbread-ai/mxbai-rerank-large-v1"

In [15]:
# Reranker that keeps the 4 best-scoring documents. The optional
# model_kwargs={"device": "cuda"} argument is not passed (it was commented out).
compressor = OpenVINOReranker(model_name_or_path=rerank_model, top_n=4)

Framework not specified. Using pt to export the model.
Using framework PyTorch: 2.4.1+cu121
  op1 = operator(*args, **kwargs)
  op1 = operator(*args, **kwargs)
  if key_layer.size(-2) != query_layer.size(-2):
Compiling the model to CPU ...


In [16]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever

In [17]:
# Wrap the vector-store retriever so its results are passed through the reranker.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)
     

In [18]:
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [19]:
# Fetch the shared "rlm/rag-prompt" template from the LangChain Hub.
# No commit is pinned, so the template may change between runs.
prompt = hub.pull("rlm/rag-prompt")



     



In [20]:
# Show the pulled template; it expects `context` and `question` inputs.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [21]:
def format_docs(docs):
    """Return the text of all documents as one string, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with two newlines; an empty string
        when ``docs`` is empty.
    """
    texts = [doc.page_content for doc in docs]
    return "\n\n".join(texts)


# RAG pipeline. The incoming question is (a) sent through the reranking
# retriever and flattened into a single context string by format_docs, and
# (b) passed through unchanged. Both values fill the prompt, the prompt goes
# to the LLM, and the reply is parsed into a plain string.
rag_chain = (
    {"context": compression_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [22]:
# Ask a sample question; the chain returns the answer as a string.
res = rag_chain.invoke("What is the vehicle sales?")

In [23]:
# Display the generated answer.
print(res)

Vehicle sales for the second quarter of 2024 were RMB15,679.6 million, representing an increase of 118.2% from the second quarter of 2023 and an increase of 87.1% from the first quarter of 2024.


In [24]:
# Saved answers kept for comparing models. Both literals are bare expressions;
# only the last one in the cell is echoed as the cell output.

# Answer recorded for llama3.2
"""
Vehicle sales refer to new vehicles sold during a specific period, typically measured in units or revenue. In this case, NIO's vehicle sales for Q2 2024 were RMB15,679.6 million (US$2,157.6 million). This represents an increase of 87.1% from the first quarter of 2023 and 118.2% from the second quarter of 2023.
"""


# Answer recorded for qwen2.5
# NOTE(review): this text is identical to the llama3.2 answer above and differs
# from the qwen2.5 answer printed by the previous cell - confirm it was pasted correctly.
"""
\nVehicle sales refer to new vehicles sold during a specific period, typically measured in units or revenue. In this case, NIO's vehicle sales for Q2 2024 were RMB15,679.6 million (US$2,157.6 million). This represents an increase of 87.1% from the first quarter of 2023 and 118.2% from the second quarter of 2023.\n
"""

"\nVehicle sales refer to new vehicles sold during a specific period, typically measured in units or revenue. In this case, NIO's vehicle sales for Q2 2024 were RMB15,679.6 million (US$2,157.6 million). This represents an increase of 87.1% from the first quarter of 2023 and 118.2% from the second quarter of 2023.\n"