###### https://medium.com/the-ai-forum/implementing-contextual-retrieval-in-rag-pipeline-8f1bc7cbd5e0

In [50]:
import hashlib
import os
import getpass
from typing import List, Tuple
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
from rank_bm25 import BM25Okapi
from langchain.retrievers import ContextualCompressionRetriever,BM25Retriever,EnsembleRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers.embeddings_redundant_filter import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_community.embeddings import HuggingFaceEmbeddings
import time
import random
from transformers import AutoTokenizer

In [205]:
import os
from dotenv import load_dotenv
load_dotenv()

True

### Storage Class - FAISS

In [2]:
class MyFAISSStorage:
    def __init__(self, storage_path: str, embeddings):
        """
        Args:
            storage_path (str): Path to the directory where the FAISS index will be saved/loaded.
            embeddings: Embeddings model (e.g., OpenAIEmbeddings, SentenceTransformerEmbeddings).
        """
        self.storage_path = storage_path
        self.embeddings = embeddings
        self.vectorstore = None

    def _index_exists(self) -> bool:
        """
        Returns:
            bool: True if the index exists, False otherwise.
        """
        return os.path.exists(os.path.join(self.storage_path, "index.faiss"))

    def _load_or_create_index(self):
        if self._index_exists():
            print("Loading existing FAISS index...")
            self.vectorstore = FAISS.load_local(
                self.storage_path, self.embeddings)
        else:
            print("Creating new FAISS index...")
            self.vectorstore = FAISS.from_documents([], self.embeddings)

    def add_documents(self, documents: list[Document]):
        """
        Args:
            documents (list[Document]): List of Document objects to add to the index.
        """
        if self.vectorstore is None:
            self._load_or_create_index()

        print(f"Adding {len(documents)} documents to the FAISS index...")
        self.vectorstore.add_documents(documents)
        return self.vectorstore

    def save_index(self):
        if self.vectorstore is None:
            raise ValueError(
                "FAISS index not initialized. Add documents first.")

        print("Saving FAISS index...")
        self.vectorstore.save_local(self.storage_path)

    def query(self, query: str, k: int = 5) -> list[Document]:
        """
        Args:
            query (str): The query string.
            k (int): Number of documents to retrieve.

        Returns:
            list[Document]: List of Document objects most similar to the query.
        """
        if self.vectorstore is None:
            raise ValueError(
                "FAISS index not initialized. Add documents first.")

        print(f"Querying FAISS index for: '{query}'")
        return self.vectorstore.similarity_search(query, k=k)

##### Create a RAG pipeline

### RAG pipeline

In [238]:
class MyRAGPipeline:
    def __init__(self, ):
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100,
        )
        #self.embeddings = OpenAIEmbeddings()

        model_name = "BAAI/bge-large-en-v1.5"
        model_kwargs = {'device': 'cpu'}
        encode_kwargs = {'normalize_embeddings': False}
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )
        # self.llm = ChatOpenAI(
        #     model="gpt-4o",
        #     temperature=0,
        #     max_tokens=None,
        #     timeout=None,
        #     max_retries=2,
        # )

        # self.llm = ChatGroq(
        #     model="llama-3.2-3b-preview",
        #     temperature=0,
        #     max_tokens=None,
        #     timeout=None,
        #     max_retries=2,
        # )

        self.llm = ChatGroq(
            model="mixtral-8x7b-32768",
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

        self.storage_class: MyFAISSStorage = MyFAISSStorage("./faiss_storage", self.embeddings)
        self.tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
        # self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
        # self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

    
    def process_document(self, document: str) -> Tuple[List[Document], List[Document]]:
        chunks = self.text_splitter.create_documents([document])
        contextualized_chunks = self._generate_contextualized_chunks(document, chunks)
        return chunks, contextualized_chunks

    def split_document(self, document: str, max_retries: int = 1, delay: int = 60) -> Tuple[List[Document], List[Document]]:
        chunks = self.text_splitter.split_documents(document)
        print(f"Total number of chunks in document: {len(chunks)}")
        contextualized_chunks = self._generate_contextualized_chunks(document, chunks)
        return chunks, contextualized_chunks

    def _generate_contextualized_chunks(self, document: str, chunks: List[Document], max_retries: int = 1, delay: int = 60) -> List[Document]:
        contextualized_chunks = []
        for chunk in chunks:
            retries = 0
            while retries <= max_retries:
                try:
                    context = self._generate_context(document, chunk.page_content)
                    contextualized_content = f"{context}\n\n{chunk.page_content}"
                    contextualized_chunks.append(Document(page_content=contextualized_content, metadata=chunk.metadata))
                    break
                except Exception as e:
                    if "rate limit" in str(e).lower() or "exceeded" in str(e).lower() or "quota" in str(e).lower():
                        retries += 1
                        if retries > max_retries:
                            print(f"Max retries ({max_retries}) exceeded for chunk: {chunk.page_content[:50]}...")
                            raise e
                        delay_with_randomness = delay + random.random()
                        print(f"Rate limit error: {e}. Retrying chunk in {delay_with_randomness:.2f} seconds...")
                        time.sleep(delay_with_randomness)
                    else:
                        print(f"Error processing chunk: {chunk.page_content[:50]}... Error: {e}")
                        raise e
        return contextualized_chunks

    def _generate_context(self, document: str, chunk: str) -> str:
        relevant_document = self._extract_relevant_part(document, chunk)

        print(f"Length of the relevant document: {len(relevant_document)}")

        prompt = ChatPromptTemplate.from_template("""
        You are an AI assistant specializing in document analysis. Your task is to provide brief, relevant context for a chunk of text from the whitepaper report.
        Here is the whitepaper:
        <document>
        {document}
        </document>

        Here is the chunk we want to situate within the whole document::
        <chunk>
        {chunk}
        </chunk>

        Provide a concise context (2-3 sentences) for this chunk, considering the following guidelines:
        Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.

        Context:
        """)
        messages = prompt.format_messages(document=relevant_document, chunk=chunk)
        response = self.llm.invoke(messages)
        return response.content

    def _extract_relevant_part(self, document: List[Document], chunk: str) -> str:
        """
        Extract a relevant part of the document for context generation.
        This reduces the number of tokens sent to the LLM.
        """
        # Find the position of the chunk in the document
        chunk_start = document[0].page_content.find(chunk)
        chunk_end = chunk_start + len(chunk)
        
        # Extract a window of text around the chunk (e.g., 2048 characters before and after)
        window_size = 2048
        start = max(0, chunk_start - window_size)
        end = min(len(document[0].page_content), chunk_end + window_size)
        
        return document[0].page_content[start:end]

    def _extract_relevant_part_with_maxtokens(self, document: List[Document], chunk: str, max_tokens: int = 2048) -> str:
        chunk_tokens = len(self.tokenizer.encode(chunk))
        
        # Calculate the maximum allowed tokens for context
        max_context_tokens = max_tokens - chunk_tokens
        
        # Estimate the number of characters per token (average is ~4 characters per token)
        chars_per_token = 4
        max_context_chars = max_context_tokens * chars_per_token
        
        # Find the position of the chunk in the document
        chunk_start = document[0].page_content.find(chunk)
        chunk_end = chunk_start + len(chunk)
        
        # Extract a window of text around the chunk
        window_size = min(max_context_chars, len(document[0].page_content))
        start = max(0, chunk_start - window_size // 2)
        end = min(len(document[0].page_content), chunk_end + window_size // 2)
        
        return document[0].page_content[start:end]

    def create_inmemory_vectorstores(self, chunks: List[Document]) -> FAISS:
        return FAISS.from_documents(chunks, self.embeddings)

    def create_vectorstores(self, chunks: List[Document]) -> FAISS:
        return self.storage_class.add_documents(chunks)

    def save_vectorstores(self):
        self.storage_class.save_index()

    def create_bm25_index(self, chunks: List[Document]) -> BM25Okapi:
        tokenized_chunks = [chunk.page_content.split() for chunk in chunks]
        return BM25Okapi(tokenized_chunks)
    
    def create_flashrank_index(self,vectorstore):
        retriever = vectorstore.as_retriever(search_kwargs={"k":20})
        compression_retriever = ContextualCompressionRetriever(base_compressor=FlashrankRerank(), base_retriever=retriever)
        return compression_retriever

    def create_bm25_retriever(self, chunks: List[Document]) -> BM25Retriever:
        bm25_retriever = BM25Retriever.from_documents(chunks)
        return bm25_retriever
    
    def create_ensemble_retriever_reranker(self, vectorstore, bm25_retriever) -> EnsembleRetriever:
        retriever_vs = vectorstore.as_retriever(search_kwargs={"k":20})
        bm25_retriever.k =10
        ensemble_retriever = EnsembleRetriever(
            retrievers=[retriever_vs, bm25_retriever],
            weights=[0.5, 0.5]
        )
        redundant_filter = EmbeddingsRedundantFilter(embeddings=self.embeddings)
        reranker = FlashrankRerank()
        pipeline_compressor = DocumentCompressorPipeline(transformers=[redundant_filter, reranker])
        compression_pipeline = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
                                                      base_retriever=ensemble_retriever)
        return compression_pipeline

    @staticmethod
    def generate_cache_key(document: str) -> str:
        """
        Generate a cache key for a document.
        """
        return hashlib.md5(document.encode()).hexdigest()

    def generate_answer(self, query: str, relevant_chunks: List[str]) -> str:
        prompt = ChatPromptTemplate.from_template("""
        Based on the following information, please provide a concise and accurate answer to the question.
        If the information is not sufficient to answer the question, say so.

        Question: {query}

        Relevant information:
        {chunks}

        Answer:
        """)
        messages = prompt.format_messages(query=query, chunks="\n\n".join(relevant_chunks))
        response = self.llm.invoke(messages)
        return response.content

In [188]:
from llama_parse import LlamaParse
from langchain_community.document_loaders import UnstructuredMarkdownLoader

instruction = """The provided document is a PDF file containing structured and unstructured content.
It may include financial information, tables, management discussions, and analyses.
Try to capture the essence of the document, including text, tables, and key highlights.
Be precise and ensure data integrity while processing."""


async def parse_pdf(file_path: str):
  parser = LlamaParse(
      result_type="markdown",
      parsing_instruction=instruction,
      max_timeout=5000,
  )
  return await parser.aload_data(file_path)


async def load_and_combine_documents(folder_path: str, output_folder: str):
  for filename in os.listdir(folder_path):
    combined_content = ""
    file_path = os.path.join(folder_path, filename)
    if filename.endswith('.pdf'):
        print(f"Parsing {filename}...")
        parsed_data = await parse_pdf(file_path)
        combined_content += f"# Document: {filename}\n\n{parsed_data}\n\n"
    else:
        print(f"Unsupported file type: {filename}")
    output_file = output_folder + "/" + os.path.splitext(filename)[0] + ".md"
    with open(output_file, "w", encoding="utf-8") as md_file:
        md_file.write(combined_content)
    print(f"All documents combined into {output_file}")


def read_markdown_with_loader(folder_path: str):
  documents = []
  for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if filename.endswith('.md'):
      loader = UnstructuredMarkdownLoader(file_path)
      documents.append(loader.load())
  return documents

##### Insatiate RAG Pipeline

In [None]:
folder_path = "./dataset/source"
output_folder = "./dataset/converted_md"
await load_and_combine_documents(folder_path, output_folder)

In [189]:
documents = read_markdown_with_loader(output_folder)

In [239]:
my_rag = MyRAGPipeline()

##### Sample Document

In [7]:
# Example financial document (approximately 5000 words, keyword-rich)
document_old = """
    Tesla, Inc. (TSLA) Financial Analysis and Market Overview - Q3 2023

    Executive Summary:
    Tesla, Inc. (NASDAQ: TSLA) continues to lead the electric vehicle (EV) market, showcasing strong financial performance and strategic growth initiatives in Q3 2023. This comprehensive analysis delves into Tesla's financial statements, market position, and future outlook, providing investors and stakeholders with crucial insights into the company's performance and potential.

    1. Financial Performance Overview:

    Revenue:
    Tesla reported total revenue of $23.35 billion in Q3 2023, marking a 9% increase year-over-year (YoY) from $21.45 billion in Q3 2022. The automotive segment remained the primary revenue driver, contributing $19.63 billion, up 5% YoY. Energy generation and storage revenue saw significant growth, reaching $1.56 billion, a 40% increase YoY.

    Profitability:
    Gross profit for Q3 2023 stood at $4.18 billion, with a gross margin of 17.9%. While this represents a decrease from the 25.1% gross margin in Q3 2022, it remains above industry averages. Operating income was $1.76 billion, resulting in an operating margin of 7.6%. Net income attributable to common stockholders was $1.85 billion, translating to diluted earnings per share (EPS) of $0.53.

    Cash Flow and Liquidity:
    Tesla's cash and cash equivalents at the end of Q3 2023 were $26.08 billion, a robust position that provides ample liquidity for ongoing operations and future investments. Free cash flow for the quarter was $0.85 billion, reflecting the company's ability to generate cash despite significant capital expenditures.

    2. Operational Highlights:

    Production and Deliveries:
    Tesla produced 430,488 vehicles in Q3 2023, a 17% increase YoY. The Model 3/Y accounted for 419,666 units, while the Model S/X contributed 10,822 units. Total deliveries reached 435,059 vehicles, up 27% YoY, demonstrating strong demand and improved production efficiency.

    Manufacturing Capacity:
    The company's installed annual vehicle production capacity increased to over 2 million units across its factories in Fremont, Shanghai, Berlin-Brandenburg, and Texas. The Shanghai Gigafactory remains the highest-volume plant, with an annual capacity exceeding 950,000 units.

    Energy Business:
    Tesla's energy storage deployments grew by 90% YoY, reaching 4.0 GWh in Q3 2023. Solar deployments also increased by 48% YoY to 106 MW, reflecting growing demand for Tesla's energy products.

    3. Market Position and Competitive Landscape:

    Global EV Market Share:
    Tesla maintained its position as the world's largest EV manufacturer by volume, with an estimated global market share of 18% in Q3 2023. However, competition is intensifying, particularly from Chinese manufacturers like BYD and established automakers accelerating their EV strategies.

    Brand Strength:
    Tesla's brand value continues to grow, ranked as the 12th most valuable brand globally by Interbrand in 2023, with an estimated brand value of $56.3 billion, up 4% from 2022.

    Technology Leadership:
    The company's focus on innovation, particularly in battery technology and autonomous driving capabilities, remains a key differentiator. Tesla's Full Self-Driving (FSD) beta program has expanded to over 800,000 customers in North America, showcasing its advanced driver assistance systems.

    4. Strategic Initiatives and Future Outlook:

    Product Roadmap:
    Tesla reaffirmed its commitment to launching the Cybertruck in 2023, with initial deliveries expected in Q4. The company also hinted at progress on a next-generation vehicle platform, aimed at significantly reducing production costs.

    Expansion Plans:
    Plans for a new Gigafactory in Mexico are progressing, with production expected to commence in 2025. This facility will focus on producing Tesla's next-generation vehicles and expand the company's North American manufacturing footprint.

    Battery Production:
    Tesla continues to ramp up its in-house battery cell production, with 4680 cells now being used in Model Y vehicles produced at the Texas Gigafactory. The company aims to achieve an annual production rate of 1,000 GWh by 2030.

    5. Risk Factors and Challenges:

    Supply Chain Constraints:
    While easing compared to previous years, supply chain issues continue to pose challenges, particularly in sourcing semiconductor chips and raw materials for batteries.

    Regulatory Environment:
    Evolving regulations around EVs, autonomous driving, and data privacy across different markets could impact Tesla's operations and expansion plans.

    Macroeconomic Factors:
    Rising interest rates and inflationary pressures may affect consumer demand for EVs and impact Tesla's profit margins.

    Competition:
    Intensifying competition in the EV market, especially in key markets like China and Europe, could pressure Tesla's market share and pricing power.

    6. Financial Ratios and Metrics:

    Profitability Ratios:
    - Return on Equity (ROE): 18.2%
    - Return on Assets (ROA): 10.3%
    - EBITDA Margin: 15.7%

    Liquidity Ratios:
    - Current Ratio: 1.73
    - Quick Ratio: 1.25

    Efficiency Ratios:
    - Asset Turnover Ratio: 0.88
    - Inventory Turnover Ratio: 11.2

    Valuation Metrics:
    - Price-to-Earnings (P/E) Ratio: 70.5
    - Price-to-Sales (P/S) Ratio: 7.8
    - Enterprise Value to EBITDA (EV/EBITDA): 41.2

    7. Segment Analysis:

    Automotive Segment:
    - Revenue: $19.63 billion (84% of total revenue)
    - Gross Margin: 18.9%
    - Key Products: Model 3, Model Y, Model S, Model X

    Energy Generation and Storage:
    - Revenue: $1.56 billion (7% of total revenue)
    - Gross Margin: 14.2%
    - Key Products: Powerwall, Powerpack, Megapack, Solar Roof

    Services and Other:
    - Revenue: $2.16 billion (9% of total revenue)
    - Gross Margin: 5.3%
    - Includes vehicle maintenance, repair, and used vehicle sales

    8. Geographic Revenue Distribution:

    - United States: $12.34 billion (53% of total revenue)
    - China: $4.67 billion (20% of total revenue)
    - Europe: $3.97 billion (17% of total revenue)
    - Other: $2.37 billion (10% of total revenue)

    9. Research and Development:

    Tesla invested $1.16 billion in R&D during Q3 2023, representing 5% of total revenue. Key focus areas include:
    - Next-generation vehicle platform development
    - Advancements in battery technology and production processes
    - Enhancements to Full Self-Driving (FSD) capabilities
    - Energy storage and solar technology improvements

    10. Capital Expenditures and Investments:

    Capital expenditures for Q3 2023 totaled $2.46 billion, primarily allocated to:
    - Expansion and upgrades of production facilities
    - Tooling for new products, including the Cybertruck
    - Supercharger network expansion
    - Investments in battery cell production capacity

    11. Debt and Capital Structure:

    As of September 30, 2023:
    - Total Debt: $5.62 billion
    - Total Equity: $43.51 billion
    - Debt-to-Equity Ratio: 0.13
    - Weighted Average Cost of Capital (WACC): 8.7%

    12. Stock Performance and Shareholder Returns:

    - 52-Week Price Range: $152.37 - $299.29
    - Market Capitalization: $792.5 billion (as of October 31, 2023)
    - Dividend Policy: Tesla does not currently pay dividends, reinvesting profits into growth initiatives
    - Share Repurchases: No significant share repurchases in Q3 2023

    13. Corporate Governance and Sustainability:

    Board Composition:
    Tesla's Board of Directors consists of 8 members, with 6 independent directors. The roles of CEO and Chairman are separate, with Robyn Denholm serving as Chairwoman.

    ESG Initiatives:
    - Environmental: Committed to using 100% renewable energy in all operations by 2030
    - Social: Focus on diversity and inclusion, with women representing 29% of the global workforce
    - Governance: Enhanced transparency in supply chain management and ethical sourcing of materials

    14. Analyst Recommendations and Price Targets:

    As of October 31, 2023:
    - Buy: 22 analysts
    - Hold: 15 analysts
    - Sell: 5 analysts
    - Average 12-month price target: $245.67

    15. Upcoming Catalysts and Events:

    - Cybertruck production ramp-up and initial deliveries (Q4 2023)
    - Investor Day 2024 (Date TBA)
    - Potential unveiling of next-generation vehicle platform (2024)
    - Expansion of FSD beta program to additional markets

    Conclusion:
    Tesla's Q3 2023 financial results demonstrate the company's continued leadership in the EV market, with strong revenue growth and operational improvements. While facing increased competition and margin pressures, Tesla's robust balance sheet, technological innovations, and expanding product portfolio position it well for future growth. Investors should monitor key metrics such as production ramp-up, margin trends, and progress on strategic initiatives to assess Tesla's long-term value proposition in the rapidly evolving automotive and energy markets.
    """

##### Process the document

In [191]:
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
tpm_limit = 7000

In [240]:
def get_document_chunks(document, max_retries=1, delay=60):
    details = {}
    source_document = document[0].metadata["source"]
    details["source"] = source_document
    details["original_chunks"], details["contextualized_chunks"] = my_rag.split_document(document, max_retries, delay)
    return details

In [241]:
def process_document(document, documents_chunk, retry_document_processing, current_tokens, delay=60):
  try:
    source_document = document[0].metadata["source"]
    print(f"Processing document: {source_document} with currrent tokens: {current_tokens}")
    details = get_document_chunks(document, 1, delay)
    documents_chunk.append(details)

    tokens = len(tokenizer.encode(str(details)))
    current_tokens += tokens

    if current_tokens >= tpm_limit:
        print(f"TPM limit reached, current tokens are: {current_tokens}. Waiting for {delay} seconds...")
        time.sleep(delay)
        current_tokens = 0

    return current_tokens

  except Exception as e:
      print(f"Failed to process document: {source_document}. Error: {e}")
      if delay > 60: # Hack: it's retrying, don't append again
        retry_document_processing.append(document)

      return current_tokens

In [None]:
documents_chunk = []
retry_document_processing = []
current_tokens = 0

for document in documents:
  current_tokens = process_document(document, documents_chunk, retry_document_processing, current_tokens)

# Retry processing failed documents
if retry_document_processing:
    print("Retrying failed documents...")
    for document in retry_document_processing:
        current_tokens = process_document(document, documents_chunk, retry_document_processing, current_tokens, 120)

In [229]:
retry_document_processing

[]

In [None]:
documents_chunk = []
retry_document_processing = []
current_tokens = 0

# current_tokens = process_document(documents[3], documents_chunk, retry_document_processing, current_tokens)
chunks, contextualized_chunks = my_rag.split_document(documents[3])

In [117]:
print(len(documents_chunk[0]["contextualized_chunks"]))
print(documents_chunk[0]["original_chunks"][0])
print(documents_chunk[0]["contextualized_chunks"][0])

5
page_content='Document: Diabetes-Whitepaper.pdf' metadata={'source': './dataset/converted_md/Diabetes-Whitepaper.md'}
page_content='This chunk discusses Kaiser Permanente's diabetes care program, highlighting its comprehensive approach to managing the condition, with a focus on blood pressure and blood sugar control. The information is presented in the context of a national leader in diabetes care, with specific statistics and comparisons to other healthcare providers. This context is unrelated to Tesla's financial performance, but rather serves as a comparison to a healthcare organization's metrics.

Document: Diabetes-Whitepaper.pdf' metadata={'source': './dataset/converted_md/Diabetes-Whitepaper.md'}


##### Create vectorstore

In [9]:
for document_chunk in documents_chunk:
  original_vectorstore = my_rag.create_vectorstores(document_chunk["original_chunks"])
  contextualized_vectorstore = my_rag.create_vectorstores(document_chunk["contextualized_chunks"])

  bm25_retriever_original = my_rag.create_bm25_retriever(document_chunk["original_chunks"])
  bm25_retriever_contextualized = my_rag.create_bm25_retriever(document_chunk["contextualized_chunks"])
  original_ensemble_retriever_reranker = my_rag.create_ensemble_retriever_reranker(original_vectorstore, bm25_retriever_original)
  contextualized_ensemble_retriever_reranker = my_rag.create_ensemble_retriever_reranker(contextualized_vectorstore, bm25_retriever_contextualized)

##### Create BM25 Index

In [10]:
# original_bm25_index = cr.create_bm25_index(original_chunks)
# contextualized_bm25_index = cr.create_bm25_index(contextualized_chunks)

##### Create Reranker

In [11]:
# original_reranker = cr.create_flashrank_index(original_vectorstore)
# contextualized_reranker = cr.create_flashrank_index(contextualized_vectorstore)

INFO:flashrank.Ranker:Downloading ms-marco-MultiBERT-L-12...
ms-marco-MultiBERT-L-12.zip: 100%|██████████| 98.7M/98.7M [00:28<00:00, 3.59MiB/s]


##### Create retriver system with hybrid search with Reranker

In [13]:
# Crete ensemble retriver reranker
bm25_retriever_original = cr.create_bm25_retriever(original_chunks)
#
bm25_retriever_contextualized = cr.create_bm25_retriever(contextualized_chunks)
#
original_ensemble_retriever_reranker = cr.create_ensemble_retriever_reranker(original_vectorstore, bm25_retriever_original)
#
contextualized_ensemble_retriever_reranker = cr.create_ensemble_retriever_reranker(contextualized_vectorstore, bm25_retriever_contextualized)

##### Retrieve context from hybrid retriever

In [14]:
contextualized_ensemble_retriever_reranker.invoke("What was Tesla's total revenue in Q3 2023? what was the gross profit and cash position?")
######################################################################
# [Document(metadata={'id': 2, 'relevance_score': 0.9995201}, page_content="Tesla's financial performance and market position are evaluated through various metrics, including profitability ratios, liquidity ratios, and efficiency ratios, which provide insights into the company's financial health and strategic direction. The Q3 2023 financial report highlights year-over-year changes in revenue, gross margin, and operating income, with a focus on the automotive segment and energy generation and storage business. Key figures include a 9% increase in revenue, a 17.9% gross margin, and an operating margin of 7.6%.\n\nRegulatory Environment:\n    Evolving regulations around EVs, autonomous driving, and data privacy across different markets could impact Tesla's operations and expansion plans.\n\n    Macroeconomic Factors:\n    Rising interest rates and inflationary pressures may affect consumer demand for EVs and impact Tesla's profit margins.\n\n    Competition:\n    Intensifying competition in the EV market, especially in key markets like China and Europe, could pressure Tesla's market share and pricing power.\n\n    6. Financial Ratios and Metrics:\n\n    Profitability Ratios:\n    - Return on Equity (ROE): 18.2%\n    - Return on Assets (ROA): 10.3%\n    - EBITDA Margin: 15.7%\n\n    Liquidity Ratios:\n    - Current Ratio: 1.73\n    - Quick Ratio: 1.25"),
#  Document(metadata={'id': 1, 'relevance_score': 0.99947655}, page_content="Tesla's Q3 2023 financial performance overview, which includes a 9% year-over-year increase in revenue to $23.35 billion, driven by strong growth in the automotive segment and significant increases in energy generation and storage revenue. The company's gross profit and operating income also show improvement, with a 17.9% gross margin and a 7.6% operating margin, respectively. These metrics highlight Tesla's continued financial strength and operational efficiency.\n\nTesla, Inc. (TSLA) Financial Analysis and Market Overview - Q3 2023\n\n    Executive Summary:\n    Tesla, Inc. (NASDAQ: TSLA) continues to lead the electric vehicle (EV) market, showcasing strong financial performance and strategic growth initiatives in Q3 2023. This comprehensive analysis delves into Tesla's financial statements, market position, and future outlook, providing investors and stakeholders with crucial insights into the company's performance and potential.\n\n    1. Financial Performance Overview:"),
#  Document(metadata={'id': 0, 'relevance_score': 0.99941415}, page_content="Tesla's Q3 2023 financial performance, specifically its profitability, cash flow, and liquidity, demonstrate the company's ability to maintain a strong financial position despite increased competition and margin pressures. The results show a decrease in gross margin from Q3 2022 but still above industry averages, with operating income and net income attributable to common stockholders also increasing. Key figures include a gross profit of $4.18 billion, a gross margin of 17.9%, and free cash flow of $0.85 billion.\n\nProfitability:\n    Gross profit for Q3 2023 stood at $4.18 billion, with a gross margin of 17.9%. While this represents a decrease from the 25.1% gross margin in Q3 2022, it remains above industry averages. Operating income was $1.76 billion, resulting in an operating margin of 7.6%. Net income attributable to common stockholders was $1.85 billion, translating to diluted earnings per share (EPS) of $0.53.\n\n    Cash Flow and Liquidity:\n    Tesla's cash and cash equivalents at the end of Q3 2023 were $26.08 billion, a robust position that provides ample liquidity for ongoing operations and future investments. Free cash flow for the quarter was $0.85 billion, reflecting the company's ability to generate cash despite significant capital expenditures.\n\n    2. Operational Highlights:")]


ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)
ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)
ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)


[Document(metadata={'id': 1, 'relevance_score': 0.9995039}, page_content="Tesla's Q3 2023 financial report highlights the company's strong revenue growth, with total revenue increasing by 9% year-over-year to $23.35 billion, driven by the automotive segment's $19.63 billion in revenue, a 5% year-over-year increase. The energy generation and storage segment also saw significant growth, with revenue reaching $1.56 billion, a 40% year-over-year increase. This financial performance overview sets the stage for further analysis of Tesla's profitability, cash flow, and operational highlights.\n\nTesla, Inc. (TSLA) Financial Analysis and Market Overview - Q3 2023\n\n    Executive Summary:\n    Tesla, Inc. (NASDAQ: TSLA) continues to lead the electric vehicle (EV) market, showcasing strong financial performance and strategic growth initiatives in Q3 2023. This comprehensive analysis delves into Tesla's financial statements, market position, and future outlook, providing investors and stakeholde

##### Generate cache key for the document

In [15]:
cache_key = cr.generate_cache_key(document)
#
print(f"Processed {len(original_chunks)} chunks")
print(f"Cache key for the document: {cache_key}")
#
# ###############################################################
# Processed 15 chunks
# Cache key for the document: 8bc3e18738fe5e65f23ecc719972feb8

Processed 15 chunks
Cache key for the document: 8bc3e18738fe5e65f23ecc719972feb8


##### Ask question

In [16]:
# Example queries related to financial information
queries = [
        "How does the automotive gross margin in Q3 2023 compare to the previous year?",
    ]
#
######################################################
for query in queries:
  print(f"\nQuery: {query}")

  # Retrieve from original vectorstore
  original_vector_results = original_vectorstore.similarity_search(query, k=3)

  # Retrieve from contextualized vectorstore
  contextualized_vector_results = contextualized_vectorstore.similarity_search(query, k=3)

  # Retrieve from original BM25
  original_tokenized_query = query.split()
  original_bm25_results = original_bm25_index.get_top_n(original_tokenized_query, original_chunks, n=3)

  # Retrieve from contextualized BM25
  contextualized_tokenized_query = query.split()
  contextualized_bm25_results = contextualized_bm25_index.get_top_n(contextualized_tokenized_query, contextualized_chunks, n=3)

  # Retrieve from Original Reranker
  original_reranker_results = original_reranker.invoke(query)

  # Retrieve from Contextualized Reranker
  contextualized_reranker_results = contextualized_reranker.invoke(query)

  # Retrieve from Original ensemble_retriever_reranker
  original_ensemble_retriever_reranker_results = original_ensemble_retriever_reranker.invoke(query)

  # Retrieve from Contextualized ensemble_retriever_reranker
  contextualized_ensemble_retriever_reranker_results = contextualized_ensemble_retriever_reranker.invoke(query)

  # Generate answers
  original_vector_answer = cr.generate_answer(query, [doc.page_content for doc in original_vector_results])
  contextualized_vector_answer = cr.generate_answer(query, [doc.page_content for doc in contextualized_vector_results])
  original_bm25_answer = cr.generate_answer(query, [doc.page_content for doc in original_bm25_results])
  contextualized_bm25_answer = cr.generate_answer(query, [doc.page_content for doc in contextualized_bm25_results])
  original_reranker_answer = cr.generate_answer(query, [doc.page_content for doc in original_reranker_results])
  contextualized_reranker_answer = cr.generate_answer(query, [doc.page_content for doc in contextualized_reranker_results])
  original_ensemble_retriever_reranker_answer = cr.generate_answer(query, [doc.page_content for doc in original_ensemble_retriever_reranker_results])
  contextualized_ensemble_retriever_reranker_answer = cr.generate_answer(query, [doc.page_content for doc in contextualized_ensemble_retriever_reranker_results])

  print("\nOriginal Vector Search Results:")
  for i, doc in enumerate(original_vector_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nOriginal Vector Search Answer:")
  print(original_vector_answer)
  print("\n" + "-"*50)

  print("\nContextualized Vector Search Results:")
  for i, doc in enumerate(contextualized_vector_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nContextualized Vector Search Answer:")
  print(contextualized_vector_answer)
  print("\n" + "-"*50)

  print("\nOriginal BM25 Search Results:")
  for i, doc in enumerate(original_bm25_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nOriginal BM25 Search Answer:")
  print(original_bm25_answer)
  print("\n" + "-"*50)

  print("\nContextualized BM25 Search Results:")
  for i, doc in enumerate(contextualized_bm25_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nContextualized BM25 Search Answer:")
  print(contextualized_bm25_answer)

  print("\nOriginal Reranker Search Results:")
  for i, doc in enumerate(original_reranker_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nOriginal Reranker Search Answer:")
  print(original_reranker_answer)

  print("\nContextualized Reranker Search Results:")
  for i, doc in enumerate(contextualized_reranker_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")
  
  print("\nContextualized Reranker Search Answer:")
  print(contextualized_reranker_answer)


  print("\nOriginal Ensemble Retriever Reranker Search Results:")
  for i, doc in enumerate(original_ensemble_retriever_reranker_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")

  print("\nOriginal Ensemble Retriever Reranker Search Answer:")
  print(original_ensemble_retriever_reranker_answer)

  print("\nContextualized Ensemble Retriever Reranker Search Results:")
  for i, doc in enumerate(contextualized_ensemble_retriever_reranker_results, 1):
      print(f"{i}. {doc.page_content[:200]}...")
  
  print("\nContextualized Ensemble Retriever Reranker Search Answer:")
  print(contextualized_ensemble_retriever_reranker_answer)
  print("\n" + "="*50)

ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)
ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)
ERROR:langsmith._internal._serde:Failed to use model_dump to serialize <class 'langchain_core.documents.base.Document'> to JSON: PydanticSerializationError(Unable to serialize unknown type: <class 'numpy.float32'>)
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/c


Original Vector Search Results:
1. Profitability:
    Gross profit for Q3 2023 stood at $4.18 billion, with a gross margin of 17.9%. While this represents a decrease from the 25.1% gross margin in Q3 2022, it remains above industry ave...
2. Conclusion:
    Tesla's Q3 2023 financial results demonstrate the company's continued leadership in the EV market, with strong revenue growth and operational improvements. While facing increased compe...
3. Liquidity Ratios:
    - Current Ratio: 1.73
    - Quick Ratio: 1.25

    Efficiency Ratios:
    - Asset Turnover Ratio: 0.88
    - Inventory Turnover Ratio: 11.2

    Valuation Metrics:
    - Price-to...

Original Vector Search Answer:
The information provided is not sufficient to answer the question of how the automotive gross margin in Q3 2023 compares to the previous year. The relevant information for the automotive segment only mentions the gross margin for Q3 2023 as 18.9%, but does not provide a comparison to the gross margin in Q3 2022.
