### Load Your Documents

In [1]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    "https://blog.langchain.dev/langchain-v0-1-0/"
)

documents = loader.load()

### Instantiate Embedding Model

In [2]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002"
)

### Instantiate LLM

In [3]:
from langchain_openai import OpenAI
import os
# from google.colab import userdata
# os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
openai_llm = OpenAI(temperature=0)

### Document Splitter

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1250,
    chunk_overlap = 100,
    length_function = len,
    is_separator_regex = False
)
#
split_docs = text_splitter.split_documents(documents)
print(len(split_docs))

17


### Instantiate the Vectorstore

In [6]:
from langchain_community.vectorstores import Chroma
vectorstore = Chroma(embedding_function=embeddings,
                     persist_directory="Vectorstore/chromadb",
                     collection_name="full_documents")

### Load and persist the split documents into the vectorstore

In [7]:
vectorstore.add_documents(split_docs)
vectorstore.persist()

### Instantiate the Keyword / Sparse embeddings model

In [8]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers import ContextualCompressionRetriever
#
bm25_retriever = BM25Retriever.from_documents(split_docs)
bm25_retriever.k=10

### Instantiate Reranker — Cross Encoders

In [9]:
from __future__ import annotations
from typing import Dict, Optional, Sequence
from langchain.schema import Document
from langchain.pydantic_v1 import Extra, root_validator

from langchain.callbacks.manager import Callbacks
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor

from sentence_transformers import CrossEncoder
# from config import bge_reranker_large

class BgeRerank(BaseDocumentCompressor):
    #  BAAI/bge-reranker-large
    model_name:str = 'BAAI/bge-small-en-v1.5'
    """Model name to use for reranking."""
    top_n: int = 3
    """Number of documents to return."""
    model:CrossEncoder = CrossEncoder(model_name)
    """CrossEncoder instance to use for reranking."""

    def bge_rerank(self,query,docs):
        model_inputs =  [[query, doc] for doc in docs]
        scores = self.model.predict(model_inputs)
        results = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
        return results[:self.top_n]


    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using BAAI/bge-reranker models.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents.
        """
        if len(documents) == 0:  # to avoid empty api call
            return []
        doc_list = list(documents)
        _docs = [d.page_content for d in doc_list]
        results = self.bge_rerank(query, _docs)
        final_results = []
        for r in results:
            doc = doc_list[r[0]]
            doc.metadata["relevance_score"] = r[1]
            final_results.append(doc)
        return final_results

  from .autonotebook import tqdm as notebook_tqdm
config.json: 100%|██████████| 743/743 [00:00<00:00, 66.6kB/s]
model.safetensors: 100%|██████████| 133M/133M [00:45<00:00, 2.90MB/s] 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at BAAI/bge-small-en-v1.5 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
tokenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 28.5kB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 698kB/s]
tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 1.61MB/s]
special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 115kB/s]
  return torch._C._cuda_getDeviceCount() > 0


### Instantiate a Contextual Compression Pipeline

In [10]:
from langchain_community.document_transformers.embeddings_redundant_filter import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_transformers.long_context_reorder import LongContextReorder
from langchain.retrievers.multi_query import MultiQueryRetriever
#
vs_retriever = vectorstore.as_retriever(search_kwargs={"k":10})
#

ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever,vs_retriever],
                                       weight=[0.5,0.5])
#

redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
#
reordering = LongContextReorder()
#
reranker = BgeRerank()
#
pipeline_compressor = DocumentCompressorPipeline(transformers=[redundant_filter,reordering,reranker])
#
compression_pipeline = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
                                                      base_retriever=ensemble_retriever)

### Helper function to display retrieved documents

In [12]:
def pretty_print_docs(docs):
  print(
      f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n + {d.page_content}" for i,d in enumerate(docs)])
  )

In [13]:
pretty_print_docs(vs_retriever.get_relevant_documents("What are the major changes in v 0.1.0?"))

Document 1:

 + versioning policy for a little over a month now.langchain itself, however, still remained on 0.0.x versions. Having all releases on minor version 0 created a few challenges:Users couldnâ€™t be confident that updating would not have breaking changeslangchain became bloated and unstable as we took a â€œmaintain everythingâ€� approach to reduce breaking changes and deprecation notificationsHowever, starting today with the release of langchain 0.1.0, all future releases will follow a new versioning standard. Specifically:Any breaking changes to the public API will result in a minor version bump (the second digit)Any bug fixes or new features will result in a patch version bump (the third digit)We hope that this, combined with the previous architectural changes, will:Communicate clearly if breaking changes are made, allowing developers to update with confidenceGive us an avenue for officially deprecating and deleting old code, reducing bloatMore responsibly deal with integra

In [14]:
docs = compression_pipeline.get_relevant_documents("What are the major changes in v 0.1.0?")
pretty_print_docs(docs)

Document 1:

 + to get started building on any stack. We have almost 700 integrations, ranging from LLMs to vector stores to tools for agents to use. ğŸ’¡LangChain is often used as the â€œglueâ€� to join all the different pieces you need to build an LLM app together, and so prioritizing a robust integration ecosystem is a priority for us.About a month ago, we started making some changes we think will improve the robustness, stability, scalability, and general developer experience around integrations. We split out ALL third party integrations into langchain-community â€“ this allows us to centralize integration-specific work. We have also begun to split out individual integrations into their own packages. So far we have done this for ~10 packages, including OpenAI, Google and Mistral. One benefit of this is better dependency management - previously, all dependencies were optional, leading to some headaches when trying to install specific versions. Now if integrations are in their own pa

### Define an Advanced RAG

In [18]:
from langchain.chains import RetrievalQA
#
qa = RetrievalQA.from_chain_type(llm=openai_llm,
                                 chain_type="stuff",
                                 retriever=vectorstore.as_retriever(search_kwargs={"k":5}),
                                 return_source_documents=True)

naive_response = qa("What are the major changes in v 0.1.0?")
naive_response["result"]

' The major changes in v 0.1.0 include a new versioning standard, separating out langchain-core and partner packages, and improved focus through both functionality and documentation. Additionally, there are plans to maintain a stable branch for at least 3 months after the release of v 0.2.0 and to continually improve LangChain based on feedback from the community.'

In [15]:
from langchain.chains import RetrievalQA
#
qa_advanced = RetrievalQA.from_chain_type(llm=openai_llm,
                                 chain_type="stuff",
                                 retriever=compression_pipeline,
                                 return_source_documents=True)
#
qa_adv_response = qa_advanced("What are the major changes in v 0.1.0?")  
qa_adv_response["result"]

  warn_deprecated(


' The major changes in v 0.1.0 include a new versioning standard, where any breaking changes to the public API will result in a minor version bump, and any bug fixes or new features will result in a patch version bump. Additionally, all third party integrations have been split into their own packages for better dependency management and versioning. The release of LangSmith, a tool for debugging LLM applications, is also a major change.'

## Evaluating Naive RAG and Advanced RAG using RAGAS evaluation Framework

### Synthetic Test Set Generation

In [20]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
#
#load documents again to avoid any kind of bias
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200
)
documents = text_splitter.split_documents(documents)
len(documents)
#
#
generator = TestsetGenerator.with_openai()
#
testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})


Generating:  60%|██████    | 6/10 [00:10<00:07,  1.78s/it]      
Exception in thread Thread-10:
Traceback (most recent call last):
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 75, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
    return future.result()
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 63, in _aresults
    raise e
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 58, in _aresults
    r = await future
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/asyncio/tasks.py", line 608, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/home/babi/miniconda3/envs/we

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exception=False` incase you want to show only a warning message instead.

In [None]:
testset.test_data[0]