In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Improving the Retriever

Things we are going to try:
- different embeddings (FastEmbed and OpenAI)
- a reranker (Cohere)
- a Multi-Query Retriever

## Building the VectorStore

In [3]:
# DirectoryLoader is imported from langchain_community, the same package the
# other loaders in this notebook come from; `langchain.document_loaders` is
# only a deprecated re-export of it.
from langchain_community.document_loaders import DirectoryLoader

# Load everything found under ./data/.
loader = DirectoryLoader("./data/")
documents = loader.load()

# Copy each document's "source" metadata into a "file_name" entry.
for document in documents:
    document.metadata['file_name'] = document.metadata['source']

docs = documents
len(docs)

26

In [12]:
# Cut the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [10]:
from langsmith import Client

# Materialise the examples of the "basecamp" dataset and display the first one.
client = Client()
examples = [example for example in client.list_examples(dataset_name="basecamp")]

examples[0]


Example(dataset_id=UUID('8f267706-24b2-47fb-84ee-3ea3cfc5a0c0'), inputs={'question': 'How do the cycles at 37signals affect communication and decision-making?'}, outputs={'ground_truth': 'The cycles at 37signals help to create a fixed cadence and provide a regular interval for decision-making. They also help to prioritize work and break big projects into smaller ones. The communication mechanisms, such as daily and weekly check-ins, heartbeats, and kickoffs, ensure that everyone is kept in the loop about the work being done.'}, id=UUID('771183c7-5ff6-4fde-bef0-8e999de218e1'), created_at=datetime.datetime(2024, 3, 6, 20, 31, 43, 147801, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 3, 6, 20, 31, 43, 147801, tzinfo=datetime.timezone.utc), runs=[], source_run_id=None)

## Experiment 1:  Try out different Embeddings

Let's compare the following embedding models:
- ada2
- BGE
- OpenAI's new `text-embedding-3-large`

In [13]:
# Vector store indexed with OpenAI's text-embedding-ada-002 model.
vectorstore_ada = Chroma.from_documents(
    embedding=OpenAIEmbeddings(model="text-embedding-ada-002"),
    documents=splits,
)

Let's build a reference RAG chain with this embedding.

In [16]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain_openai import ChatOpenAI

In [74]:
# Shared building blocks for the reference RAG chain: chat model, RAG prompt
# pulled from the hub, and a retriever over the ada-002 vector store.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
prompt = hub.pull("rlm/rag-prompt")
vectorstore_retriever = vectorstore_ada.as_retriever()


def format_retriever(input_dict):
    """Build the ``question`` / ``context`` variables fed to the prompt.

    Args:
        input_dict: Dict with a ``"question"`` entry and a ``"contexts"``
            entry. ``"contexts"`` is an iterable whose items are either plain
            strings or objects exposing a ``page_content`` attribute.

    Returns:
        dict: ``{"question": <question>, "context": <chunk texts joined by a
        blank line>}``.

    Raises:
        TypeError: If ``input_dict`` is not a dict. A bare list of chunks is
            rejected up front because it carries no question to put in the
            prompt.
    """
    if not isinstance(input_dict, dict):
        raise TypeError(
            "format_retriever expects a dict with 'question' and 'contexts' keys, "
            f"got {type(input_dict).__name__}"
        )

    # get the text in each chunk; chunks may already be plain strings
    doc_strs = [
        d if isinstance(d, str) else d.page_content
        for d in input_dict["contexts"]
    ]

    # join and send the rest
    return {
        "question": input_dict["question"],
        "context": "\n\n".join(doc_strs),
    }
    
def ragas_output_parser(input):
    """Flatten retriever output into a list of chunk texts.

    Args:
        input: Either a list of retrieved chunks, or a dict holding that list
            under ``"contexts"``. Each chunk may be a plain string or an
            object exposing a ``page_content`` attribute (the same two shapes
            ``format_retriever`` accepts).

    Returns:
        list[str] | None: The text of every chunk, or ``None`` when ``input``
        is neither a list nor a dict.
    """
    if isinstance(input, list):
        docs = input
    elif isinstance(input, dict):
        docs = input["contexts"]
    else:
        # Unsupported input type: keep returning None, now explicitly.
        return None

    return [doc if isinstance(doc, str) else doc.page_content for doc in docs]

def passthrough(column_name):
    """Return a runnable that picks ``column_name`` out of dict inputs.

    For a dict input the runnable yields ``input.get(column_name)`` (``None``
    when the key is absent); any non-dict input is forwarded unchanged.
    """
    return RunnableLambda(
        lambda payload: payload if not isinstance(payload, dict) else payload.get(column_name)
    )

In [84]:
from langchain_core.runnables import RunnableParallel

# Answer-generation stage: prompt -> chat model -> plain string output.
generator = prompt | llm | StrOutputParser()

def retriver_factory(retriever):
    """Wrap ``retriever`` so it emits both the retrieved texts and the question.

    The returned runnable produces a dict with two entries:
    ``"contexts"`` (the question piped through ``retriever`` and then
    ``ragas_output_parser``) and ``"question"`` (the question itself).
    """
    fetch_contexts = passthrough("question") | retriever | ragas_output_parser
    return RunnableParallel(
        {
            "contexts": fetch_contexts,
            "question": passthrough("question"),
        }
    )

# Normalise the chain input: dict inputs are reduced to their "question"
# value, anything else is passed along unchanged.
filter_langsmith_dataset = RunnableLambda(
    lambda example: example["question"] if isinstance(example, dict) else example
)

def rag_factory(vector_store=None, retriever=None):
    """Assemble a RAG chain on top of a vector store or a ready-made retriever.

    Args:
        vector_store: Object exposing ``as_retriever()``. When given, it takes
            precedence over ``retriever``.
        retriever: Retriever used when no ``vector_store`` is supplied.

    Returns:
        A runnable that takes a question (a string, or a dict with a
        ``"question"`` key) and yields ``{"answer": ..., "contexts": ...}``,
        where ``"contexts"`` is the list of retrieved chunk texts.

    Raises:
        ValueError: If neither ``vector_store`` nor ``retriever`` is provided.
    """
    if vector_store is not None:
        retriever = vector_store.as_retriever()
    elif retriever is None:
        raise ValueError("You must provide a vectorstore or a retriever")

    return (
        filter_langsmith_dataset
        | retriver_factory(retriever)
        | RunnableParallel({
            "answer": format_retriever | generator,
            # Forward only the retrieved chunks. Passing the whole upstream
            # dict through would nest {"contexts", "question"} under
            # "contexts" instead of exposing the list of texts.
            "contexts": passthrough("contexts"),
        })
    )

# Reference RAG chain backed by the ada-002 vector store.
rag_chain_ada = rag_factory(vectorstore_ada)

In [85]:
# Use the inputs of the first dataset example as the sample query and show
# its question.
q = examples[0].inputs
q["question"]

'How do the cycles at 37signals affect communication and decision-making?'

In [86]:
# Post-processing step that keeps only the generated answer of a RAG chain,
# tried out here on the ada-002 chain with the sample question.
get_answer = RunnableLambda(lambda result: result["answer"])
(rag_factory(vector_store=vectorstore_ada) | get_answer).invoke(q)

'The cycles at 37signals create a sense of urgency and help prevent projects from becoming too large. They also provide a regular interval for decision-making on what to work on. Communication is facilitated through daily and weekly questions about work progress and intentions.'

### BGE Embeddings



In [33]:
# Vector store indexed with the BAAI/bge-large-en-v1.5 model through FastEmbed,
# kept in its own "bge" collection.
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

vectorstore_bge = Chroma.from_documents(
    documents=splits,
    embedding=FastEmbedEmbeddings(model_name="BAAI/bge-large-en-v1.5"),
    collection_name="bge",
)



Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

In [34]:
# RAG chain on top of the BGE store, queried with the sample question.
bge_rag = rag_factory(vector_store=vectorstore_bge)
(bge_rag | get_answer).invoke(q)

'The cycles at 37signals create a sense of urgency and help prevent projects from becoming too large. They also provide a regular interval for decision-making on what to work on. Communication is facilitated through daily and weekly questions about work progress and intentions.'

### OpenAI's Text Embedding 3 Large

In [35]:
# Vector store indexed with OpenAI's text-embedding-3-large model, kept in its
# own "text-emb-3-lg" collection.
vectorstore_text_embedding = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
    collection_name="text-emb-3-lg",
)

In [36]:
# RAG chain on top of the text-embedding-3-large store, queried with the
# sample question.
text_embed_rag = rag_factory(vector_store=vectorstore_text_embedding)
(text_embed_rag | get_answer).invoke(q)

'The cycles at 37signals create a sense of urgency and help prevent projects from becoming too large. They also provide a regular interval for decision-making on what projects to work on. Communication is facilitated through daily and weekly questions about work progress and intentions.'

## Evaluation of the different embeddings

In [37]:
from ragas.integrations.langchain import EvaluatorChain
from ragas.integrations.langsmith import evaluate

# import the metrics we will need
from ragas.metrics import context_precision, context_recall

In [38]:
# retriever with Ada embeddings
# retriver_factory pipes its argument into an LCEL sequence, so it needs a
# retriever (a Runnable), not the raw vector store.
ada_retriever = retriver_factory(vectorstore_ada.as_retriever())

run = evaluate(
    experiment_name="ada",
    dataset_name="basecamp",
    llm_or_chain_factory=ada_retriever,
    metrics=[context_precision, context_recall],
    verbose=False
)

View the evaluation results for project 'ada' at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0/compare?selectedSessions=60f28e99-82b0-4a32-9ec8-8cfae59ac7da

View all tests for Dataset basecamp at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0
[>                                                 ] 0/7

  warn_deprecated(


[------------->                                    ] 2/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------------->] 7/7

In [44]:
# retriever with BGE embeddings
# retriver_factory pipes its argument into an LCEL sequence, so it needs a
# retriever (a Runnable), not the raw vector store.
bge_retriever = retriver_factory(vectorstore_bge.as_retriever())

run = evaluate(
    experiment_name="bge_retriever",
    dataset_name="basecamp",
    llm_or_chain_factory=bge_retriever,
    metrics=[context_precision, context_recall],
    verbose=True
)

View the evaluation results for project 'bge_retriever' at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0/compare?selectedSessions=efc8c7d0-5517-4d9d-abfc-0427dbe2b86a

View all tests for Dataset basecamp at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0
[------------------------------------------------->] 7/7

Unnamed: 0,feedback.context_precision,feedback.context_recall,error,execution_time,run_id
count,7.0,7.0,0.0,7.0,7
unique,,,0.0,,7
top,,,,,b3dd731c-22ca-49e4-abf1-d86e5618aa9b
freq,,,,,1
mean,0.793651,0.928571,,0.476688,
std,0.374007,0.188982,,0.249074,
min,0.0,0.5,,0.144702,
25%,0.777778,1.0,,0.260192,
50%,1.0,1.0,,0.643713,
75%,1.0,1.0,,0.66887,


In [45]:
# retriever with text-embedding embeddings
# retriver_factory pipes its argument into an LCEL sequence, so it needs a
# retriever (a Runnable), not the raw vector store.
text_embedding_retriever = retriver_factory(vectorstore_text_embedding.as_retriever())

run = evaluate(
    experiment_name="text_embedding_3_retriver",
    dataset_name="basecamp",
    llm_or_chain_factory=text_embedding_retriever,
    metrics=[context_precision, context_recall],
    verbose=True
)

View the evaluation results for project 'text_embedding_3_retriver' at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0/compare?selectedSessions=ad669381-ced0-4a1f-ae3c-45d4f852c902

View all tests for Dataset basecamp at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0
[------------------------------------------>       ] 6/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------------->] 7/7

Unnamed: 0,feedback.context_precision,feedback.context_recall,error,execution_time,run_id
count,7.0,6.0,0.0,7.0,7
unique,,,0.0,,7
top,,,,,88b94785-b177-48ec-843a-4b8c1c036713
freq,,,,,1
mean,0.948413,1.0,,0.439906,
std,0.088466,0.0,,0.130925,
min,0.805556,1.0,,0.272072,
25%,0.916667,1.0,,0.363577,
50%,1.0,1.0,,0.432972,
75%,1.0,1.0,,0.516501,


# Experiment 2: Using a ReRanker

In [62]:
def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Documents are numbered from 1 and separated by a line of 100 dashes.
    Each item of ``docs`` must expose a ``page_content`` string.
    """
    divider = f"\n{'-' * 100}\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [63]:
# Baseline before reranking: top 10 results from the text-embedding-3-large
# store for the sample question. The result gets its own name so the `docs`
# list that feeds the text splitter is not overwritten.
ret = vectorstore_text_embedding.as_retriever(search_kwargs={"k": 10})
retrieved_docs = ret.get_relevant_documents(q["question"])
pretty_print_docs(retrieved_docs)

Document 1:

How We Work

Cycles

We work in 6-week or 8-week cycles at 37signals. There are typically six cycles to a year. Two are 8-week cycles, during Summer Hours, and the rest 6-week cycles. This fixed cadence serves to give us an internal sense of urgency, work as a scope hammer to keep projects from ballooning, and provide a regular interval to decide what we’re working on.

The idea is not that everything we ever decide to work on has to take six or eight weeks or can be completed in that time. But rather that we think about how we can break big projects into smaller ones that can be done in that amount of time, and that we bundle smaller things into a presentable scope of work that can be discussed.
----------------------------------------------------------------------------------------------------
Document 2:

Communication

It’s hard to keep up on what everyone is doing and what it means, if you just watch the stream of latest activity scrolling along in 37signals. (It’s al

In [64]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.cohere_rerank import CohereRerank

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
compressor = CohereRerank()
# Base retriever returns 10 candidates from the text-embedding-3-large store;
# the Cohere compressor then post-processes them.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorstore_text_embedding.as_retriever(search_kwargs={"k": 10})
)

In [65]:
# Run the sample question through the reranking retriever and print what
# comes back.
compressed_docs = compression_retriever.get_relevant_documents(q["question"])
pretty_print_docs(compressed_docs)

Document 1:

How We Work

Cycles

We work in 6-week or 8-week cycles at 37signals. There are typically six cycles to a year. Two are 8-week cycles, during Summer Hours, and the rest 6-week cycles. This fixed cadence serves to give us an internal sense of urgency, work as a scope hammer to keep projects from ballooning, and provide a regular interval to decide what we’re working on.

The idea is not that everything we ever decide to work on has to take six or eight weeks or can be completed in that time. But rather that we think about how we can break big projects into smaller ones that can be done in that amount of time, and that we bundle smaller things into a presentable scope of work that can be discussed.
----------------------------------------------------------------------------------------------------
Document 2:

While a few pitches might instantly strike a chord loud enough to go on the plate for the next cycle, it’s more likely that your pitch will sit for a while first. Ther

In [95]:
# RAG chain that retrieves through the reranking retriever, tried on the
# sample question.
rag_with_reranker = rag_factory(retriever=compression_retriever)
(rag_with_reranker | get_answer).invoke(q)



'The cycles at 37signals provide a fixed cadence for decision-making and project scope. They help break big projects into smaller ones that can be completed in a set amount of time. Communication is facilitated through daily and weekly questions to keep everyone in the loop about the work being done.'

In [96]:
# Score the reranker chain on the "basecamp" dataset with context precision
# and context recall.
run = evaluate(
    experiment_name="reranker",
    dataset_name="basecamp", 
    llm_or_chain_factory=rag_with_reranker, 
    metrics=[context_precision, context_recall],
    verbose=True
)

View the evaluation results for project 'reranker' at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0/compare?selectedSessions=127a0557-51f5-4f02-908a-bbc5502058ed

View all tests for Dataset basecamp at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0
[-------------------->                             ] 3/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[---------------------------->                     ] 4/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------>       ] 6/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------------->] 7/7

Unnamed: 0,feedback.context_precision,feedback.context_recall,error,execution_time,run_id
count,7.0,4.0,0.0,7.0,7
unique,,,0.0,,7
top,,,,,6fde9b65-8174-4fa1-8441-9dee884f23a0
freq,,,,,1
mean,0.428571,1.0,,2.534166,
std,0.534522,0.0,,0.437899,
min,0.0,1.0,,1.776245,
25%,0.0,1.0,,2.398282,
50%,0.0,1.0,,2.57,
75%,1.0,1.0,,2.694026,


# Experiment 3: Multi-Query Retriever

In [98]:
# Display the sample query used throughout the notebook.
q

{'question': 'How do the cycles at 37signals affect communication and decision-making?'}

In [99]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Multi-query retriever built from the chat model and a retriever over the
# text-embedding-3-large store.
multi_query_ret = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore_text_embedding.as_retriever(),
)

In [100]:
# Retrieve for the sample question through the multi-query retriever. The
# result gets its own name so the `docs` list that feeds the text splitter is
# not overwritten.
multi_query_docs = multi_query_ret.get_relevant_documents(q["question"])
pretty_print_docs(multi_query_docs)

Document 1:

How We Work

Cycles

We work in 6-week or 8-week cycles at 37signals. There are typically six cycles to a year. Two are 8-week cycles, during Summer Hours, and the rest 6-week cycles. This fixed cadence serves to give us an internal sense of urgency, work as a scope hammer to keep projects from ballooning, and provide a regular interval to decide what we’re working on.

The idea is not that everything we ever decide to work on has to take six or eight weeks or can be completed in that time. But rather that we think about how we can break big projects into smaller ones that can be done in that amount of time, and that we bundle smaller things into a presentable scope of work that can be discussed.
----------------------------------------------------------------------------------------------------
Document 2:

Communication

It’s hard to keep up on what everyone is doing and what it means, if you just watch the stream of latest activity scrolling along in 37signals. (It’s al

In [101]:
# RAG chain that retrieves through the multi-query retriever, tried on the
# sample question.
multi_query_rag = rag_factory(retriever=multi_query_ret)
(multi_query_rag | get_answer).invoke(q)

'The cycles at 37signals create a sense of urgency and help prevent projects from becoming too large. Communication is facilitated through daily and weekly questions to keep everyone in the loop about the work being done. Pitches play a role in decision-making, with some ideas taking multiple cycles to come to fruition.'

In [103]:
# Score the multi-query chain on the "basecamp" dataset with context precision
# and context recall.
# NOTE(review): the experiment name is spelled "mulit_query"; the recorded
# results below are stored under that name, so it is left as is.
_ = evaluate(
    experiment_name="mulit_query",
    dataset_name="basecamp", 
    llm_or_chain_factory=multi_query_rag, 
    metrics=[context_precision, context_recall],
    verbose=True
)



View the evaluation results for project 'mulit_query' at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0/compare?selectedSessions=581dd7af-7bae-4242-bb78-a137e2355898

View all tests for Dataset basecamp at:
https://smith.langchain.com/o/9bfbddc5-b88e-41e5-92df-2a62f0c64b4b/datasets/8f267706-24b2-47fb-84ee-3ea3cfc5a0c0
[------>                                           ] 1/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[-------------------->                             ] 3/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[---------------------------->                     ] 4/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------>       ] 6/7

Invalid JSON response. Expected dictionary with key 'Attributed'
Failed to batch ingest runs: LangSmithError('Failed to post https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError(\'400 Client Error: Bad Request for url: https://api.smith.langchain.com/runs/batch\', \'{"detail":"Request body is not valid JSON"}\')')


[------------------------------------------------->] 7/7

Unnamed: 0,feedback.context_precision,feedback.context_recall,error,execution_time,run_id
count,7.0,2.0,0.0,7.0,7
unique,,,0.0,,7
top,,,,,bce40e6c-d7b5-4fe2-9bf6-234bba353c21
freq,,,,,1
mean,0.571429,1.0,,4.735159,
std,0.534522,0.0,,0.981516,
min,0.0,1.0,,3.85938,
25%,0.0,1.0,,3.989503,
50%,1.0,1.0,,4.533062,
75%,1.0,1.0,,5.192072,
