# Setting up Tokenizer

In [27]:
import tiktoken
import re

# create the length function
def tiktoken_len(text):
    input_string = str(tiktoken.encoding_for_model('gpt-3.5-turbo'))
    match = re.search(r"'(.*?)'", input_string)
    result = match.group(1)

    tokenizer = tiktoken.get_encoding(result)
    
    tokens = tokenizer.encode(
        text,
        disallowed_special=()
    )
    return len(tokens)

In [28]:
tiktoken_len('hiiii ssasd')

5

# Setting up Embedding Model

In [25]:
from langchain_openai import OpenAIEmbeddings

model_name = "text-embedding-3-small"

# get openai api key from platform.openai.com
OPENAI_API_KEY = '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'

embed = OpenAIEmbeddings(
    model=model_name, openai_api_key=OPENAI_API_KEY, disallowed_special=())

In [29]:
embed.model

'text-embedding-3-small'

# Connecting to Database

In [5]:
from pinecone import Pinecone
from tqdm.autonotebook import tqdm

In [17]:
default_api_key = '25959b28-fb44-44df-9371-13b27f6f3903'  # Handle your API key securely
pc = Pinecone(api_key=default_api_key)

In [19]:
[index['name'] for index in pc.list_indexes()]

['langchain-retrieval-augmentation',
 'rag-test-3',
 'canopy--advanced-rag',
 'naive-rag-chunk400-text-embedding-3-small-cos']

In [54]:
index = pc.Index("rag-test-3")

In [94]:
from langchain.vectorstores import Pinecone
#from langchain_pinecone import Pinecone

text_field = "text"
index_name = "naive-rag-chunk400-text-embedding-3-small-cos"
index = pc.Index(index_name)

vectorstore = Pinecone(index, embed, text_field)

# Setting up Generation Model

In [95]:
#from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY,model_name='gpt-3.5-turbo')

# Setting up the `MultiQueryRetriever` Chain

In [139]:
from langchain.retrievers.multi_query import MultiQueryRetriever

retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm
)

In [140]:
import inspect
from langchain.retrievers import MultiQueryRetriever

def get_default_prompt():
    sig = inspect.signature(MultiQueryRetriever.from_llm)
    default_prompt = sig.parameters['prompt'].default
    return default_prompt.template

if __name__ == "__main__":
    template = get_default_prompt()
    print("Default Prompt Template:")
    print(template)


Default Prompt Template:
You are an AI language model assistant. Your task is 
    to generate 3 different versions of the given user 
    question to retrieve relevant documents from a vector  database. 
    By generating multiple perspectives on the user question, 
    your goal is to help the user overcome some of the limitations 
    of distance-based similarity search. Provide these alternative 
    questions separated by newlines. Original question: {question}


In [141]:
# We set logging so that we can see the queries as they're generated by our LLM.
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [142]:
question = "What does Scorates think about death?"

docs = retriever.get_relevant_documents(query=question)
len(docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How does Socrates view the concept of death?', "2. What are Socrates' beliefs regarding death?", "3. Can you provide insights into Socrates' perspective on death?"]


5

In [143]:
docs

[Document(page_content="It is probably true that in mainstream fifth century Greek culture,\nbelief in an afterlife of the soul was weak and unclear (Claus 1981,\n68; Burnet 1916, 248-9). If so, it is fitting that Socrates' arguments\nfor the immortality of the soul, most prominently in the\nPhaedo, are offered to interlocutors who, at the outset of the\ndiscussion, are by no means convinced of the idea. (In fact, in the\nApology, 40c, Socrates himself is presented as being\nnoncommittal about what happens to the soul at death, and even about\nwhether it survives at all.) “Men find it very hard to\nbelieve”, Cebes says at Phaedo 70a, “what you said\nabout the soul. They think that after it has left the body it no\nlonger exists anywhere, but that it is destroyed and dissolved on the\nday the man dies.” This view is restated by Simmias (at 77b) as\nthe opinion of the majority (cf. 80d); note that the view includes the\nidea that the soul is a material thing, and is destroyed by being\nd

### Costumzing the prompt: 

#### My own version

In [113]:
from langchain_core.prompts import PromptTemplate
#from langchain.prompts import PromptTemplate

custom_prompt = PromptTemplate(
    input_variables=['question'],
    template='''You are an AI language model assistant. Your task is \n    to generate 5 different versions of the given user \n    question to retrieve relevant documents from a vector  database. \n    By generating multiple perspectives on the user question, \n    your goal is to help the user overcome some of the limitations \n    of distance-based similarity search. Provide these alternative \n    questions separated by newlines. Original question: {question}'''
)

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=llm,
    prompt=custom_prompt
)

In [111]:
question = "What does Scorates think about death?"

docs = multi_query_retriever.get_relevant_documents(query=question)
len(docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How does Socrates view the concept of death?', "2. What are Socrates' beliefs regarding death?", "3. What is Socrates' perspective on the topic of death?", '4. How does Socrates perceive the nature of death?', '5. What insights does Socrates offer on the subject of death?']


6

In [112]:
docs

[Document(page_content="It is probably true that in mainstream fifth century Greek culture,\nbelief in an afterlife of the soul was weak and unclear (Claus 1981,\n68; Burnet 1916, 248-9). If so, it is fitting that Socrates' arguments\nfor the immortality of the soul, most prominently in the\nPhaedo, are offered to interlocutors who, at the outset of the\ndiscussion, are by no means convinced of the idea. (In fact, in the\nApology, 40c, Socrates himself is presented as being\nnoncommittal about what happens to the soul at death, and even about\nwhether it survives at all.) “Men find it very hard to\nbelieve”, Cebes says at Phaedo 70a, “what you said\nabout the soul. They think that after it has left the body it no\nlonger exists anywhere, but that it is destroyed and dissolved on the\nday the man dies.” This view is restated by Simmias (at 77b) as\nthe opinion of the majority (cf. 80d); note that the view includes the\nidea that the soul is a material thing, and is destroyed by being\nd

#### LangChain's Version

In [134]:
from typing import List

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        print("Debug LLM Output:", text)  # Add this line to debug
        lines = text.strip().split("\n")
        return LineList(lines=lines)



output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines, i.e., with '\n'.
    Original question: {question}""",
)
llm =  ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY,model_name='gpt-3.5-turbo')

# Chain
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

# Other inputs
question = "What are the approaches to Task Decomposition?"

In [135]:
# Example test
test_output = "Query 1\nQuery 2\nQuery 3"
parsed_output = output_parser.parse(test_output)
print(parsed_output)


Debug LLM Output: Query 1
Query 2
Query 3
lines=['Query 1', 'Query 2', 'Query 3']


In [136]:
llm_chain.invoke("What does Scorates think about death?")

OutputParserException: Failed to parse LineList from completion 1. Got: 1 validation error for LineList
  Input should be a valid dictionary or instance of LineList [type=model_type, input_value=1, input_type=int]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type

In [137]:
# Run
retriever = MultiQueryRetriever(
    retriever=vectorstore.as_retriever(), llm_chain=llm_chain
)  # "lines" is the key (attribute name) of the parsed output

# Results
unique_docs = retriever.get_relevant_documents(
    query="What does Scorates think about death?"
)
len(unique_docs)

OutputParserException: Failed to parse LineList from completion 1. Got: 1 validation error for LineList
  Input should be a valid dictionary or instance of LineList [type=model_type, input_value=1, input_type=int]
    For further information visit https://errors.pydantic.dev/2.7/v/model_type

In [124]:
retriever.get_relevant_documents()

MultiQueryRetriever(retriever=VectorStoreRetriever(tags=['Pinecone', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.pinecone.Pinecone object at 0x0000022D24B5A0C0>), llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['question'], template='You are an AI language model assistant. Your task is to generate five \n    different versions of the given user question to retrieve relevant documents from a vector \n    database. By generating multiple perspectives on the user question, your goal is to help\n    the user overcome some of the limitations of the distance-based similarity search. \n    Provide these alternative questions separated by newlines.\n    Original question: {question}'), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000022D24B5B2C0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000022D24BC4A10>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy=''), output_pa

# Adding a Generation in RAG

In [138]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

QA_PROMPT = PromptTemplate(
    input_variables=["query", "contexts"],
    template="""You are a helpful assistant who answers user queries using the
    contexts provided. If the question cannot be answered using the information
    provided say "I don't know".

    Contexts:
    {contexts}

    Question: {query}""",
)

# Chain
qa_chain = LLMChain(llm=llm, prompt=QA_PROMPT)

In [148]:
# Let's now apply the generation to the multiple docs retrieved

question1 = "Tell me about llama 2 in llms."

out = qa_chain(
    inputs={
        "query": question1,
        "contexts": "\n---\n".join([d.page_content for d in docs])
    }
)
out["text"]

"I don't know."

In [149]:
question2 = 'What does Scorates think about death?'

# Let's now apply the generation to the multiple docs retrieved

question1 = "Tell me about llama 2 in llms."

out = qa_chain(
    inputs={
        "query": question2,
        "contexts": "\n---\n".join([d.page_content for d in docs])
    }
)
out["text"]

"Socrates' views on death are complex and nuanced. In the Apology, he is presented as being noncommittal about what happens to the soul at death and whether it survives at all. However, in the Phaedo, Socrates argues for the immortality of the soul, stating that the soul is immortal because it has life essentially, similar to how fire has heat essentially. He also discusses the idea that the soul contemplates truths after its separation from the body at the time of death. Ultimately, Socrates believes in the immortality of the soul and that it is deathless."

In [150]:
out

{'query': 'What does Scorates think about death?',
 'contexts': "It is probably true that in mainstream fifth century Greek culture,\nbelief in an afterlife of the soul was weak and unclear (Claus 1981,\n68; Burnet 1916, 248-9). If so, it is fitting that Socrates' arguments\nfor the immortality of the soul, most prominently in the\nPhaedo, are offered to interlocutors who, at the outset of the\ndiscussion, are by no means convinced of the idea. (In fact, in the\nApology, 40c, Socrates himself is presented as being\nnoncommittal about what happens to the soul at death, and even about\nwhether it survives at all.) “Men find it very hard to\nbelieve”, Cebes says at Phaedo 70a, “what you said\nabout the soul. They think that after it has left the body it no\nlonger exists anywhere, but that it is destroyed and dissolved on the\nday the man dies.” This view is restated by Simmias (at 77b) as\nthe opinion of the majority (cf. 80d); note that the view includes the\nidea that the soul is a mat

The text provided includes distinct portions that directly correspond to some of the 6 provided documents using the multiretriever (`docs`). By analyzing the content of each document and comparing it to the segments in the main text, we can identify the specific documents that were directly incorporated:

1. **First Document:**

    Content: Discusses beliefs about the afterlife in fifth-century Greek culture and Socrates' arguments on the soul's immortality.
    Matches with: The first segment of the main text, which discusses the same themes, references specific dialogues (like the Phaedo and Apology), and even mentions specific passages (e.g., Phaedo 70a, 77b).
    

2. **Second Document:**

    Content: Mentions Socrates' argument that the soul is inherently immortal, a theme elaborated upon as Socrates’ final argument.
    Matches with: The segment in the main text discussing the immortality of the soul and the nature of its arguments, specifically noting it as Socrates' "most elaborate and final argument."
    

3. **Third Document:**

    Content: Addresses the broader implications and discussions surrounding the soul's powers and wisdom after death.
    Matches with: The portion of the main text that discusses the power and wisdom of the soul post-death, touching upon themes and specific dialogues mentioned in the document (Phaedo 70b; cf. 76c).


4. **Fourth Document:**

    Content: Explores the theme that the soul animates the body and is inherently alive, much like fire has heat.
    Matches with: The segment in the main text where this specific argument is detailed, including the reference to all living things having souls.
    
These four documents directly match specific segments of the provided text, which indicates they were likely sources for the content. The remaining documents, while related to Socrates or Plato's philosophies, do not have direct text matches in the provided sample and thus are not directly incorporated based on the content shared.

# Chaining Everything with a SequentialChain

We can pull together the logic above into a function or set of methods, whatever is prefered — however if we'd like to use LangChain's approach to this we must "chain" together multiple chains. The first retrieval component is (1) not a chain per se, and (2) requires processing of the output. To do that, and fit with LangChain's "chaining chains" approach, we setup the _retrieval_ component within a `TransformChain`:

In [151]:
from langchain.chains import TransformChain

def retrieval_transform(inputs: dict) -> dict:
    docs = retriever.get_relevant_documents(query=inputs["question"])
    docs = [d.page_content for d in docs]
    docs_dict = {
        "query": inputs["question"],
        "contexts": "\n---\n".join(docs)
    }
    return docs_dict

retrieval_chain = TransformChain(
    input_variables=["question"],
    output_variables=["query", "contexts"],
    transform=retrieval_transform
)

Now we chain this with our generation step using the `SequentialChain`:

In [152]:
from langchain.chains import SequentialChain

rag_chain = SequentialChain(
    chains=[retrieval_chain, qa_chain],
    input_variables=["question"],  # we need to name differently to output "query"
    output_variables=["query", "contexts", "text"]
)

In [158]:
question1 = "Tell me about llama 2 in llms."
question2 = 'What does Scorates think about death?'

out1 = rag_chain({"question": question1})
print("Question: ", out1["question"])
print("Answer: ", out1["text"])
print('\n')

out2 = rag_chain({"question": question2})
print("Question: ", out2["question"])
print("Answer: ", out2["text"])

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What information can you provide about llama 2 within the context of llms?', '2. Can you share details about the second llama in the llms dataset?', "3. I'm interested in learning more about llama 2 specifically in relation to llms."]


Question:  Tell me about llama 2 in llms.
Answer:  I don't know.




INFO:langchain.retrievers.multi_query:Generated queries: ['1. How does Socrates view the concept of death?', "2. What are Socrates' beliefs regarding death?", "3. Can you provide insights into Socrates' perspective on death?"]


Question:  What does Scorates think about death?
Answer:  Socrates' views on death are complex and nuanced. In the Apology, he is presented as being noncommittal about what happens to the soul at death and whether it survives at all. However, in the Phaedo, Socrates argues for the immortality of the soul, stating that the soul is immortal because it has life essentially, much like fire has heat essentially. He also discusses the idea that the soul contemplates truths after its separation from the body at the time of death. Ultimately, Socrates believes in the immortality of the soul and that it is deathless.
