In [2]:
import os
import openai
import sys

sys.path.append('../..')

from dotenv import load_dotenv
load_dotenv()

openai.api_key  = os.environ['OPENAI_API_KEY']

In [3]:
# Load pdf

from langchain.document_loaders import PyPDFLoader

# Load PDF
loaders = [
    # Duplicate documents on purpose - messy data
    PyPDFLoader("../notebooks/data/faiss_meta_paper.pdf")
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [4]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)

In [5]:
splits = text_splitter.split_documents(docs)
len(splits)

83

## Embeddings


In [6]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

  warn_deprecated(


In [7]:
from langchain.vectorstores import Chroma
persist_directory = 'notebooks/data'

In [8]:
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)

In [9]:
print(vectordb._collection.count())

166


In [10]:
vectordb.persist()

In [11]:
question = "what is faiss?"
docs = vectordb.similarity_search(question,k=5)

len(docs)


5

In [12]:
import datetime
current_date = datetime.datetime.now().date()
if current_date < datetime.date(2023, 9, 2):
    llm_name = "gpt-3.5-turbo-0301"
else:
    llm_name = "gpt-3.5-turbo"
print(llm_name)

gpt-3.5-turbo


In [13]:
print(vectordb._collection.count())

166


In [14]:
question = "what is faiss?"
docs = vectordb.similarity_search(question,k=5)

len(docs)


5

In [15]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name=llm_name, temperature=0)

In [16]:
from langchain.chains import RetrievalQA

In [17]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever()
)

In [18]:
result = qa_chain({"query": question})
result["result"]

  warn_deprecated(


'Faiss is a library dedicated to vector similarity search, which is a core functionality of vector databases. It is a toolkit of indexing methods and related primitives used to search, cluster, compress, and transform vectors. Faiss is designed to manage large collections of embedding vectors efficiently, especially as the number of embeddings in AI applications grows rapidly. It is used for tasks like content moderation and removing duplicates from large datasets.'

## Prompt

In [19]:
from langchain.prompts import PromptTemplate

# Build prompt
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [20]:
# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [21]:
question = "what are indexing methods"

result = qa_chain({"query": question})
result["result"]


'Indexing methods are techniques used to efficiently organize and search through data. Thanks for asking!'

In [22]:
result["source_documents"][0]

Document(page_content='essary: depending on the usage constraints, the most\nefficient indexing methods are different.\nLet us also summarize what Faiss is not: Faiss does\nnot extract features – it only indexes embeddings that\nhave been extracted by a different mechanism; Faiss\nis not a service – it only provides functions that are\n1arXiv:2401.08281v1  [cs.LG]  16 Jan 2024', metadata={'page': 0, 'source': '../notebooks/data/faiss_meta_paper.pdf'})

## RetrievalQA chain types

In [23]:
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    chain_type="map_reduce"
)

In [24]:
result = qa_chain_mr({"query": question})

result["result"]

'Indexing methods are techniques used to efficiently organize and retrieve data in a database or information retrieval system. These methods help speed up the process of searching for specific information by creating a structured index that allows for quick access to the data. Different indexing methods can be used depending on the specific requirements and constraints of the system in question. Some common indexing methods include the Inverted File (IVF) indexing technique and the hierarchical k-means method, which are used in the context of vector search to organize and structure database vectors for efficient retrieval.'

## Chat


In [25]:
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name=llm_name, temperature=0)
llm.predict("Hello world!")

  warn_deprecated(
  warn_deprecated(


'Hello! How can I assist you today?'

In [27]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain
from langchain.chains import RetrievalQA
question = "Explain refining in the simplest words"
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectordb.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


result = qa_chain({"query": question})
result["result"]

'Refining involves combining a fast but inaccurate indexing method with a slower and more accurate search. The fast index retrieves a shortlist of results, and the accurate search computes more precise results for that shortlist. This method improves search accuracy while maintaining efficiency. Thanks for asking!'

### Memory

In [28]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

### ConversationalRetrievalChain

In [30]:
from langchain.chains import ConversationalRetrievalChain
retriever=vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [31]:
question = "Explain refining in the simplest words"
result = qa({"question": question})
result['answer']

'Refining is a process where a fast but less accurate method is used first to get a quick result, and then a slower but more accurate method is used to improve that result.'

In [32]:
question = "why is it used then?"
result = qa({"question": question})
result['answer']

'Refining is used to combine a fast and inaccurate indexing method with a slower and more accurate search. This approach involves querying the fast index to retrieve a shortlist of results, and then using the more accurate search to compute more precise search results only for the shortlist. This method allows for a balance between speed and accuracy in the search process.'

In [33]:
question = "is there any other alternate methods"
result = qa({"question": question})
result['answer']

'Yes, there are alternative methods available for refining indexing methods. Some methods combine a fast and inaccurate indexing method with a slower and more accurate search. This involves querying the fast index to retrieve a shortlist of results and then using the accurate search to compute more accurate results only for the shortlist. Other methods are based on the same refining principle but do not use two separate indexes. Instead, they use two ways of interpreting the same compressed vectors: a fast and inaccurate decoding method and a slower but more accurate method.'

### Create a chatbot that works on your documents