### Project: Question-Answering on Private Documents (RAG)   

RAG - Retrieval-Augmented Generation  
    - It helps overcome knowledge limits, makes answers more factual, and lets the model handle complex questions.

How can LLMs learn new knowledge?
1. Fine-tuning on a training set(Expensive)
2. Model inputs(Better way to build short term memory)

The recommended approach is to use model inputs with embedding-based search.

Question-Answering Pipeline
1. Prepare the document(once per document)   

   a. Load the data into LangChain Documents.  
   b. Split the documents into chunks.   
   c. Embed the chunks into numeric vectors.   
   d. Save the chunks and the embeddings to a vector database.   

2. Search (once per query)   
   a. Embed the user's question.   
   b. Using the question's embedding and the chunk embeddings, rank the chunk vectors by their similarity to the question's embedding. The nearest vectors
   represent the chunks most similar to the question.   

3. Ask (once per query)   
    a. Insert the question and the most relevant chunks into a message to a GPT model   
    b. Return GPT's answer   

### Installing requirements

In [None]:
pip install -r requirements.txt -q

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
# Read variables from the .env file located by find_dotenv() into the process
# environment. override=True is passed explicitly; per python-dotenv's docs this
# lets values from the file replace variables that are already set.
load_dotenv(find_dotenv(), override=True)

#### Loading Documents

In [None]:
def load_document(file):
    """Load a PDF or DOCX file with the matching LangChain document loader.

    Args:
        file: Path to the document. The loader is chosen from the file
            extension, compared case-insensitively ('.pdf' or '.docx').

    Returns:
        Whatever the selected loader's ``load()`` returns, or ``None`` (after
        printing a message) when the extension is not supported.
    """
    # Lower-case the extension so 'REPORT.PDF' is treated like 'report.pdf'.
    extension = os.path.splitext(file)[1].lower()
    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        loader_cls = PyPDFLoader
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        loader_cls = Docx2txtLoader
    else:
        print('Document format not supported!')
        return None
    print(f'{file}')
    return loader_cls(file).load()

In [None]:
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """Load Wikipedia content for ``query`` through LangChain's WikipediaLoader.

    Args:
        query: Search text forwarded to the loader.
        lang: Language code forwarded to the loader (defaults to 'en').
        load_max_docs: Value forwarded as the loader's ``load_max_docs``
            argument (defaults to 2).

    Returns:
        The result of the loader's ``load()`` call.
    """
    from langchain.document_loaders import WikipediaLoader

    return WikipediaLoader(
        query=query,
        lang=lang,
        load_max_docs=load_max_docs,
    ).load()

#### Deleting Pinecone Indexes

In [None]:
def delete_pinecone_index(index_name='all'):
    """Delete one Pinecone index, or every index when ``index_name`` is 'all'.

    Args:
        index_name: Name of the index to delete. The default, 'all', deletes
            every index returned by ``list_indexes()``.

    Returns:
        None. Progress is reported with ``print``.

    Note:
        ``pinecone.Pinecone()`` is created without arguments, so credentials
        presumably come from the environment - confirm against the setup cell.
    """
    import pinecone

    pc = pinecone.Pinecone()
    if index_name == 'all':
        print('Deleting indexes ...')
        for name in pc.list_indexes().names():
            pc.delete_index(name)
        print('Deleted')
    else:
        print(f'Deleting index {index_name} ...', end='')
        pc.delete_index(index_name)
        # The progress line above is printed without a newline; finish it the
        # same way the 'all' branch reports completion.
        print('Deleted')

### Embedding and uploading to a Vector Database(Pinecone)

In [None]:
def insert_or_fetch_embeddings(index_name, chunks):
    """Return a Pinecone vector store for ``index_name``, creating it if needed.

    When an index with this name already exists, the store is opened on it and
    ``chunks`` is not used. Otherwise the index is created and ``chunks`` are
    embedded and uploaded into it.

    Args:
        index_name: Name of the Pinecone index to open or create.
        chunks: Documents to embed and upload when the index has to be created.

    Returns:
        The vector store, for both the existing-index and new-index paths.
    """
    import pinecone
    from langchain_community.vectorstores import Pinecone
    from langchain_openai import OpenAIEmbeddings
    from pinecone import PodSpec

    # One value feeds both the embedding model and the index so they cannot drift apart.
    dimension = 1536

    pc = pinecone.Pinecone()
    embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=dimension)

    if index_name in pc.list_indexes().names():
        vector_store = Pinecone.from_existing_index(index_name, embeddings)
        print('Done!!')
    else:
        print(f'Creating index {index_name} and embeddings....', end='')
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric='cosine',
            spec=PodSpec(environment='gcp-starter')
        )
        vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
        print('Done')

    # Return from both branches: previously the return sat inside the else
    # block, so an already-existing index made the function return None.
    return vector_store

### Generating Answer

In [None]:
def ask_and_get_answer(vector_store, question):
    """Answer ``question`` with a RetrievalQA chain built on ``vector_store``.

    A new chain is constructed on every call: a 'gpt-3.5-turbo' chat model
    (temperature 1), a similarity retriever with k=3, and the 'stuff' chain
    type. No state is kept between calls.

    Args:
        vector_store: Object exposing ``as_retriever``.
        question: The question text passed to the chain.

    Returns:
        The value returned by the chain's ``run`` call.
    """
    from langchain.chains import RetrievalQA
    from langchain_openai import ChatOpenAI

    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model='gpt-3.5-turbo', temperature=1),
        chain_type='stuff',
        retriever=vector_store.as_retriever(
            search_type='similarity',
            search_kwargs={'k': 3},
        ),
    )
    return qa_chain.run(question)

#### Chunking data

In [None]:
def chunk_data(data, chunk_size=256):
    """Split loaded documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        data: Documents to split, passed to ``split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``
            (defaults to 256). The overlap between chunks is fixed at 0.

    Returns:
        The chunks produced by ``split_documents``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=0,
    )
    return splitter.split_documents(data)

#### Calculating Cost

In [None]:
from utils import calculate_embedding_cost

# Load the sample PDF and split it with chunk_data's default chunk size.
# `data` and `chunks` are notebook globals that later cells reuse.
data  = load_document('../files/us_constitution.pdf')
chunks = chunk_data(data)
# Uncomment to inspect how many chunks were produced and what one contains.
# print(len(chunks))
# print(chunks[10].page_content)
# calculate_embedding_cost comes from the project's utils module (not shown here).
print(calculate_embedding_cost(chunks))

### Creating index and Vectors(Pinecone)

In [None]:
# The free Pinecone plan allows only a single index at a time, so every
# existing index is deleted before a new one is created.
delete_pinecone_index()
index_name = 'qadocument'
# Embed the chunks and upload them; `vector_store` is reused by the Q&A cell below.
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

### Running Q&A

In [None]:
import time

# Interactive question loop over the current vector_store.
# `i` numbers the questions shown in the prompt.
i = 1
print('Write Quit or Exit to quit')

while True:
    # Trim surrounding whitespace so an answer such as ' quit ' still ends the session.
    question = input(f'Question #{i}: ').strip()
    if question.lower() in ['quit', 'exit']:
        print('Quitting .... bye bye!')
        time.sleep(2)
        break
    if not question:
        # A blank line is not a question: ask again without calling the
        # model or advancing the question counter.
        continue
    i += 1
    answer = ask_and_get_answer(vector_store, question)
    print(f'\nAnswer: {answer}')
    print(f'\n {"-" * 50} \n')
    

In [None]:
# Called with the default index_name='all', so every Pinecone index is deleted.
delete_pinecone_index()

### Loading from public(Wikipedia) and creating Vectors

In [None]:

# Load Wikipedia content for 'ChatGPT' (English) and chunk it with the default chunk size.
data = load_from_wikipedia('ChatGPT', 'en')
chunks = chunk_data(data)
# Store the embeddings under a separate index name; this rebinds the global vector_store.
index_name = 'chatgpt'
vector_store = insert_or_fetch_embeddings(index_name, chunks)


In [None]:
# Ask a single question against the vector store built from the Wikipedia chunks.
question = 'What is chatGPT'
answer = ask_and_get_answer(vector_store, question)
print(answer)

### Using Chroma Vector Database

In [None]:
# LangChain's Chroma vector store is backed by the "chromadb" package; the PyPI
# project named "chroma" is a different, unrelated library, so install chromadb.
pip install chromadb -q

In [None]:
pip install chromadb -q

In [None]:
def create_embeddings_chroma(chunks, persistent_directory='./chroma_db'):
    """Embed ``chunks`` with OpenAI embeddings and store them in Chroma.

    Args:
        chunks: Documents to embed and store.
        persistent_directory: Path forwarded as Chroma's ``persist_directory``
            argument (defaults to './chroma_db').

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    from langchain.vectorstores import Chroma
    from langchain_openai import OpenAIEmbeddings

    return Chroma.from_documents(
        chunks,
        OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536),
        persist_directory=persistent_directory,
    )

In [None]:
# Load the second sample PDF, chunk it, and embed the chunks into Chroma.
# This rebinds the global vector_store to the Chroma-backed store.
data = load_document('../files/rag_powered_by_google_search.pdf')
chunks = chunk_data(data, chunk_size=256)
vector_store = create_embeddings_chroma(chunks)

In [None]:
# First question against the Chroma-backed vector store.
q = 'What is the Vertext AI search'
answer = ask_and_get_answer(vector_store, q)

print(answer)

In [None]:
# Ask for a specific figure from the document.
q = 'How many pairs of questions and answres had thr StackOverflow dataset?'
answer = ask_and_get_answer(vector_store, q)
print(answer)

In [None]:
# Follow-up that refers to "the number" from the previous answer.
# ask_and_get_answer builds a fresh chain on every call and keeps no history,
# so nothing from the earlier question is passed along with this one.
q = 'Multiply the number by 2'
answer = ask_and_get_answer(vector_store, q)
print(answer)

### Adding Memory (Chat History)

In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

# Chat model for the conversational chain; temperature is 0 here, whereas
# ask_and_get_answer uses temperature=1.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
# Similarity retriever over whatever vector_store is bound when this cell runs (k=5).
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 5})
# Conversation memory stored under the 'chat_history' key, the same name the
# user template below uses as a placeholder.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# System message template with a {context} placeholder; presumably the chain
# fills it with the retrieved chunks - confirm against the LangChain docs.
system_template = r'''
Use the following context to answer the user questions.
if you don't know the answer, say `I have no idea'
---------------------------------------------------
Context: ```{context}```
'''

# Human message template with {question} and {chat_history} placeholders.
user_template = '''
Question: ```{question}```
Chat History: ```{chat_history}```
'''

# Combine both templates into one chat prompt, system message first.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(user_template)
]

qa_prompt = ChatPromptTemplate.from_messages(messages)

# Conversational retrieval chain wired to the model, retriever, memory and the
# custom prompt defined above; verbose=True is passed to the chain.
crc = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type='stuff',
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    verbose=True
)

In [None]:
# Display the assembled chat prompt template for inspection.
print(qa_prompt)

In [None]:
def ask_question(q, chain):
    """Send question ``q`` to ``chain`` and return the chain's response.

    Args:
        q: The question text.
        chain: Object exposing ``invoke``; it receives ``{'question': q}``.

    Returns:
        Whatever ``chain.invoke`` returns, unchanged.
    """
    payload = {'question': q}
    return chain.invoke(payload)

In [None]:
from utils import generate_vector_store
# generate_vector_store comes from the project's utils module (not shown here).
# NOTE(review): if the cells run top to bottom, crc's retriever was already
# created from the previous vector_store, so rebinding the name here does not
# change what crc searches - confirm whether crc should be rebuilt afterwards.
vector_store = generate_vector_store('../files/rag_powered_by_google_search.pdf')

In [None]:
# First turn of the conversation, sent through the memory-backed chain crc.
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

In [None]:
# Follow-up that depends on the previous answer; crc was built with
# ConversationBufferMemory, so earlier turns are available to the chain.
q = 'Multiply the number by 2'
result = ask_question(q, crc)
print(result)

In [None]:
# Second follow-up, again referring back to the result of the previous turn.
q = 'Divide the result by 4'
result = ask_question(q, crc)
print(result)

In [None]:
# Print every entry stored under 'chat_history' in the latest result,
# followed by a 50-dash separator line.
for chat in result['chat_history']:
    print(chat)
    print('-'*50)

### Using Custom Prompt

In [None]:
# Question about the document, asked through crc, which was built with the custom qa_prompt.
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

In [None]:
# Presumably an out-of-document question (verify against the PDF): the system
# template tells the model to reply `I have no idea' when it does not know.
q = 'When was Elon Musk born?'
result = ask_question(q, crc)
print(result)