### Project: Question-Answering on Private Documents (RAG)

In [None]:
import os

# Langchain imports
from dotenv import load_dotenv, find_dotenv # type: ignore
from langchain.document_loaders import PyPDFLoader # type: ignore
from langchain.document_loaders import Docx2txtLoader # type: ignore
from langchain.document_loaders import TextLoader # type: ignore
from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore
from langchain.chains import RetrievalQA # type: ignore
from langchain_openai import ChatOpenAI # type: ignore
from langchain_openai import OpenAIEmbeddings # type: ignore
from langchain.document_loaders import WikipediaLoader # type: ignore

# Importing tiktoken
import tiktoken # type: ignore

# Importing the necessary libraries for the Pinecone client
import pinecone # type: ignore
from langchain_community.vectorstores import Pinecone # type: ignore
from pinecone import PodSpec # type: ignore

# Importing the necessary libraries for the ChromaDB client
from langchain.vectorstores import Chroma # type: ignore


load_dotenv(find_dotenv(), override=True)

### Loading Documents

In [None]:
# Loading PDF, DOCX and TXT files as LangChain Documents
def load_document(file):
    """Load a PDF, DOCX or TXT file through the matching LangChain loader.

    The loader is selected from the file extension. The extension is compared
    case-insensitively, so names such as ``REPORT.PDF`` are accepted.

    Args:
        file: Path of the file to load.

    Returns:
        The value returned by the selected loader's ``load()`` call, or
        ``None`` when the extension is not one of .pdf, .docx or .txt.
    """
    _, extension = os.path.splitext(file)
    # Normalise the case so '.PDF' and '.pdf' select the same loader.
    extension = extension.lower()

    if extension == '.pdf':
        loader_cls = PyPDFLoader
    elif extension == '.docx':
        loader_cls = Docx2txtLoader
    elif extension == '.txt':
        loader_cls = TextLoader
    else:
        print('Document format is not supported!')
        return None

    # Report progress for every supported format, including plain text.
    print(f'Loading {file}')

    # The individual items of the returned list expose `.page_content` and
    # `.metadata` (see the usage elsewhere in this notebook).
    return loader_cls(file).load()

### Function to Load data from Wikipedia (Online Services)

In [None]:
# Function to Load Wikipedia
# query: is the text used to find content in the Wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """Load Wikipedia content for ``query`` via ``WikipediaLoader``.

    Args:
        query: Text used to find content on Wikipedia.
        lang: Language code forwarded to the loader (default ``'en'``).
        load_max_docs: Document limit forwarded to the loader (default 2).

    Returns:
        Whatever the loader's ``load()`` call returns.
    """
    return WikipediaLoader(
        query=query,
        lang=lang,
        load_max_docs=load_max_docs,
    ).load()

### Chunking Data

In [None]:
# Function to chunk the data
def chunk_data(data, chunk_size=256, chunk_overlap=0):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        data: Documents to split (passed to ``split_documents``).
        chunk_size: Chunk size handed to the splitter (default 256).
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 0, which is the value this function
            previously hard-coded.

    Returns:
        The chunks produced by the splitter's ``split_documents`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

### Calculating Cost

In [None]:
def print_embedding_cost(texts):
    """Print the token count of ``texts`` and the resulting embedding cost.

    Tokens are counted with the tiktoken encoding for
    'text-embedding-3-small'; the cost uses the rate hard-coded below
    (0.00002 USD per 1000 tokens).

    Args:
        texts: Iterable of objects exposing a ``page_content`` string.
    """
    encoder = tiktoken.encoding_for_model('text-embedding-3-small')

    total_tokens = 0
    for page in texts:
        total_tokens += len(encoder.encode(page.page_content))

    cost_usd = total_tokens / 1000 * 0.00002
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {cost_usd:.6f}')

### Embedding and Uploading to a Vector Database (Pinecone)

In [None]:
def insert_or_fetch_embeddings(index_name, chunks):
    """Return a Pinecone vector store for ``index_name``, creating it if needed.

    When an index with that name already exists it is opened as-is and
    ``chunks`` is not used. Otherwise the index is created and ``chunks`` are
    embedded and inserted into it.

    Args:
        index_name: Name of the Pinecone index to open or create.
        chunks: Documents to embed when the index has to be created.

    Returns:
        The vector store returned by ``Pinecone.from_existing_index`` or
        ``Pinecone.from_documents``.
    """
    # A single value drives both the embedding size and the index dimension so
    # the two cannot drift apart.
    dimension = 1536  # 512 works as well

    # initializing the Pinecone client and the OpenAI embeddings
    pc = pinecone.Pinecone()
    embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=dimension)

    # Membership has to be tested against the index *names*; this is the same
    # `.names()` accessor delete_pinecone_index uses. Testing against the raw
    # list_indexes() result never matched a plain string name.
    if index_name in pc.list_indexes().names():
        print(f'Index {index_name} already exists. Loading embeddings ... ', end='')
        vector_store = Pinecone.from_existing_index(index_name, embeddings)
        print('Ok')
        return vector_store

    print(f'Creating index {index_name} and embeddings ...', end='')

    # creating a new index
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric='cosine',
        spec=PodSpec(
            environment='gcp-starter'
        )
    )

    # Embed the chunks with `embeddings`, insert them into the new index and
    # get back a vector store bound to it.
    vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
    print('Ok')

    return vector_store
    

### Function to Delete Pinecone Indexes (One or All)

In [None]:
def delete_pinecone_index(index_name='all'):
    """Delete one Pinecone index, or every index when ``index_name`` is 'all'.

    Args:
        index_name: Name of the index to delete. The default value ``'all'``
            deletes every index reported by ``list_indexes().names()``.
    """
    client = pinecone.Pinecone()

    # Single-index case first, so the bulk deletion reads as the fall-through.
    if index_name != 'all':
        print(f'Deleting index {index_name} ...', end='')
        client.delete_index(index_name)
        print('Ok')
        return

    names = client.list_indexes().names()
    print('Deleting all indexes ... ')
    for name in names:
        client.delete_index(name)
    print('Ok')
    

### Function for Asking and Getting Answers

In [None]:
def ask_and_get_answer(vector_store, userQues, k=3):
    """Answer ``userQues`` with a RetrievalQA chain built on ``vector_store``.

    A new chat model, retriever and chain are created on every call, so no
    state is shared between calls.

    Args:
        vector_store: Object exposing ``as_retriever``.
        userQues: The question passed to the chain.
        k: Value placed in the retriever's ``search_kwargs`` under ``'k'``
            (default 3).

    Returns:
        Whatever the chain's ``invoke`` call returns.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model='gpt-3.5-turbo', temperature=1),
        chain_type="stuff",
        retriever=vector_store.as_retriever(
            search_type='similarity',
            search_kwargs={'k': k},
        ),
    )
    return qa_chain.invoke(userQues)
    

### Using Chroma as a Vector DB

#### Function to create chroma embeddings

In [None]:
def create_embeddings_chroma(chunks, persist_directory='./chroma_db'):
    """Build a Chroma vector store from ``chunks``.

    Args:
        chunks: Documents handed to ``Chroma.from_documents``.
        persist_directory: Directory passed to Chroma as its persistence
            location (default ``'./chroma_db'``).

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    # Same model and dimensions as load_embeddings_chroma uses when reopening the store.
    embedding_model = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

    return Chroma.from_documents(
        chunks,
        embedding_model,
        persist_directory=persist_directory,
    )


#### Function to load chroma embeddings

In [None]:
def load_embeddings_chroma(persist_directory='./chroma_db'):
    """Open a Chroma vector store located at ``persist_directory``.

    Args:
        persist_directory: Directory passed to ``Chroma`` (default
            ``'./chroma_db'``).

    Returns:
        The ``Chroma`` object constructed for that directory.
    """
    store_options = {
        'persist_directory': persist_directory,
        # Same model and dimensions as create_embeddings_chroma uses.
        'embedding_function': OpenAIEmbeddings(
            model='text-embedding-3-small',
            dimensions=1536,
        ),
    }
    return Chroma(**store_options)


#### Running Code

In [None]:
# Load the PDF with load_document (defined earlier in this notebook)
data = load_document('files/rag_powered_by_google_search.pdf')

# Split the loaded document into chunks (chunk_size=256)
chunks = chunk_data(data, chunk_size=256)

# Embed the chunks and build a Chroma vector store from them.
# create_embeddings_chroma hard-codes the text-embedding-3-small model and
# uses './chroma_db' as persist_directory unless told otherwise.
vector_store = create_embeddings_chroma(chunks)

In [None]:
# Ask a question against the vector store and print the full response
# object returned by the chain (the next cell prints only its 'result' entry).
userQuestion = 'What is Vertex AI Search?'
answer = ask_and_get_answer(vector_store, userQuestion)
print(answer)

In [None]:
# Show only the 'result' entry of the response obtained in the previous cell.
print(answer['result'])

#### Code to load an existing Chroma vector store from the specified directory

In [None]:
# Reopen the Chroma vector store from its persist directory (default ./chroma_db)
db = load_embeddings_chroma()

# User Query
userQuestion = 'How many pairs of questions and answers had the StackOverflow dataset?'

# Query the reloaded store `db`, so this cell actually exercises the data read
# back from disk rather than the in-memory `vector_store` built earlier.
answer = ask_and_get_answer(db, userQuestion)
print(answer)

In [None]:
# Follow-up questions do not work here: ask_and_get_answer builds a fresh
# chain on every call, so no chat history is available to resolve "that number".
userQuestion = 'Multiply that number by 2.'
answer = ask_and_get_answer(vector_store, userQuestion)
print(answer['result'])

#### Adding Memory (Chat History)

In [None]:
# Builds the module-level `crc` chain used by the question cells below:
# a retrieval chain over `vector_store` with a conversation memory attached.
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain  # Chain class combining retrieval with chat history
from langchain.memory import ConversationBufferMemory  # Memory class that stores the conversation history

# Instantiate the chat model (temperature controls randomness; 0 is used here)
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Configure the vector store as a similarity retriever with k=5
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 5})


# Memory buffer for the conversation; 'chat_history' is the key the history is
# stored under (a later cell reads result['chat_history'])
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Initialize the LangChain conversational retrieval chain with memory
crc = ConversationalRetrievalChain.from_llm(
    llm=llm,  # Chat model created above
    retriever=retriever,  # Retriever backed by the vector store
    memory=memory,  # Conversation memory created above
    chain_type='stuff',  # Specify the chain type
    verbose=False  # Set to True to enable verbose logging for debugging
)


In [None]:
# create a function to ask questions
def ask_question(q, chain):
    """Send question ``q`` to ``chain`` and return the chain's response.

    Args:
        q: The question text; it is passed under the ``'question'`` key.
        chain: Object exposing ``invoke``.

    Returns:
        Whatever ``chain.invoke`` returns.
    """
    payload = {'question': q}
    return chain.invoke(payload)

In [None]:
# Re-read the source document and its chunks so this section's inputs are explicit.
data = load_document('files/rag_powered_by_google_search.pdf')
chunks = chunk_data(data, chunk_size=256)

# Reopen the store already persisted in ./chroma_db instead of calling
# create_embeddings_chroma a second time: embedding the same chunks again into
# the same directory would pay for the embeddings twice and add the chunks to
# the existing collection again, so the retriever would return duplicates.
vector_store = load_embeddings_chroma()

In [None]:
# First question of the conversation; prints the whole response returned by the chain.
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

In [None]:
# Show only the 'answer' entry of the latest response.
print(result['answer'])

In [None]:
# Follow-up that refers back to the previous answer; `crc` was built with a
# conversation memory, which is what this question relies on.
q = 'Multiply that number by 10.'
result = ask_question(q, crc)

In [None]:
# Show the answer to the follow-up question asked in the previous cell.
print(result['answer'])

In [None]:
# Second follow-up, again referring to the previous answer ("the result").
q = 'Devide the result by 80.'
result = ask_question(q, crc)
print(result['answer'])


In [None]:
# Print every entry stored under 'chat_history' in the latest response, one per line.
for item in result['chat_history']:
    print(item)

### Loop for asking questions

In [None]:
# Interactive loop: keeps asking questions through `crc` until the user types
# one of the exit words.
while True:
    q = input('Your question: ')
    command = q.strip().lower()
    # Compare against whole words. A substring test on 'exit quit bye' would
    # also match '' (plain Enter) and fragments such as 'it' or 'by'.
    if command in ('exit', 'quit', 'bye'):
        print('Bye bye!')
        break
    if not command:
        # Nothing was typed: prompt again instead of sending an empty question.
        continue
    result = ask_question(q, crc)
    print(result['answer'])
    print('-' * 100)
    

### Using a Custom Prompt - This is the Main Code!

In [None]:
# Rebuilds the module-level `crc` chain, this time with a custom prompt
# (system + user message templates) supplied to the document-combining step.
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

# Fresh model, retriever (similarity search, k=5) and memory for the new chain
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 5})
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


# System message template; {context} is the placeholder for the context text
system_template = r'''
Use the following pieces of context to answer the user's question.
Before answering translate your response to Spanish.
If you don't find the answer in the provided context, just respond "I don't know."
---------------
Context: ```{context}```
'''

# User message template; {question} is the placeholder for the question text
user_template = '''
Question: ```{question}```
'''

# System message first, then the user message
messages= [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(user_template)
]

qa_prompt = ChatPromptTemplate.from_messages(messages)

# Same chain construction as before, plus the custom prompt passed through
# combine_docs_chain_kwargs and verbose logging switched on
crc = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type='stuff',
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    verbose=True
)

In [None]:
# Inspect the assembled chat prompt template.
print(qa_prompt)

In [None]:
# Load the existing Chroma Vector Store
db = load_embeddings_chroma()
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

### Loop for asking questions

In [None]:
# Interactive loop: keeps asking questions through `crc` until the user types
# one of the exit words.
while True:
    q = input('Your question: ')
    command = q.strip().lower()
    # Compare against whole words. A substring test on 'exit quit bye' would
    # also match '' (plain Enter) and fragments such as 'it' or 'by'.
    if command in ('exit', 'quit', 'bye'):
        print('Bye bye!')
        break
    if not command:
        # Nothing was typed: prompt again instead of sending an empty question.
        continue
    result = ask_question(q, crc)
    print(result['answer'])
    print('-' * 100)
    