In [None]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

In [None]:
def load_document(file_path):
    """
    Load a document from disk with the LangChain loader that matches its extension.

    Supported extensions are ``.pdf`` (PyPDFLoader) and ``.docx`` (Docx2txtLoader).
    The extension is compared case-insensitively, so ``.PDF`` is accepted too.

    :param file_path: The path to the file
    :return: The result of the loader's ``load()`` call, or ``None`` (after
        printing a message) when the file extension is not supported
    """
    _, extension = os.path.splitext(file_path)
    normalized = extension.lower()

    if normalized == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        loader_cls = PyPDFLoader
    elif normalized == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        loader_cls = Docx2txtLoader
    else:
        # No loader is available: report it and return instead of calling
        # .load() on None, which used to raise AttributeError.
        print(f'The file extension of {extension} is not supported')
        return None

    print(f'Loading {file_path}')
    return loader_cls(file_path).load()

# Wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """
    Fetch documents from Wikipedia through LangChain's WikipediaLoader.

    :param query: The search text handed to the loader
    :param lang: The language code handed to the loader (defaults to 'en')
    :param load_max_docs: Limit on the number of downloaded docs, handed to the loader
    :return: Whatever the loader's ``load()`` call returns for this query
    """
    from langchain.document_loaders import WikipediaLoader

    loader_options = {
        'query': query,
        'lang': lang,
        'load_max_docs': load_max_docs,
    }
    return WikipediaLoader(**loader_options).load()

In [None]:
def chunk_data(data, chunk_size=256, chunk_overlap=0) -> list:
    """
    Split documents into smaller chunks with RecursiveCharacterTextSplitter.

    :param data: The documents to be chunked
    :param chunk_size: The chunk size passed to the splitter
    :param chunk_overlap: The overlap between consecutive chunks passed to the
        splitter; defaults to 0, which is the value that used to be hard-coded
    :return: Returns a list of chunks of the data
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

In [None]:
def print_embedding_cost(texts, model='text-embedding-ada-002', price_per_1k_tokens=0.0004):
    """
    Print the total token count of the given documents and an estimated embedding cost.

    :param texts: Iterable of objects exposing their text as ``page_content``
    :param model: Model name used to select the tiktoken encoding
    :param price_per_1k_tokens: Price in USD per 1000 tokens used for the estimate
    :return: None; both figures are printed
    """
    import tiktoken

    # NOTE(review): the defaults are the originally hard-coded values. The vector
    # store helpers in this notebook embed with 'text-embedding-3-small', so pass
    # that model and its current price for an accurate estimate - confirm the
    # price against the provider's pricing page.
    enc = tiktoken.encoding_for_model(model)
    total_tokens = sum(len(enc.encode(page.page_content)) for page in texts)
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens / 1000 * price_per_1k_tokens:.6f}')

#### Embedding and Uploading to a Vector Database (Pinecone)

In [None]:
def insert_or_fetch_embeddings(index_name: str, chunks: list):
    """
    Return a Pinecone-backed vector store for ``index_name``.

    When the index already exists it is opened as it is and ``chunks`` is not
    used. Otherwise a 1536-dimension cosine index is created and ``chunks`` are
    embedded and uploaded into it.

    :param index_name: The name of the index
    :param chunks: The chunks to embed and upload when the index has to be created
    :return: vector store
    """
    import pinecone
    from langchain_community.vectorstores import Pinecone
    from langchain_openai import OpenAIEmbeddings
    from pinecone import PodSpec

    client = pinecone.Pinecone()
    embedder = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)

    known_indexes = client.list_indexes().names()
    if index_name not in known_indexes:
        print(f'Creating index {index_name} and use existing embeddings...', end='')
        client.create_index(
            name=index_name,
            dimension=1536,
            metric='cosine',
            spec=PodSpec(environment='gcp-starter'),
        )
        store = Pinecone.from_documents(chunks, embedder, index_name=index_name)
    else:
        print(f'Index {index_name} already exists. Loading embeddings ...', end='')
        store = Pinecone.from_existing_index(index_name, embedder)

    # Both paths finish the progress line the same way.
    print('Ok')
    return store

In [None]:
def delete_pinecone_index(index_name='all'):
    """
    Delete one Pinecone index, or every index when ``index_name`` is 'all'.

    :param index_name: The name of the index to delete; the default 'all'
        deletes every index returned by the client
    :return: None
    """
    import pinecone

    client = pinecone.Pinecone()

    # Single-index case first, so the "delete everything" path reads flat below.
    if index_name != 'all':
        print(f'Deleting index {index_name}...', end='')
        client.delete_index(index_name)
        print('Ok')
        return

    every_index = client.list_indexes().names()
    print(f'Deleting all indexes ....')
    for current in every_index:
        client.delete_index(current)
        print('Ok')

In [None]:
def ask_and_get_answer(vector_store, q):
    """
    Answer a question with a RetrievalQA chain built on top of the vector store.

    :param vector_store: The vector store the retriever searches
    :param q: The question
    :return: The value returned by the chain's ``invoke`` call (the calling
        cells read its 'query' and 'result' entries)
    """
    from langchain.chains import RetrievalQA
    from langchain_openai import ChatOpenAI

    chat_model = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)

    # Similarity search limited to the 3 closest chunks.
    top_chunks_retriever = vector_store.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3},
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=top_chunks_retriever,
    )
    return qa_chain.invoke(q)

## Running Code

In [None]:
# Load the PDF and report how many entries (pages) were returned.
data = load_document('files/us_constitution.pdf')
# print(data[1].page_content)
# print(data[10].metadata)

print(f'You have {len(data)} pages in your data')
# NOTE(review): index 20 is hard-coded; this raises IndexError if the loaded
# document has fewer than 21 pages.
print(f'There are {len(data[20].page_content)} characters in the page')

In [None]:
# Split the loaded pages into chunks (chunk_data's default chunk_size is used)
# and report how many chunks were produced.
chunks = chunk_data(data=data)
print(len(chunks))
# print(chunks[10].page_content)

In [None]:
# Spot check: show the text of one chunk (index 10).
print(chunks[10].page_content)

In [None]:
# Print the token count and the estimated embedding cost for the chunks.
print_embedding_cost(chunks)

In [None]:
# Load a .docx file and print the text of the first returned document.
data = load_document('files/the_great_gatsby.docx')
print(data[0].page_content)

In [None]:
# Query Wikipedia for "GPT-4" with lang='de' and print the text of the first
# returned document.
data = load_from_wikipedia(query="GPT-4", lang='de')
print(data[0].page_content)

#### Deleting all indexes  

In [None]:
# Called without an argument, index_name defaults to 'all', so every index
# listed by the Pinecone client is deleted.
delete_pinecone_index()

#### Create an index, then create embeddings from the document chunks and upload both the chunks and the embeddings to Pinecone

In [None]:
# Create an index on Pinecone (or open it if it already exists) and get a
# vector store for it.
# NOTE(review): 'your_index_name' is a placeholder - replace it with your own
# index name. Pinecone presumably restricts index names to lowercase
# alphanumerics and '-'; confirm that underscores are accepted before running
# this as-is.
index_name = 'your_index_name'
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

In [None]:
# Ask a question against the vector store; only the 'result' entry of the
# returned object is printed.
q = 'What is the whole document about?'
answer = ask_and_get_answer(vector_store, q)
print(answer['result'])

In [None]:
# Ask questions continuously until the user types "quit" or "exit".
import time

i = 1
print('Write Quit or Exit to quit')
while True:
    q = input(f'Question #{i}: ')
    i += 1
    # Ignore surrounding whitespace and letter case when checking for the exit words.
    if q.strip().lower() in ['quit', 'exit']:
        print('Quitting ... bye bye!')
        time.sleep(2)
        break
    answer = ask_and_get_answer(vector_store, q)
    # The outer quotes must differ from the quotes around the dict keys:
    # reusing the same quote inside a replacement field is a SyntaxError on
    # Python versions before 3.12.
    print(f"Question: {answer['query']}")
    print(f"\nAnswer: {answer['result']}")
    print(f'\n {"-" * 50} \n')

In [None]:
# Load the Romanian Wikipedia article about ChatGPT and split it into chunks.
data = load_from_wikipedia('ChatGPT', 'ro')
chunks = chunk_data(data)
index_name = 'your_index_name'

# insert_or_fetch_embeddings() only uploads `chunks` when the index does not
# exist yet. This is the same index name that was populated earlier in the
# notebook, so an existing index has to be dropped first; otherwise its old
# vectors would be loaded and the new Wikipedia chunks silently ignored.
import pinecone
if index_name in pinecone.Pinecone().list_indexes().names():
    delete_pinecone_index(index_name)

vector_store = insert_or_fetch_embeddings(index_name, chunks)

In [None]:
# Ask in Romanian, matching the language ('ro') of the Wikipedia article loaded
# above; only the 'result' entry is printed.
q = "Ce este ChatGPT?"
answer = ask_and_get_answer(vector_store, q)
print(answer['result'])

## Using Chroma as a Vector DB

In [None]:
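# If chromadb is not installed, you can install it by running the following command:
# pip install -q chromadb
# If the installation fails while building hnswlib, set this environment variable first and retry:
# export HNSWLIB_NO_NATIVE=1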

In [None]:
def create_embeddings_chroma(chunks, persist_directory='./chroma_db'):
    """
    Embed the document chunks with OpenAI and store chunks and embeddings in Chroma.

    :param chunks: The document chunks to embed and store
    :param persist_directory: The directory handed to Chroma for persisting the embeddings
    :return: Returns vector store object
    """
    from langchain_community.vectorstores import Chroma
    from langchain_openai import OpenAIEmbeddings

    embedding_model = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)
    chroma_store = Chroma.from_documents(
        chunks,
        embedding_model,
        persist_directory=persist_directory,
    )
    return chroma_store

def load_embeddings_chroma(persist_directory='./chroma_db'):
    """
    Open the Chroma store kept in ``persist_directory`` as a vector store object.

    :param persist_directory: The directory the embeddings were persisted to
    :return: Returns a vector store object backed by the existing embeddings
    """
    from langchain_community.vectorstores import Chroma
    from langchain_openai import OpenAIEmbeddings

    # Same embedding configuration as the one used when the store was created.
    embedding_model = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)
    chroma_options = {
        'persist_directory': persist_directory,
        'embedding_function': embedding_model,
    }
    return Chroma(**chroma_options)

In [None]:
# Load the PDF, split it with chunk_size=256, then embed the chunks and store
# them in Chroma (persist_directory is left at its default, './chroma_db').
data = load_document('files/rag_powered_by_google_search.pdf')
chunks = chunk_data(data, chunk_size=256)
vector_store = create_embeddings_chroma(chunks=chunks)

In [None]:
# Query the Chroma-backed store; the whole returned object is printed here,
# not just its 'result' entry.
q = "What is Vertex AI Search?"
answer = ask_and_get_answer(vector_store=vector_store, q=q)
print(answer)

In [None]:
# Reload the persisted Chroma store from disk and run the query against that
# reloaded store, which shows that the embeddings survive without re-embedding.
db = load_embeddings_chroma()
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
answer = ask_and_get_answer(vector_store=db, q=q)
print(answer)

## Adding Memory (Chat History)

In [None]:
from langchain_openai import ChatOpenAI
# Chain for holding a conversation that is grounded in the retrieved documents.
from langchain.chains import ConversationalRetrievalChain
# Memory class that stores the conversation in a buffer.
from langchain.memory import ConversationBufferMemory

llm = ChatOpenAI(model_name='gpt-4-turbo-preview', temperature=0)

# The retriever is the component that finds the data relevant to a question.
# Here it returns the 5 chunks most similar to the query.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 5},
)

# `chat_history` is the key under which the memory exposes the stored conversation.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# Build the conversational retrieval chain from the pieces above.
# chain_type='stuff' means: use all the text from the retrieved documents.
crc = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type='stuff',
    verbose=True,
)

In [None]:
def ask_question(q, chain):
    """
    Send a question to the chain and hand back whatever the chain produces.

    :param q: The question
    :param chain: The chain; it is invoked with a dict holding the question
        under the 'question' key
    :return: Returns the value of the chain's ``invoke`` call
    """
    payload = {'question': q}
    response = chain.invoke(payload)
    return response

In [None]:
data = load_document('files/rag_powered_by_google_search.pdf')
chunks = chunk_data(data, chunk_size=256)
# This PDF was already embedded into the default Chroma directory earlier in
# the notebook. Calling create_embeddings_chroma() again would pay for the same
# embeddings a second time and add the same chunks to the persisted store once
# more, so reopen the existing store instead.
vector_store = load_embeddings_chroma()

In [None]:
# First question through the conversational chain; the full result object is printed.
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

In [None]:
# Show only the 'answer' entry of the chain's result.
print(result['answer'])

##### Let's test if it remembers the last question.

In [None]:
# Follow-up question that refers back to the previous turn ("that number").
q = 'Multiply that number by 10.'
result = ask_question(q, crc)
print(result)
# The previous exchange is stored under `chat_history`, which is what lets the
# chain interpret this follow-up.

##### Display the chat history: iterate over the content of the `chat_history` key, which contains all the questions and their answers


In [None]:
# Print every entry stored under the result's 'chat_history' key, one per line.
for item in result['chat_history']:
    print(item)

## Using a Custom Prompt

In [None]:
from langchain_openai import ChatOpenAI
# Chain for holding a conversation that is grounded in the retrieved documents.
from langchain.chains import ConversationalRetrievalChain
# Memory class that stores the conversation in a buffer.
from langchain.memory import ConversationBufferMemory

from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

llm = ChatOpenAI(model_name='gpt-4-turbo-preview', temperature=0)

# The retriever is the component that finds the data relevant to a question.
# Here it returns the 5 chunks most similar to the query.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 5},
)

# `chat_history` is the key under which the memory exposes the stored conversation.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# System message: answer from the supplied context only, otherwise say "I don't know."
system_template = r'''
Use the following pieces of context to answer the user's question.
If you don't find the answer in the provided context, just respond "I don't know."
-------------------
Context: ```{context}```
'''

# User message: carries the question together with the chat history.
user_template = r'''
Question: ```{question}```
Chat History: ```{chat_history}```
'''

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(user_template),
]

# Question-answer prompt assembled from the two message templates.
qa_prompt = ChatPromptTemplate.from_messages(messages)

# Same chain as before, except that the document-combining step uses qa_prompt.
# chain_type='stuff' means: use all the text from the retrieved documents.
crc = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type='stuff',
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    verbose=True,
)

In [None]:
# Print the assembled chat prompt template to inspect it.
print(qa_prompt)

In [None]:
# Ask something the indexed PDF presumably does not cover; the system prompt
# tells the model to reply "I don't know." when the context has no answer.
# NOTE(review): `db` is assigned but not used below - `crc` retrieves from the
# `vector_store` its retriever was built on.
db = load_embeddings_chroma()
q = 'When was Bill Gates born?'
result = ask_question(q, crc)
print(result)

In [None]:
# Ask the StackOverflow dataset question through the custom-prompt chain and
# print the full result object.
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
print(result)

In [None]:
# Follow-up question that refers back to the previous turn ("that number").
q = 'Multiply that number by 10.'
result = ask_question(q, crc)
print(result)

In [None]:
# Print every entry stored under the result's 'chat_history' key, one per line.
for item in result['chat_history']:
    print(item)