# Document-based QA Bot with Large Language Models

In [2]:
# Install into the running kernel's environment (%pip rather than !pip).
# Upper bounds are required by the code below: it uses `from llama_index import ...`
# (package layout changed in llama-index 0.10) and `pinecone.init(...)` (removed in
# pinecone-client 3.0).
%pip install --quiet --no-cache-dir "llama-index<0.10" "pinecone-client<3" openai transformers

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/607.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/607.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m607.9/607.9 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/179.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.1/179.1 kB[0m [31m278.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m242.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m246.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m254.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━

In [15]:
# Step 1: Import required libraries
# ---------------------------------
import os
from getpass import getpass
from pathlib import Path

import openai
import pinecone
from llama_index import Document, download_loader, GPTVectorStoreIndex, StorageContext, ServiceContext
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores import PineconeVectorStore

In [17]:
# Step 2: Load documents using Llama-Index
# -----------------------------------------
# Fetch the PDFReader loader class through Llama-Index's download_loader helper.
PDFReader = download_loader("PDFReader")

# Instantiate the reader and parse the PDF; the parsed content is kept in
# 'documents' for the indexing step further down.
pdf_path = Path('/content/blockchain-book.pdf')
documents = PDFReader().load_data(file=pdf_path)

In [None]:
# Step 3: Setup OpenAI environment
# --------------------------------
# Use the key from the OPENAI_API_KEY environment variable when it is already set,
# and prompt for it (without echoing) otherwise. The key is never written into the
# notebook, and an existing real key is no longer overwritten by a placeholder.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Step 4: Setup Pinecone environment
# ----------------------------------
# Take the Pinecone credentials from the environment when present and ask for them
# otherwise, so that real values are neither hardcoded nor overwritten by placeholders.
if not os.environ.get('PINECONE_API_KEY'):
    os.environ['PINECONE_API_KEY'] = getpass('Pinecone API key: ')
if not os.environ.get('PINECONE_ENVIRONMENT'):
    os.environ['PINECONE_ENVIRONMENT'] = input('Pinecone environment: ')

# Initialize connection to Pinecone
pinecone.init(
    api_key=os.environ['PINECONE_API_KEY'],
    environment=os.environ['PINECONE_ENVIRONMENT']
)

# Create an index in Pinecone if it does not exist already. This index will store the document embeddings.
# NOTE(review): dimension must equal the vector size produced by the embedding model
# configured in Step 6 (text-embedding-ada-002) — confirm if the model is changed.
index_name = 'docindex'
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name,
        dimension=1536,
        metric='cosine'
    )

# Connect to the created index in Pinecone
pinecone_index = pinecone.Index(index_name)

In [None]:
# Step 5: Setup Vector Store with Pinecone
# ----------------------------------------
# Wrap the Pinecone index handle in a PineconeVectorStore so that Llama-Index can
# use it as the backing store for the document embeddings.
vector_store = PineconeVectorStore(
    pinecone_index=pinecone_index,
)

In [None]:
# Step 6: Setup Storage and Service Context
# ------------------------------------------
# ServiceContext: controls how documents are embedded. Here it is built around
# OpenAI's text-embedding-ada-002 model with an embedding batch size of 100.
embed_model = OpenAIEmbedding(
    model='text-embedding-ada-002',
    embed_batch_size=100,
)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

# StorageContext: controls where the embeddings are stored. Here that is the
# Pinecone-backed vector store created in the previous step.
storage_context = StorageContext.from_defaults(vector_store=vector_store)


In [None]:
# Step 7: Indexing documents
# --------------------------
# Build a GPTVectorStoreIndex over the loaded documents. The service context supplies
# the embedding model and the storage context supplies the vector store that receives
# the resulting embeddings.
index = GPTVectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)

In [None]:
# Step 8: Querying the index
# ---------------------------
# Create a query engine from the index; the cells below call `query_engine.query(...)`
# on it. Queries are embedded using the same embed_model, and the embeddings are
# compared to the document embeddings in the vector store to find the most similar documents.
query_engine = index.as_query_engine()

In [13]:
# First example question. `query_engine` is already created in Step 8, so it is
# reused here rather than being rebuilt a second time.
res = query_engine.query("What is consensus in blockchain")
print(res)


Consensus in blockchain is the process of reaching agreement among all participants in a network on the validity of a transaction. It is achieved through a variety of consensus algorithms, such as Dev-Mode, PoET (Proof of Elapsed Time), and others. These algorithms are used to validate transactions and ensure that all participants in the network agree on the validity of the transaction.


In [14]:
# Second example question, answered by the same query engine.
question = "can you explain proof of work and proof of stake"
res = query_engine.query(question)
print(res)


Proof of Work (PoW) is a consensus algorithm used by blockchain networks to validate transactions and create new blocks. It is an open challenge to all miners to solve a complex mathematical puzzle. The miner who solves the puzzle first is rewarded with a fixed amount of coins. PoW requires a lot of computational power and is used by Bitcoin and other cryptocurrencies.

Proof of Stake (PoS) is an alternative consensus algorithm to PoW. In PoS, the validators are chosen based on the fraction of coins they own in the system. The nodes with more number of coins have more chance to be selected than the node with lesser number of coins. In PoS the reward is in the form of transaction fee, new coins are not created for paying the validators. Presently, Blackcoin, NXT and Peercoin blockchains uses the PoS algorithm. Ethereum is also planning to shift to this method by 2018.


# Web-based QA Bot with Large Language Models

In [1]:
# Install into the running kernel's environment (%pip rather than !pip).
%pip install --quiet langchain cohere faiss-cpu loguru unstructured sentence_transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m48.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.8/101.8 kB[0m [31m3.4 MB/s[0m eta

In [3]:
# Import necessary libraries
from typing import Optional
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredURLLoader
from langchain.llms import Cohere
from langchain.chains import RetrievalQAWithSourcesChain
import requests
from loguru import logger
import pickle
import os


class WebsiteQABot:
    """Question-answering bot over the text content of a list of web pages.

    Constructing the bot loads the pages, splits them into chunks, embeds the
    chunks into a FAISS vector store (also pickled to ``faiss_store_instruct.pkl``
    in the current working directory) and builds a retrieval chain backed by a
    Cohere LLM. Use :meth:`ask` to query it afterwards.
    """

    def __init__(
        self,
        urls: list,
        chunk_size: int,
        chunk_overlap: int,
        cohere_api_key: Optional[str] = None,
    ):
        """Build the vector store and the retrieval chain.

        Args:
            urls: Web page URLs whose content forms the knowledge base.
            chunk_size: ``chunk_size`` forwarded to ``CharacterTextSplitter``.
            chunk_overlap: ``chunk_overlap`` forwarded to ``CharacterTextSplitter``.
            cohere_api_key: Cohere API key. When omitted, the ``COHERE_API_KEY``
                environment variable is used.

        Raises:
            ValueError: If ``urls`` is empty, or if no Cohere API key is supplied
                and ``COHERE_API_KEY`` is unset or empty.
        """
        # Validate cheap preconditions first so that a misconfiguration fails
        # before any page is downloaded or any embedding is computed.
        if not urls:
            raise ValueError("urls must contain at least one URL")
        api_key = cohere_api_key or os.environ.get("COHERE_API_KEY")
        if not api_key:
            raise ValueError(
                "Cohere API key not found: pass cohere_api_key or set the "
                "COHERE_API_KEY environment variable"
            )

        # Initialize the bot and start the logging
        logger.info("Building the QA Bot ...")

        # Load the URLs content
        logger.info("Loading URLs content ...")
        loader = UnstructuredURLLoader(urls)
        data = loader.load()

        # Split the documents into chunks of a specific size
        logger.info("Splitting documents in chunks ...")
        doc_splitter = CharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        docs = doc_splitter.split_documents(data)
        logger.info("{n} chunks created", n=len(docs))

        # Build the vector database using HuggingFaceEmbeddings (default settings)
        logger.info("Building the vector database ...")
        embeddings = HuggingFaceEmbeddings()
        docsearch = FAISS.from_documents(docs, embeddings)

        # Save the database to a pickle file.
        # NOTE(review): pickle files must only ever be loaded back from a trusted
        # location, since unpickling can execute arbitrary code.
        with open("faiss_store_instruct.pkl", "wb") as f:
            pickle.dump(docsearch, f)

        # Create a retriever that retrieves the top 3 most relevant documents
        retriever = docsearch.as_retriever(search_kwargs={"k": 3})

        # Build the retrieval chain using the Cohere model. The key is handed to
        # the client directly instead of being written into os.environ, so an
        # existing COHERE_API_KEY is never overwritten.
        logger.info("Building the retrieval chain ...")
        llm = Cohere(model="command-nightly", temperature=0, cohere_api_key=api_key)
        self.chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

        logger.info("QA Bot created!")

    # Function to ask a query to the bot
    def ask(self, query: str):
        """Run ``query`` through the retrieval chain and return the chain's result.

        The chain is invoked with ``return_only_outputs=False``, so the result
        includes the input question alongside the chain outputs.
        """
        return self.chain({"question": query}, return_only_outputs=False)

if __name__ == "__main__":
    # Pages that make up the bot's knowledge base.
    urls = ["https://blog.monsterapi.ai/what-is-falcon-7b-instruct-a-better-alternative-to-gpt-3/"]

    # Build the bot: this loads the pages, chunks them and creates the retrieval chain.
    bot = WebsiteQABot(urls=urls, chunk_size=2000, chunk_overlap=100)

    # Send one question through the bot and show the raw result.
    query = "why Is Falcon-7B Instruct a better alternative to GPT-3?"
    res = bot.ask(query)
    print(res)


[32m2023-07-21 11:31:06.944[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m23[0m - [1mBuilding the QA Bot ...[0m
[32m2023-07-21 11:31:06.946[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m26[0m - [1mLoading URLs content ...[0m
[32m2023-07-21 11:31:07.594[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m31[0m - [1mSplitting documents in chunks ...[0m
[32m2023-07-21 11:31:07.597[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m36[0m - [1m4 chunks created[0m
[32m2023-07-21 11:31:07.602[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m39[0m - [1mBuilding the vector database ...[0m
[32m2023-07-21 11:31:18.374[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m51[0m - [1mBuilding the retrieval chain ...[0m
[32m2023-07-21 11:31:18.377[0m | [1mINFO    [0m | [36m__main__[0m:[36m__init__[0m:[36m56[0m - [1mQA Bot created![0m
Token indices sequence length is l

{'question': 'why Is Falcon-7B Instruct a better alternative to GPT-3?', 'answer': ' Falcon-7B Instruct has been fine-tuned on instructions and conversational data, making it more suitable for popular assistant-style tasks. It requires less GPU memory, making it more accessible on consumer hardware. It offers a unique combination of affordability and performance, making it a compelling alternative to GPT-3.', 'sources': ''}
