Install Necessary Libarary

In [1]:
#%pip install --quiet --upgrade ipywidgets langchain langchain_openai

```mermaid
graph LR
A[Document] --> |Split| B[Chunks]
    B --> |Create Embedding| C[Vector Store]
D[User] --> E(Query)
    E --> |Create Embedding| F(Similarity Search)
    F --> C
    E --> G[LLMS]
    C --> G
    
```

Import OpenAI API Key

In [2]:
import os
f = open('openaikey.txt')
os.environ["OPENAI_API_KEY"]=f.read()

Import Document and Split into Chuncks

In [3]:
from langchain_community.document_loaders.pdf import PyPDFLoader
pyPDFLoader = PyPDFLoader("resources/DriversGuidefinal2014.pdf")
data = pyPDFLoader.load()

Split using Text Splitter

In [4]:
from langchain_text_splitters import PythonCodeTextSplitter
textSplitter = PythonCodeTextSplitter(chunk_size=1000,chunk_overlap=300)
pages= pyPDFLoader.load_and_split(text_splitter=textSplitter)

Print Relevant information

In [5]:
print(f"Total Documents Length before text split {len(data)}")
print(f"Total Documents after the text split {len(pages)}")


Total Documents Length before text split 122
Total Documents after the text split 304


Initiate Embedding

In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
embedding = OpenAIEmbeddings()
db = FAISS.from_documents(documents=pages,embedding=embedding)
print(db.index.ntotal)

304


Try executing  a query

In [7]:
query = "What is this document about ? "
result = db.similarity_search(query=query)


Explore results

In [8]:
print(result[2])

page_content='to Operation, Safety  
and Licensing
CARS & LIGHT TRUCKSDRIVER’S  
GUIDE' metadata={'source': 'resources/DriversGuidefinal2014.pdf', 'page': 1}


Create a Chat bot

In [9]:
from langchain.chains.question_answering.chain import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI

openAI = ChatOpenAI()

chain = load_qa_chain(openAI,chain_type="stuff")
query = "I am from India can I exchange my Indian licence to get Alberta license ?"
docs = db.similarity_search(query)
response = chain.invoke(input={"input_documents":docs, "question":query})

Explore result

In [10]:
print(response)

{'input_documents': [Document(metadata={'source': 'resources/DriversGuidefinal2014.pdf', 'page': 17}, page_content='NOTE: Japanese licence exchanges \nmust be completed at an approved \nregistry agent office . Parental consent \nfor applicants under 18 years of age \nis required.  All other licence classes require testing \nand vision screening. The applicants must disclose all medical conditions and \nphysical disabilities that may affect their \nability to safely operate a motor vehicle. \nAll applicants must produce immigration \nand residency documentation along \nwith acceptable identification.\nCountries without reciprocal \nlicensing agreements\nA person coming to Alberta from outside \nCanada may apply to have their previous \ndriving history credited to their Alberta \ndriving record. A successful application \nwill grant exemption from the Graduated \nDriver Licensing (GDL) program and allow \nfor Class 5 road testing without being \nrequired to hold a Class 7 driver’s licenc

Create a Chat Component

In [11]:
from IPython.display import display
import ipywidgets as widgets

qa = ConversationalRetrievalChain.from_llm(openAI, db.as_retriever())

chat_history = []

def on_sumbit(_):
    query = input_box.value
    input_box.value=""
    print(chat_history)
    if query.lower() == 'exit':
        print("Thank you for chatting with me")
        return
    
    result = qa.invoke(input={"question":query,"chat_history":chat_history})
    chat_history.append((query, result['answer']))
   
    display(widgets.HTML(f"Number of matches found : {len(result)}"))
    display(widgets.HTML(f"<b>User Query</b> : {query}"))
    display(widgets.HTML(f'<b><font color="blue">Chat Bot :</font></b> {result["answer"]}'))

print("Welcome to the chatbot")
input_box = widgets.Text(placeholder="Enter the Question")
input_box.on_submit(on_sumbit)

display(input_box)

Welcome to the chatbot


  input_box.on_submit(on_sumbit)


Text(value='', placeholder='Enter the Question')