!pip install pypdf2
!pip install faiss-cpu
!pip install tiktoken

In [1]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

In [112]:
from langchain.vectorstores import FAISS

In [3]:
pdfreader = PdfReader("Accenture-Tech-Vision-2024.pdf")

In [111]:
# from typing_extensions import Concatenate

In [13]:
raw_text=''

for i, page in enumerate(pdfreader.pages):
    content = page.extract_text()
    if content:
        raw_text +=content

In [14]:
raw_text



In [31]:
text_splitter = CharacterTextSplitter(
    separator = "",
    chunk_size = 256,
    length_function = len    
)

In [32]:
texts = text_splitter.split_text(raw_text)

In [33]:
len(texts)

4011

In [91]:
from langchain.embeddings import HuggingFaceEmbeddings

In [93]:
model_name = "sentence-transformers/all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=model_name)

In [97]:
# storing embeddings in the vector store
vectorstore = FAISS.from_documents(text_splitter.create_documents(texts), embeddings)

In [98]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x26d466234c0>

In [99]:
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import Ollama

In [109]:
chain = load_qa_chain(Ollama(model="llama2"), chain_type = "stuff")

In [101]:
query = "what are the key areas of research?"

In [102]:
docs = vectorstore.similarity_search(query)

In [103]:
docs

[Document(page_content='research techniques, such as data-science-led analysis, with a deep understanding of industry and technology, our team of 300 researchers in 20 countries publish hundreds of reports, articles and points of view every year. Our thought-provoking research d'),
 Document(page_content='e, India. For more information,  visit Accenture Labs on www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about the most pressing business issues organizations face. Combining innovative research techniques, such as da'),
 Document(page_content='ntire tracts of scientific research by looking \nfor information on the web, consulting scientific documents, and using scientific \nequipment in a cloud lab.35 Google’s PaLM-E \ncan take a command in natural language, \nbreak it down into a series of subtasks'),
 Document(page_content='www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about the most pressing busine

In [110]:
chain.invoke({"input_documents": docs, "question":query})

{'input_documents': [Document(page_content='research techniques, such as data-science-led analysis, with a deep understanding of industry and technology, our team of 300 researchers in 20 countries publish hundreds of reports, articles and points of view every year. Our thought-provoking research d'),
  Document(page_content='e, India. For more information,  visit Accenture Labs on www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about the most pressing business issues organizations face. Combining innovative research techniques, such as da'),
  Document(page_content='ntire tracts of scientific research by looking \nfor information on the web, consulting scientific documents, and using scientific \nequipment in a cloud lab.35 Google’s PaLM-E \ncan take a command in natural language, \nbreak it down into a series of subtasks'),
  Document(page_content='www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about t

In [116]:
vectorstore.save_local(folder_path = "", index_name = "Acc_TechVision_FiassIndex")

In [118]:
vectorstore_test = FAISS.load_local(folder_path = "", index_name = "Acc_TechVision_FiassIndex",embeddings = embeddings)

In [119]:
vectorstore_test

<langchain_community.vectorstores.faiss.FAISS at 0x26d4476f910>

In [120]:
vectorstore_test.similarity_search("what are the key areas of research?")

[Document(page_content='research techniques, such as data-science-led analysis, with a deep understanding of industry and technology, our team of 300 researchers in 20 countries publish hundreds of reports, articles and points of view every year. Our thought-provoking research d'),
 Document(page_content='e, India. For more information,  visit Accenture Labs on www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about the most pressing business issues organizations face. Combining innovative research techniques, such as da'),
 Document(page_content='ntire tracts of scientific research by looking \nfor information on the web, consulting scientific documents, and using scientific \nequipment in a cloud lab.35 Google’s PaLM-E \ncan take a command in natural language, \nbreak it down into a series of subtasks'),
 Document(page_content='www.accenture.com .\nAccenture Research  \nAccenture Research creates thought leadership about the most pressing busine

In [1]:
import gradio as gr
from langchain_community.llms import Ollama
from langchain.chains.question_answering import load_qa_chain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [2]:
llm = Ollama(model="llama2")
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
vectorstore = FAISS.load_local(folder_path = "", index_name = "Acc_TechVision_FiassIndex",embeddings = embeddings)
chain = load_qa_chain(llm, chain_type="stuff")

In [3]:
def InvokeQnAmodel(query,history):
    docs = vectorstore.similarity_search(query)
    return chain.invoke({"input_documents": docs, "question": query})['output_text']

In [4]:
t = gr.ChatInterface(fn=InvokeQnAmodel,title="Accenture-Tech-Vision-2024 helper Bot",retry_btn=None,undo_btn=None,clear_btn=None)
t.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [1]:
%run DocumentChatBot.py

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
