In [18]:
# Import necessary libraries
import openai
import tiktoken
import dotenv
import os
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import AzureOpenAI

In [80]:

# Configure OpenAI API
# os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_MODEL_NAME = os.environ.get("AZURE_MODEL_NAME")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_TEXT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_TEXT_DEPLOYMENT")
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT")
AZURE_EMBEDDING_MODEL_NAME = os.environ.get("AZURE_EMBEDDING_MODEL_NAME")
openai.api_type = "azure"
openai_api_type = 'azure'
openai.api_version = AZURE_OPENAI_API_VERSION
openai.api_base = AZURE_OPENAI_ENDPOINT
openai.api_key = AZURE_OPENAI_KEY






In [20]:
# Configure the location of the PDF file.
pdfReader = PdfReader('../data/LLM-AI-Agents.pdf')



In [21]:
# Extract the text from the PDF file.
raw_text = ''
for i, page in enumerate(pdfReader.pages):
    text = page.extract_text()
    if text:
        raw_text += text

In [22]:
# Show first 1000 characters of the text.
raw_text[:1000]

'TPTU: Task Planning and Tool Usage of\nLarge Language Model-based AI Agents\nJingqing Ruan†‡\nruanjingqing@sensetime.comYihong Chen†‡\nchenyihong@sensetime.comBin Zhang†‡\nzhangbin11@sensetime.com\nZhiwei Xu†‡\nxuzhiwei@sensetime.comTianpeng Bao†\nbaotianpeng@sensetime.comGuoqing Du†\nduguoqing@sensetime.com\nShiwei Shi†\nshishiwei@sensetime.comHangyu Mao†∗\nmaohangyu@sensetime.comXingyu Zeng\nzengxingyu@sensetime.com\nRui Zhao\nzhaorui@sensetime.com\nSenseTime Research, China\nAbstract\nWith recent advancements in natural language processing, Large Language Models\n(LLMs) have emerged as powerful tools for various real-world applications. Despite\ntheir prowess, the intrinsic generative abilities of LLMs may prove insufficient\nfor handling complex tasks which necessitate a combination of task planning and\nthe usage of external tools. In this paper, we first propose a structured framework\ntailored for LLM-based AI Agents and discuss the crucial capabilities necessary for\ntackling 

In [23]:
# Split the text into chunks of 1000 characters with 200 characters overlap.
text_splitter = CharacterTextSplitter(        
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
)
pdfTexts = text_splitter.split_text(raw_text)


In [24]:
# Show how many chunks of text are generated.
len(pdfTexts)

98

In [81]:
# Pass the text chunks to the Embedding Model from Azure OpenAI API to generate embeddings.
embeddings = OpenAIEmbeddings(
    openai_api_key = AZURE_OPENAI_KEY, 
    deployment = AZURE_OPENAI_TEXT_DEPLOYMENT, 
    model = AZURE_EMBEDDING_MODEL_NAME,
    client="azure",
    chunk_size=1
)



In [82]:
# Pass the text chunks to the Embedding Model from Azure OpenAI API to generate embeddings.
embeddings = OpenAIEmbeddings(
    openai_api_base= AZURE_OPENAI_ENDPOINT,
    openai_api_type='azure',
    deployment='text-embedding-ada-002',
    openai_api_key=AZURE_OPENAI_KEY,
    chunk_size=1,
)


In [83]:
# Use FAISS to index the embeddings. This will allow us to perform a similarity search on the texts using the embeddings.
# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/faiss.html
pdfDocSearch = FAISS.from_texts(pdfTexts, embeddings)

In [77]:
# Use FAISS to index the embeddings. This will allow us to perform a similarity search on the texts using the embeddings.
# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/faiss.html
pdfDocSearch = FAISS.from_texts(pdfTexts, embeddings)

In [84]:
# Create a Question Answering chain using the embeddings and the similarity search.
# https://docs.langchain.com/docs/components/chains/index_related_chains
chain = load_qa_chain(AzureOpenAI(openai_api_key=AZURE_OPENAI_KEY, deployment_name=AZURE_OPENAI_TEXT_DEPLOYMENT, model_name=AZURE_MODEL_NAME, openai_api_version=AZURE_OPENAI_API_VERSION), chain_type="stuff")

print(chain)

memory=None callbacks=None callback_manager=None verbose=False tags=None metadata=None input_key='input_documents' output_key='output_text' llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:", template_format='f-string', validate_template=True), llm=AzureOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.7, max_tokens=256, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={}, openai_api_key='815e9449a52847f7acbff5391ed6e

In [85]:
# Perform first sample of question answering.
inquiry = "Please tell me the key summary of this book."
docs = pdfDocSearch.similarity_search(inquiry)
chain.run(input_documents=docs, question=inquiry)

' This book discusses the use of techniques such as chain-of-thought and vector databases to endow AI Agents with the abilities of summarization, task planning, tool usage, perception, learning/reflection/memory, and summarization. The authors also explore phenomena such as LLMs difficulty in understanding output formats.'