In [12]:
import os

import openai
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [13]:
# Configure the OpenAI library for Azure OpenAI Service.
#
# The API key and the resource endpoint are taken from the AZURE_OPENAI_API_KEY
# and AZURE_OPENAI_ENDPOINT environment variables when those are set, so a real
# secret never has to be typed into (and saved with) the notebook. When a
# variable is not set, the placeholder below is used and must be replaced by hand.
OPENAI_API_KEY = os.environ.get(
    "AZURE_OPENAI_API_KEY",
    "PLEASE_ENTER_YOUR_OWNED_AOAI_SERVICE_KEY",
)
OPENAI_API_BASE = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://PLESAE_ENTER_YOUR_OWNED_AOAI_RESOURCE_NAME.openai.azure.com/",
)

# Names of the Azure deployments (text completion and embedding) and the
# underlying completion model; these are not secrets and are edited in place.
OPENAI_DEPLOYMENT_NAME = "PLEASE_ENTER_YOUR_OWNED_AOAI_TEXT_MODEL_NAME"
OPENAI_EMBEDDING_MODEL_NAME = "PLEASE_ENTER_YOUR_OWNED_AOAI_EMBEDDING_MODEL_NAME"
MODEL_NAME = "text-davinci-003"
OPENAI_API_VERSION = "2022-12-01"

# Apply the settings to the openai module. Every value comes from the constants
# above so the key and the API version are each defined in exactly one place.
openai.api_type = "azure"
openai.api_base = OPENAI_API_BASE
openai.api_version = OPENAI_API_VERSION
openai.api_key = OPENAI_API_KEY

In [14]:
# Read the whole source article into memory as a single string.
# The `with` block closes the file handle as soon as the read finishes, instead
# of leaving it open for the lifetime of the kernel.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale (assumes the sample file is UTF-8 - TODO confirm).
with open("./data/how_AI_could_empower_any_business.txt", "r", encoding="utf-8") as text_file:
    raw_text = text_file.read()

In [15]:
# Cut the raw text at newlines into chunks that target 1000 characters each,
# with 200 characters of overlap between neighbouring chunks. Lengths are
# measured with the built-in len(). The recorded run logged two chunks above
# 1000, so treat that figure as a target rather than a hard cap.
splitter_settings = {
    "separator": "\n",
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
}
text_splitter = CharacterTextSplitter(**splitter_settings)
textTexts = text_splitter.split_text(raw_text)

Created a chunk of size 1016, which is longer than the specified 1000
Created a chunk of size 1456, which is longer than the specified 1000


In [16]:
# Count the chunks produced by the splitter. As the last expression in the
# cell, the value is displayed as the cell's output.
len(textTexts)

13

In [21]:
# Create the embeddings client for the Azure OpenAI embedding deployment.
# Nothing is embedded in this cell; the client is handed to the vector store,
# which uses it on the text chunks.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text - confirm against the service's batch limits.
# NOTE(review): client="azure" is kept as-is; verify that this argument has any
# effect in the installed langchain version.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    deployment=OPENAI_EMBEDDING_MODEL_NAME,
    client="azure",
    chunk_size=1,
)

In [22]:
# Build a FAISS vector store from the text chunks, using `embeddings` to embed
# them. The store is what later cells query with similarity_search to find the
# chunks relevant to a question.
# NOTE(review): despite the "pdf" in the variable name, the indexed content is
# the list of plain-text chunks in `textTexts`.
# https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/faiss.html
pdfDocSearch = FAISS.from_texts(textTexts, embeddings)

In [23]:
# Build the question-answering chain on top of the Azure OpenAI completion
# deployment. The chain itself does no retrieval: the documents to answer from
# are supplied by the caller on each run.
# NOTE(review): chain_type="stuff" presumably places all supplied documents
# into one prompt - confirm with the documentation linked below.
# https://docs.langchain.com/docs/components/chains/index_related_chains
llm = AzureOpenAI(
    openai_api_key=OPENAI_API_KEY,
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model_name=MODEL_NAME,
)
chain = load_qa_chain(llm, chain_type="stuff")

In [26]:
# First sample question.
# Step 1: look up the chunks in the FAISS store that are most similar to the inquiry.
# Step 2: run the QA chain with those chunks as its input documents; the answer
# is the cell's last expression and is displayed as the output.
inquiry = "What is this file talking about?"
docs = pdfDocSearch.similarity_search(inquiry)
chain.run(input_documents=docs, question=inquiry)

' This file is talking about how artificial intelligence (AI) is becoming accessible to more people and how it is similar to the rise of literacy in society.'

In [27]:
# Second sample question.
# The inquiry is used twice: to retrieve the most similar chunks from the FAISS
# store, and as the question the QA chain answers from those chunks. The wording
# is corrected (the original read "How AI cound empower any business?") so the
# text being embedded and sent to the model is well-formed.
inquiry = "How could AI empower any business?"
docs = pdfDocSearch.similarity_search(inquiry)
chain.run(input_documents=docs, question=inquiry)

' AI can help spot patterns and suggest improvements for businesses. For example, an AI system could spot if Mediterranean pizzas sell really well on a Friday night and suggest to the pizzeria owner to make more of it on a Friday afternoon. Additionally, AI development platforms are emerging that shift the focus from asking users to write lots of code to instead asking them to focus on providing data, which is much easier for many people to do.'

In [28]:
# Third sample question: ask for a concrete example.
# The chunks most similar to the inquiry are retrieved from the FAISS store and
# passed to the QA chain as its input documents.
inquiry = "Can you give me an example of how AI empowers the business?"
docs = pdfDocSearch.similarity_search(inquiry)
chain.run(input_documents=docs, question=inquiry)

' Yes, an example of how AI empowers businesses is that an AI system could spot patterns in data that a pizza store owner has to figure out what types of pizzas will sell best on certain days. This could help them make more of the pizzas that people are likely to purchase, increasing their revenues.'

In [31]:
# Fourth sample question: ask for a summary.
# NOTE(review): the chain only receives the chunks returned by
# similarity_search, so the summary is based on those chunks rather than on the
# whole file. The search presumably returns only a few top matches by default -
# confirm, and widen the retrieval if a full-file summary is intended.
inquiry = "Please help to summarize this file into 300 words."
docs = pdfDocSearch.similarity_search(inquiry)
chain.run(input_documents=docs, question=inquiry)

' AI is an emerging technology that has the potential to create wealth and spread it across society. To take advantage of this technology, it has been necessary to program lots of code, something that not everyone has the time to do. However, new development platforms are making it easier for people to access AI without the need to code. An example of this is a pizza store that is generating data but could use AI to improve revenues. AI is good at spotting patterns in data and suggest changes that could make a big difference to small business owners. This shift in how AI can be used will democratize access to the technology and will have a huge impact on society.'