# Question Answering

## PDF reader

https://archive.org/details/crossingthechasm_202002

In [None]:
%pip install langchain openai chromadb tiktoken pypdf llama-index tqdm

In [1]:
import os
import tomli
import openai
# Read the API credentials from the Streamlit secrets file and expose them to the
# OpenAI SDK and, through environment variables, to the cells further down.
with open('../.streamlit/secrets.toml', 'rb') as secrets_file:
    toml_dict = tomli.load(secrets_file)

# The same OpenAI key is set on the SDK module and exported as OPENAI_API_KEY.
openai_key = toml_dict['OPEN_AI_KEY']
openai.api_key = openai_key
os.environ['OPENAI_API_KEY'] = openai_key

# Pinecone credentials are currently disabled:
# os.environ['PINECONE_API_KEY'] = toml_dict['PINECONE_API_KEY']
# os.environ['PINECONE_API_ENV'] = toml_dict['PINECONE_API_ENV']

# AWS credentials live in the [aws] table and are exported under the same lower-case
# names; the S3 cells read them back explicitly from os.environ.
aws_secrets = toml_dict['aws']
for aws_name in ('aws_access_key_id', 'aws_secret_access_key'):
    os.environ[aws_name] = aws_secrets[aws_name]

### Document loaders

In [2]:
# Load the book PDF through LangChain's PyPDFLoader.
from langchain.document_loaders import PyPDFLoader

book_pdf_path = "../book/Crossing the Chasm.pdf"
loader = PyPDFLoader(book_pdf_path)
documents = loader.load()

In [2]:
# Alternative: load the same single PDF with the "PDFReader" loader that
# llama_index fetches via download_loader. Rebinds `loader` and `documents`.
from llama_index import download_loader

PDFReader = download_loader("PDFReader")

book_pdf_path = "../book/Crossing the Chasm.pdf"
loader = PDFReader()
documents = loader.load_data(book_pdf_path)

In [None]:
# break down into 1 pdf per page

In [10]:
# Alternative: load the whole ../book/ folder in one go. Rebinds `documents`.
from llama_index import SimpleDirectoryReader

book_dir_reader = SimpleDirectoryReader('../book/')
documents = book_dir_reader.load_data()

In [None]:
# Failing: load from S3
# Attempt to read the book PDF straight from the bucket with the "S3Reader" loader,
# authenticating with the AWS credentials exported by the setup cell.
from llama_index import download_loader

S3Reader = download_loader("S3Reader")

s3_bucket = 'book48'
object_name = 'book/Crossing the Chasm.pdf'

loader = S3Reader(
    bucket=s3_bucket,
    key=object_name,
    aws_access_id=os.environ['aws_access_key_id'],
    aws_access_secret=os.environ['aws_secret_access_key'],
)
documents = loader.load_data()

### Connect to S3

In [5]:
import boto3

# Build an S3 client from the credentials exported by the setup cell.
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.environ['aws_access_key_id'],
    aws_secret_access_key=os.environ['aws_secret_access_key'],
)

# Download the pre-built index JSON from the bucket into the current working directory.
# NOTE(review): the load cell below reads '../index/index-full.json', not this local
# file name — confirm which location is intended.
s3_bucket = 'book48'
object_name = 'index/index-full.json'
file_name = 'index-full.json'
s3_client.download_file(Bucket=s3_bucket, Key=object_name, Filename=file_name)

### Create index

In [42]:
from llama_index import LLMPredictor, ServiceContext
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# gpt-3.5-turbo chat model at temperature 0, wrapped so llama_index can use it.
chat = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)
llm_predictor = LLMPredictor(llm=chat)

# Service context handed to the index cells below so they use `chat` as their LLM.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
)



In [49]:
# Baseline: ask the chat model directly, without any context from the book.
baseline_message = HumanMessage(content='what is Vendor-Oriented Pricing?')
res = chat([baseline_message])
res.dict()['content']

'Vendor-oriented pricing refers to a pricing strategy in which a vendor sets the price of their products or services based on their own costs, profit margins, and market position. This strategy focuses on maximizing profits for the vendor and may not account for competitive pricing or customer demand. It is also known as cost-plus pricing, as the vendor adds a markup to their costs to determine the final price. This approach may be suitable for certain industries or products, but it can also lead to pricing that is not competitive or appealing to customers.'

In [30]:
from llama_index import GPTSimpleVectorIndex

# Load the previously saved full-book index from disk and attach the chat-model
# service context to it.
full_index_path = '../index/index-full.json'
index = GPTSimpleVectorIndex.load_from_disk(
    full_index_path,
    service_context=service_context,
)

In [None]:
from llama_index import GPTSimpleVectorIndex

# Alternative to loading from disk: build the vector index from whatever is
# currently bound to `documents`, using the chat-model service context.
index = GPTSimpleVectorIndex.from_documents(
    documents,
    service_context=service_context,
)

In [None]:
# Save the index to disk.
index.save_to_disk('../index/index.json')
# Reload it with the same service context the other index cells pass, so the reloaded
# index keeps using the gpt-3.5-turbo chat model; without the argument it presumably
# falls back to llama_index's defaults.
index = GPTSimpleVectorIndex.load_from_disk('../index/index.json', service_context=service_context)

In [5]:
# Show the LLM object attached to the index's service context.
index.service_context.llm_predictor.llm

ChatOpenAI(verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x000002AC5594C3A0>, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key=None, request_timeout=60, max_retries=6, streaming=False, n=1, max_tokens=None)

### Query index

In [31]:
# Ask the vector index the same question that was sent to the bare chat model earlier.
query = 'what is Vendor-Oriented Pricing?'
response = index.query(query)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4248 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [36]:
# Display the answer text of the index query.
response.response

'The context provided is not relevant to the original question of defining vendor-oriented pricing. Therefore, the original answer remains the same.'

### Add follow-up questions

In [51]:
# Ask the chat model directly (not the index) for three follow-up questions, giving it
# the original query and the index's answer and requesting the reply as JSON.
follow_up = f'''
Provide a list of of 3 follow-up questions the initial query and the result associated. 
Here is the initial query:
{query}
Here is the result associated:
{response.response}
Format the output as a JSON file with a list of the 3 follow-up questions in a field called follow-up
'''
res = chat([HumanMessage(content=follow_up)])
# Raw reply text; expected to be a JSON document and parsed with json.loads in a later cell.
jason = res.dict()['content']

In [55]:
# Show the raw (unparsed) reply string.
jason

'{\n   "follow-up": [\n      "Can you provide a definition of vendor-oriented pricing?",\n      "Are there any examples of companies that use vendor-oriented pricing?",\n      "What are the benefits and drawbacks of using vendor-oriented pricing?"\n   ]\n}'

In [53]:
import json
# Parse the model's reply into a Python dict; json.loads raises json.JSONDecodeError
# if the model did not return valid JSON.
json.loads(jason)

{'follow-up': ['Can you provide a definition of vendor-oriented pricing?',
  'Are there any examples of companies that use vendor-oriented pricing?',
  'What are the benefits and drawbacks of using vendor-oriented pricing?']}

### Other follow-up methods

In [20]:
# Try changing the default qa prompt (unsuccessful)
# Idea: replace the index's question-answering prompt with one that asks for a JSON
# reply containing both the answer ("result") and three follow-up questions ("follow-up").
from llama_index import QuestionAnswerPrompt
query = "what is Vendor-Oriented Pricing?"
# {context_str} and {query_str} are template placeholders — presumably filled in by
# llama_index with the retrieved text and the query at query time (TODO confirm).
QA_PROMPT_TMPL = (
    "We have provided context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, please answer the question: {query_str}\n"
    "Format the output as a JSON file, with:\n"
    "- the result of the query as the main field, called result\n"
    "- a list of 3 follow-up questions relative to the initial query and the result associated, in a field called follow-up\n"
    "Give only the JSON format as output\n"
    "Do not mention if the provided context is not directly relevant to the initial query\n"

)
QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)
# Pass the custom template for this query only; rebinds `response`.
response = index.query(query, text_qa_template=QA_PROMPT)
# print(response)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4473 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [21]:
# Print the answer obtained with the custom QA prompt (the output below is not JSON).
print(response)

The provided context is not related to the original question about Vendor-Oriented Pricing, so the original answer will stand:

"Vendor-Oriented Pricing is a function of internal issues, beginning with cost of goods and extending to cost of sales, cost of overhead, cost of capital, promised rate of risk-adjusted return, and any number of other factors. These factors are critical to being able to manage an enterprise profitably on an ongoing basis. None of these, however, has any immediate meaning in the marketplace. They take on meaning only as they impact other market-visible issues."


In [26]:
# Try adding the additional prompt together with the query to the index (unsuccessful)
query = "what is Vendor-Oriented Pricing?"
# Formatting instructions appended verbatim to the question; the leading newline of the
# triple-quoted string separates them from the question text.
follow_up = '''
Format the output as a JSON file, with:
- the result of the query as the main field, called result
- a list of 3 follow-up questions relative to the initial query and the result associated, in a field called follow-up
Give only the JSON format as output
'''
# The concatenated text is sent as the query string; rebinds `response`.
response = index.query(query+follow_up)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4534 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 70 tokens


In [27]:
# Print the answer; the output below shows the JSON answer followed by a non-JSON
# "refined answer" section.
print(response)

Original answer:
{
  "result": "Vendor-oriented pricing is a function of internal issues, such as cost of goods, and extending to cost of sales, cost of overhead, cost of capital, promised rate of risk-adjusted return, and any number of other factors. These factors are critical to being able to manage an enterprise profitably on an ongoing basis. None of these, however, has any immediate meaning in the marketplace. They take on meaning only as they impact other market-visible issues.",
  "follow-up": [
    "What are some of the factors that impact vendor-oriented pricing?",
    "How do these factors impact the marketplace?",
    "What is the impact of vendor-oriented pricing on the number of transactions required to create a given amount of annual revenue?"
  ]
}

Refined answer based on new context:
The provided context doesn't seem to be related to the topic of Vendor-Oriented Pricing, so the original answer is still valid.


### Get sources

In [32]:
# Show a short formatted summary of the source(s) behind `response`.
response.get_formatted_sources()

'> Source (Doc id: ae730396-8d19-4765-8042-34524fcfea0b): decisions are among the hardest for management groups\nto reach consensus on. The problem is that ...'

In [38]:
# Show the full text of the first source node behind `response`.
response.source_nodes[0].node.get_text()

'decisions are among the hardest for management groups\nto reach consensus on. The problem is that there are so many\nperspectives competing for the controlling influence. In this sec\xad\ntion we are going to sort out some of those perspectives and set\nout some rational guidelines for pricing during the chasm period.\nCustomer-Oriented Pricing\nThe first perspective to set on pricing is the customers’, and, as\nwe noted in the section on discovering the chasm, that varies\ndramatically with their psychographics. Visionaries—the cus\xad\ntomers dominating the early market’s development—are rela\xad\ntively price-insensitive. Seeking a strategic leap forward, with\nan order-of-magnitude return on investment, they are con\xad\nvinced that any immediate costs are insignificant when com\xad\npared with the end result. Indeed, they want to make sure there\nis, if anything, extra money in the price, because they know they\nare going to need special service, and they want their vendors to\nh

## Storing Embeddings

### Create Embeddings manually


In [None]:
# Call the OpenAI embeddings endpoint directly with a small batch of sample strings.
embed_model = "text-embedding-ada-002"

sample_texts = [
    "Sample document text goes here",
    "there will be several phrases in each batch",
]
res = openai.Embedding.create(input=sample_texts, engine=embed_model)

### Create Pinecone DB

In [None]:
import pinecone

# Connect to Pinecone with credentials taken from the environment.
# NOTE(review): the setup cell has the PINECONE_API_KEY / PINECONE_API_ENV assignments
# commented out, so these lookups raise KeyError unless the variables are set outside
# the notebook.
pinecone.init(
    api_key=os.environ['PINECONE_API_KEY'],
    environment=os.environ['PINECONE_API_ENV'],
)
# pinecone.list_indexes()

# NOTE(review): this rebinds `index`, which earlier cells use for the llama_index vector index.
index = pinecone.Index('crossing-the-chasm')

## QA

### load_qa_chain

In [16]:
# Load the page 202-217 extract of the book; rebinds `loader` and `documents`.
# NOTE(review): this path has no '../' prefix, unlike the other book paths in this
# notebook — confirm the working directory it is expected to run from.
chapter_pdf_path = "book/Crossing the Chasm-202-217.pdf"
loader = PyPDFLoader(chapter_pdf_path)
documents = loader.load()


In [14]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI

# Rebinds `chat`; unlike the earlier definition, no temperature is passed here.
chat = ChatOpenAI(model_name='gpt-3.5-turbo')
query = "what is Vendor-Oriented Pricing?"

# Run a "map_reduce" question-answering chain over all loaded documents (no retrieval step).
chain = load_qa_chain(
    llm=chat,
    chain_type="map_reduce",
)
chain.run(input_documents=documents, question=query)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=60).


'The given portion of the document does not provide a clear definition of Vendor-Oriented Pricing. However, it mentions that "vendor-oriented pricing represents the least sound basis for pricing decisions during the chasm period."'

### RetrievalQA

In [7]:
from langchain.chains import RetrievalQA
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

In [8]:
# Step 1: cut the loaded documents into chunks of 1000 characters with no overlap.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)

# Step 2: embed the chunks with OpenAI embeddings and index them in a Chroma vector store.
# The store is not persisted (see the DuckDB notice in the cell output).
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

Using embedded DuckDB without persistence: data will be transient


In [10]:
# Wrap the vector store in a retriever that does a similarity search with k=3.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Question-answering chain of type "stuff" over that retriever; the source documents
# are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

query = "what is Vendor-Oriented Pricing?"
result = qa({"query": query})

In [11]:
# Show the whole result dict (query, answer and source documents).
result

{'query': 'what is Vendor-Oriented Pricing?',
 'result': 'Vendor-oriented pricing is a pricing strategy that is based on internal factors such as the cost of goods, cost of sales, cost of overhead, cost of capital, promised rate of risk-adjusted return, and any number of other factors critical to managing an enterprise profitably on an ongoing basis. Its impact is on the number of transactions required to create a given amount of annual revenue. It sets the distribution channel decision by establishing a price-point ballpark that puts the product in the direct sales, web self-service, or sales 2.0 camp. The pricing strategy is not the most sound basis for pricing decisions during the chasm period, as it requires being almost entirely externally focused- both on the new demands of the mainstream customer and the new relationship you are trying to build with a mainstream channel.',
 'source_documents': [Document(page_content='208 Crossing the Chasm\nVendor -Oriented Pricing\nVendor-orien

In [12]:
# Show only the answer text.
result['result']

'Vendor-oriented pricing is a pricing strategy that is based on internal factors such as the cost of goods, cost of sales, cost of overhead, cost of capital, promised rate of risk-adjusted return, and any number of other factors critical to managing an enterprise profitably on an ongoing basis. Its impact is on the number of transactions required to create a given amount of annual revenue. It sets the distribution channel decision by establishing a price-point ballpark that puts the product in the direct sales, web self-service, or sales 2.0 camp. The pricing strategy is not the most sound basis for pricing decisions during the chasm period, as it requires being almost entirely externally focused- both on the new demands of the mainstream customer and the new relationship you are trying to build with a mainstream channel.'

### VectorstoreIndexCreator

Wrapper for the logic above

Source:

https://python.langchain.com/en/latest/modules/chains/getting_started.html
https://github.com/hwchase17/langchain/blob/master/langchain/indexes/vectorstore.py#L21-L74

In [None]:
# Same split / embed / store pipeline as above, configured in a single object.
index_creator = VectorstoreIndexCreator(
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),  # chunking
    embedding=OpenAIEmbeddings(),                                           # embedding model
    vectorstore_cls=Chroma,                                                 # vector store backend
)

# Build the index from the current `loader` (rebinds `index`) and ask the question.
index = index_creator.from_loaders([loader])
index.query(llm=chat, question=query, chain_type="stuff")

### ConversationalRetrievalChain

conversation memory + RetrievalQAChain

Allows passing in the chat history, which can then be used for follow-up questions.

Source: https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html

In [None]:
from langchain.chains import ConversationalRetrievalChain

In [None]:
# Rebuild the vector store: 1000-character chunks without overlap, OpenAI embeddings, Chroma.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# Retriever doing a similarity search with k=2.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Conversational chain: takes the question together with the chat history so far.
qa = ConversationalRetrievalChain.from_llm(chat, retriever)

# First turn: the history is empty.
chat_history = []
result = qa({"question": query, "chat_history": chat_history})

In [None]:
# Show the answer of the first conversational turn.
result["answer"]

In [None]:
# Second turn. The history must be built BEFORE `query` is rebound, so that it pairs
# the previous question with its answer.
chat_history = [(query, result["answer"])]
# The follow-up question says "it", so it relies on the history for what "it" refers to.
query = "How does it differ from Distribution-Oriented Pricing?"
result = qa({"question": query, "chat_history": chat_history})


In [None]:
# Show the (question, answer) history that was passed to the second turn.
chat_history

In [None]:
# Show the answer to the follow-up question.
result['answer']