# Compare Documents
# SubQuestionQueryEngine

In [1]:
import logging
import sys

# Send INFO-level log records to stdout so they show up in the cell output.
#
# `force=True` makes this idempotent: any handlers already attached to the root
# logger are removed before the single stdout handler is installed. Without it,
# `basicConfig` silently does nothing when the root logger already has handlers,
# which would leave the level unset and the INFO lines hidden.
#
# `format="%(message)s"` keeps the bare-message output that an unformatted
# StreamHandler produces (no "INFO:name:" prefix).
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(message)s",
    force=True,
)

In [2]:
import os 
from dotenv import load_dotenv, find_dotenv

In [3]:
# Locate the .env file by searching upward from the current working directory
# instead of pinning one machine's absolute home-directory path, so the notebook
# runs unchanged from any checkout that has a .env at or above the notebook dir.
# The call stays the last expression so the cell still displays True/False.
load_dotenv(find_dotenv(usecwd=True))

True

In [4]:
# Read the API key from the environment. Subscript access (rather than .get)
# fails fast with a KeyError here if the variable was not loaded.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [5]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import set_global_service_context
from llama_index.core.response.pprint_utils import pprint_response
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

# Load Uber and Lyft documents

In [7]:
# Read each company's 2021 filing from its PDF; every reader is handed exactly
# one input file.
lyft_docs = SimpleDirectoryReader(
    input_files=["lyft_2021.pdf"],
).load_data()
uber_docs = SimpleDirectoryReader(
    input_files=["uber_2021.pdf"],
).load_data()

In [8]:
# Report how many document objects each reader returned. A single print call
# with a newline separator emits the same two lines as two separate prints.
print(
    f'Loaded lyft 10-K with {len(lyft_docs)} pages',
    f'Loaded Uber 10-K with {len(uber_docs)} pages',
    sep='\n',
)

Loaded lyft 10-K with 238 pages
Loaded Uber 10-K with 307 pages


# Build indices

In [9]:
# Build one vector index per company from the loaded documents.
# NOTE: this step calls the embeddings API (see the logged POST requests in the
# cell output), so re-running it incurs network calls.
lyft_index = VectorStoreIndex.from_documents(documents=lyft_docs)
uber_index = VectorStoreIndex.from_documents(documents=uber_docs)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 520 "
Retrying request to /embeddings in 0.927908 seconds
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


# Basic QA

In [10]:
# Query engine over the Lyft index. similarity_top_k=3 presumably limits
# retrieval to the three best-matching chunks per query — confirm against docs.
lyft_engine = lyft_index.as_query_engine(
    similarity_top_k=3,
)


In [11]:
# Query engine over the Uber index. similarity_top_k=3 presumably limits
# retrieval to the three best-matching chunks per query — confirm against docs.
uber_engine = uber_index.as_query_engine(
    similarity_top_k=3,
)


In [12]:
response = await lyft_engine.aquery('What is the revenue of Lyft in 2021? Answer in millions with page reference')


HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [13]:
# Show the answer text for the Lyft revenue query issued in the preceding cell.
print(response)

The revenue of Lyft in 2021 was $3.208 billion. (Page reference: 79)


In [14]:
response = await uber_engine.aquery('What is the revenue of Uber in 2021? Answer in millions, with page reference')


HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [15]:
# Show the answer text for the Uber revenue query issued in the preceding cell.
print(response)

The revenue of Uber in 2021 was $17,455 million. (Page reference: 77)


# Compare Uber and Lyft

In [16]:
# Wrap each per-company query engine as a named tool. The name/description
# pair is the metadata attached to the tool.
query_engine_tools = [
    QueryEngineTool(
        query_engine=lyft_engine,
        metadata=ToolMetadata(
            name='lyft_10k',
            description='Provides information about Lyft financials for year 2021',
        ),
    ),
    QueryEngineTool(
        query_engine=uber_engine,
        metadata=ToolMetadata(
            name='uber_10k',
            description='Provides information about Uber financials for year 2021',
        ),
    ),
]

# Engine that answers a question by generating sub-questions against the tools
# above (the cell outputs below show "Generated N sub questions").
s_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
)

In [17]:
response = await s_engine.aquery('Compare and contrast the customer segments and geographies that grew the fastest')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated 4 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What were the customer segments that grew the fastest for Lyft in 2021?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: What were the geographies that grew the fastest for Lyft in 2021?
[0m[1;3;38;2;11;159;203m[uber_10k] Q: What were the customer segments that grew the fastest for Uber in 2021?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: What were the geographies that grew the fastest for Uber in 2021?
[0mHTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;237;90;200m[lyft_10k] A: The customer segments that grew the fastest 

In [18]:
# Show the answer text for the segments/geographies comparison from the preceding cell.
print(response)

The customer segments that grew the fastest for Lyft in 2021 were related to their network of Light Vehicles, bike, and scooter sharing services. On the other hand, Uber experienced the fastest growth in customer segments through membership programs like Uber One, Uber Pass, Eats Pass, and Rides Pass.

In terms of geographies, Lyft saw the fastest growth in areas where vaccines were widely distributed and communities fully reopened, resulting in a significant revenue increase. Uber, on the other hand, experienced the fastest growth in cities like Chicago, Miami, New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom.


In [19]:
response = await s_engine.aquery('Compare revenue growth of Uber and Lyft from 2020 to 2021')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Generated 4 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What was the total revenue of Lyft in 2020?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: What was the total revenue of Lyft in 2021?
[0m[1;3;38;2;11;159;203m[uber_10k] Q: What was the total revenue of Uber in 2020?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: What was the total revenue of Uber in 2021?
[0mHTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;237;90;200m[lyft_10k] A: The total revenue of Lyft in 2020 was $2,364,681,000.
[0mHTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;

In [20]:
# Show the answer text for the revenue-growth comparison from the preceding cell.
print(response)

Uber's revenue grew by $6,316 million from 2020 to 2021, while Lyft's revenue increased by $843,642,000 during the same period.
