# Financial Statements analysis with sub-queries
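In this demo, we answer complex questions about the 2021 10-K filings of Uber and Lyft by decomposing each question into simpler sub-queries, answering every sub-query against a single company's filing, and combining the answers into one response.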

In [1]:
!pip install -q llama-index-llms-openai llama-index

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.6/320.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:
import nest_asyncio

# Patch asyncio so that an event loop can be run while the notebook kernel's own
# loop is already running.
# NOTE(review): presumably needed because the sub-question query engine used
# below runs its sub-queries asynchronously — confirm.
nest_asyncio.apply()

In [3]:
import os
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.openai import OpenAI

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

## Configure LLM service

In [5]:
# Load the OpenAI API key from the secret store of the platform the notebook is
# running on, so nothing has to be commented in or out when switching platforms:
#   1. Kaggle  -> kaggle_secrets.UserSecretsClient
#   2. Colab   -> google.colab.userdata
#   3. other   -> an OPENAI_API_KEY that is already set in the environment
# Kaggle is probed first because `kaggle_secrets` is only importable on Kaggle.
# NOTE(review): `google.colab` may also be importable on some Kaggle images —
# confirm before changing the probing order.
try:
    from kaggle_secrets import UserSecretsClient

    openai_api_key = UserSecretsClient().get_secret("OPENAI_API_KEY")
except ImportError:
    try:
        from google.colab import userdata

        openai_api_key = userdata.get('OPENAI_API_KEY')
    except ImportError:
        # Neither platform is available (e.g. a local Jupyter server).
        openai_api_key = os.environ.get("OPENAI_API_KEY")

# Fail early with an actionable message instead of a TypeError from os.environ
# or an authentication error on the first LLM call.
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY was not found. Add it as a secret in Colab/Kaggle "
        "or export it as an environment variable before running this notebook."
    )

os.environ["OPENAI_API_KEY"] = openai_api_key

In [6]:
from llama_index.core import Settings

# Create the chat model once and assign it to Settings.llm.
# NOTE(review): presumably this becomes the default LLM for the query engines
# built later in the notebook — confirm against the llama-index Settings docs.
default_llm = OpenAI(model="gpt-3.5-turbo", temperature=0.2)
Settings.llm = default_llm

## Download Data

In [7]:
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'

--2024-05-29 13:46:58--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1880483 (1.8M) [application/octet-stream]
Saving to: ‘data/10k/uber_2021.pdf’


2024-05-29 13:46:59 (25.6 MB/s) - ‘data/10k/uber_2021.pdf’ saved [1880483/1880483]

--2024-05-29 13:46:59--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1440303 (1.4M) [appl

## Load data

In [8]:
# Read each company's 10-K PDF with its own reader so the two document sets
# stay separate and can be indexed independently.
lyft_reader = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"])
lyft_docs = lyft_reader.load_data()

uber_reader = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"])
uber_docs = uber_reader.load_data()

## Build indices

In [9]:
# Build a vector store index over the Lyft 10-K documents loaded above.
# NOTE(review): presumably this calls the embedding API for every chunk, so it
# needs OPENAI_API_KEY to be set and may take a while — confirm.
lyft_index = VectorStoreIndex.from_documents(lyft_docs)

In [10]:
# Build a separate vector store index over the Uber 10-K documents loaded above.
# NOTE(review): presumably this calls the embedding API for every chunk, so it
# needs OPENAI_API_KEY to be set and may take a while — confirm.
uber_index = VectorStoreIndex.from_documents(uber_docs)

## Build query engines

In [11]:
# Query engine over the Lyft index only.
# NOTE(review): similarity_top_k=3 presumably limits retrieval to the 3 most
# similar chunks per query — confirm against the llama-index docs.
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)

In [12]:
# Query engine over the Uber index only.
# NOTE(review): similarity_top_k=3 presumably limits retrieval to the 3 most
# similar chunks per query — confirm against the llama-index docs.
uber_engine = uber_index.as_query_engine(similarity_top_k=3)

In [13]:
# Describe each per-company query engine as a named tool. The name and the
# description are what the sub-question engine is given to tell the tools apart.
lyft_metadata = ToolMetadata(
    name="lyft_10k",
    description="Provides information about Lyft financials for year 2021",
)
uber_metadata = ToolMetadata(
    name="uber_10k",
    description="Provides information about Uber financials for year 2021",
)

lyft_tool = QueryEngineTool(query_engine=lyft_engine, metadata=lyft_metadata)
uber_tool = QueryEngineTool(query_engine=uber_engine, metadata=uber_metadata)

# Order is kept as Lyft first, Uber second.
query_engine_tools = [lyft_tool, uber_tool]

# Engine that splits a question into sub-questions and routes each one to a tool.
s_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
)

## Run queries

In [14]:
# Cross-company question: the engine has to query both filings to answer it.
segments_question = (
    "Compare and contrast the customer segments and geographies that grew the fastest"
)
response = s_engine.query(segments_question)

Generated 4 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What were the customer segments that grew the fastest for Lyft in 2021?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: What were the geographies that grew the fastest for Lyft in 2021?
[0m[1;3;38;2;11;159;203m[uber_10k] Q: What were the customer segments that grew the fastest for Uber in 2021?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: What were the geographies that grew the fastest for Uber in 2021?
[0m[1;3;38;2;11;159;203m[uber_10k] A: The customer segments that grew the fastest for Uber in 2021 were the membership programs, specifically Uber One, Uber Pass, Eats Pass, and Rides Pass.
[0m[1;3;38;2;155;135;227m[uber_10k] A: Chicago, Miami, New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom were the geographies that grew the fastest for Uber in 2021.
[0m[1;3;38;2;90;149;237m[lyft_10k] A: The geographies that grew the fastest for Lyft in 2021 were those where the number of Active Riders 

In [None]:
# Display the text of the final answer returned for the question above.
response.response

"The customer segments that grew the fastest for Lyft in 2021 were not explicitly mentioned, while for Uber, the membership programs such as Uber One, Uber Pass, Eats Pass, and Rides Pass experienced the fastest growth. In terms of geographies, Lyft's growth was tied to communities fully reopening due to wider vaccine distribution, whereas Uber saw its fastest growth in specific cities like Chicago, Miami, New York City, Sao Paulo, and London."

In [None]:
# Second cross-company question; note that this rebinds `response`.
revenue_question = "Compare revenue growth of Uber and Lyft from 2020 to 2021"
response = s_engine.query(revenue_question)

Generated 4 sub questions.
[1;3;38;2;237;90;200m[uber_10k] Q: What was the total revenue of Uber in 2020?
[0m[1;3;38;2;90;149;237m[uber_10k] Q: What was the total revenue of Uber in 2021?
[0m[1;3;38;2;11;159;203m[lyft_10k] Q: What was the total revenue of Lyft in 2020?
[0m[1;3;38;2;155;135;227m[lyft_10k] Q: What was the total revenue of Lyft in 2021?
[0m[1;3;38;2;155;135;227m[lyft_10k] A: $3,208,323
[0m[1;3;38;2;11;159;203m[lyft_10k] A: The total revenue of Lyft in 2020 was $2,364,681,000.
[0m[1;3;38;2;237;90;200m[uber_10k] A: The total revenue of Uber in 2020 was $11,139 million.
[0m[1;3;38;2;90;149;237m[uber_10k] A: The total revenue of Uber in 2021 was $17,455 million.
[0m

In [None]:
# Display the text of the final answer returned for the revenue-growth question.
response.response

'The revenue growth of Uber from 2020 to 2021 was $6,316 million, while the revenue growth of Lyft during the same period was $843,642,000.'