### 10K Analysis

In [23]:
import openai
import os
import nest_asyncio
# Patch asyncio so event loops can be nested; presumably needed because the
# notebook kernel already runs a loop and the query engines below use asyncio.
nest_asyncio.apply()

# Never hardcode the key in the notebook. Reuse OPENAI_API_KEY when it is
# already exported; otherwise prompt for it without echoing it to the output.
if not os.environ.get('OPENAI_API_KEY'):
    from getpass import getpass
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

In [3]:
from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, VectorStoreIndex
from llama_index.response.pprint_utils import pprint_response
from langchain import OpenAI

from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine


### Configure LLM Services

In [10]:
# LangChain OpenAI completion model. Per LangChain's docs, max_tokens=-1 asks
# for as many completion tokens as the model's context window still allows.
llm = OpenAI(
    model='text-davinci-003',
    temperature=.5,
    max_tokens=-1,
)
# NOTE(review): service_context is not passed to any index or query engine in
# the cells visible here, so this LLM configuration may be unused - confirm.
service_context = ServiceContext.from_defaults(llm=llm)

### Load Data

In [16]:
# Parse each company's 2023 10-K PDF into a list of llama_index documents.
# NOTE(review): both inputs are absolute, machine-specific paths (and live in
# different project folders); the notebook will not run elsewhere as written.
lyft_docs = SimpleDirectoryReader(
    input_files=[
        '/Users/brendan/Desktop - Brendan’s MacBook Air/llm/llamaindex/chatbot/data/LYFT_10K_2023.pdf',
    ],
).load_data()
uber_docs = SimpleDirectoryReader(
    input_files=[
        '/Users/brendan/Desktop - Brendan’s MacBook Air/llm/llamaindex/pdf_tutorial/data/UBER_10K_2023.pdf',
    ],
).load_data()

### Build Indices

In [14]:
# Build a vector index over the Lyft filing, then report how many entries
# ended up in the index's docstore as a quick sanity check.
lyft_index = VectorStoreIndex.from_documents(
    lyft_docs,
)
print(
    f'Finished building index with {len(lyft_index.docstore.docs)} nodes'
)

Finished building index with 253 nodes


In [17]:
# Build a vector index over the Uber filing, then report how many entries
# ended up in the index's docstore as a quick sanity check.
uber_index = VectorStoreIndex.from_documents(
    uber_docs,
)
print(
    f'Finished building index with {len(uber_index.docstore.docs)} nodes'
)

Finished building index with 238 nodes


#### Build Query Engines

In [18]:
# Query engine over the Lyft index; similarity_top_k=3 requests the three
# best-matching chunks as context for each question.
lyft_engine = lyft_index.as_query_engine(
    similarity_top_k=3,
)


In [19]:
# Query engine over the Uber index; similarity_top_k=3 requests the three
# best-matching chunks as context for each question.
uber_engine = uber_index.as_query_engine(
    similarity_top_k=3,
)

In [21]:
# Wrap each per-company query engine as a named tool. The tool name appears
# in the sub-question trace (e.g. "[lyft_10k] Q: ..."), and the description is
# the text the LLM sees when deciding which tool should answer a sub-question,
# so it must be spelled correctly and be parallel across tools.
query_engine_tools = [
    QueryEngineTool(
        query_engine=lyft_engine,
        metadata=ToolMetadata(name='lyft_10k', description='Provides info about LYFT'),
    ),
    QueryEngineTool(
        query_engine=uber_engine,
        metadata=ToolMetadata(name='uber_10k', description='Provides info about UBER'),
    ),
]

# Engine that splits a comparative question into per-tool sub-questions,
# answers each one with the matching tool, and synthesizes a final response.
s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

### Run Queries

In [25]:
# Ask the sub-question engine to compare vehicle fleet sizes across filings.
response = s_engine.query(
    'Compare and contrast fleet size, which refers to # of vehicles'
)
print(response)

Generated 2 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What is the fleet size of LYFT?
[0m[1;3;38;2;90;149;237m[uber_10k] Q: What is the fleet size of UBER?
[0m[1;3;38;2;90;149;237m[uber_10k] A: The context information does not provide any specific information about the fleet size of Uber.
[0m[1;3;38;2;237;90;200m[lyft_10k] A: Lyft operates over 90,000 bikes and scooters across 10 markets in partnership with city governments. Additionally, with the acquisition of PBSC Urban Solutions in 2022, Lyft has an additional 100,000+ bikes deployed in bikeshare systems in 46 markets in 15 countries.
[0mLyft has a fleet size of over 90,000 bikes and scooters across 10 markets, as well as an additional 100,000+ bikes deployed in bikeshare systems in 46 markets in 15 countries through the acquisition of PBSC Urban Solutions. On the other hand, the context information does not provide any specific information about the fleet size of Uber.


In [26]:
# Ask the sub-question engine to compare rider/driver counts across filings.
response = s_engine.query(
    'Compare and contrast number of riders or drivers on the platforms'
)
print(response)

Generated 4 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: How many riders are there on LYFT?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: How many drivers are there on LYFT?
[0m[1;3;38;2;11;159;203m[uber_10k] Q: How many riders are there on UBER?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: How many drivers are there on UBER?
[0m[1;3;38;2;11;159;203m[uber_10k] A: The context information does not provide the exact number of riders on Uber.
[0m[1;3;38;2;90;149;237m[lyft_10k] A: The context information does not provide an exact number of drivers on Lyft.
[0m[1;3;38;2;155;135;227m[uber_10k] A: The context information does not provide the exact number of drivers on Uber.
[0m[1;3;38;2;237;90;200m[lyft_10k] A: The number of riders on Lyft is not explicitly mentioned in the given context information.
[0mThe context information does not provide any specific details about the number of riders or drivers on either Lyft or Uber. Therefore, it is not possible to compare or contrast the numbe

In [27]:
# Ask the sub-question engine to compare where each platform operates.
response = s_engine.query(
    'Compare and contrast geographies of the platforms'
)
print(response)

Generated 8 sub questions.
[1;3;38;2;237;90;200m[lyft_10k] Q: What are the top cities where LYFT operates?
[0m[1;3;38;2;90;149;237m[uber_10k] Q: What are the top cities where UBER operates?
[0m[1;3;38;2;11;159;203m[lyft_10k] Q: What is the market share of LYFT in different cities?
[0m[1;3;38;2;155;135;227m[uber_10k] Q: What is the market share of UBER in different cities?
[0m[1;3;38;2;237;90;200m[lyft_10k] Q: What are the differences in pricing between LYFT and UBER in different cities?
[0m[1;3;38;2;90;149;237m[lyft_10k] Q: What are the differences in availability of LYFT and UBER in different cities?
[0m[1;3;38;2;11;159;203m[lyft_10k] Q: What are the differences in user ratings between LYFT and UBER in different cities?
[0m[1;3;38;2;155;135;227m[lyft_10k] Q: What are the differences in driver ratings between LYFT and UBER in different cities?
[0m[1;3;38;2;90;149;237m[uber_10k] A: UBER operates in Chicago, Los Angeles, New York City, Sao Paulo, and London.
[0m[1;3;38