In [None]:
# Install LlamaIndex plus the MistralAI LLM and FastEmbed embedding integrations
# into the kernel's environment (-q keeps pip's output quiet).
%pip install llama-index llama-index-llms-mistralai llama-index-embeddings-fastembed -q

In [None]:
# Create the local data folders (-p: no error if they already exist).
!mkdir -p 'data/10k/'
!mkdir -p 'data/paul_graham/'

# Fetch the sample documents from the llama_index repository:
# the Paul Graham essay (text) and the Uber / Lyft 2021 PDFs.
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'

In [None]:
from llama_index.core import Settings
from llama_index.llms.mistralai import MistralAI 
from llama_index.embeddings.fastembed import FastEmbedEmbedding
import os

from getpass import getpass

# Never write the credential into the notebook: reuse a key that is already
# exported in the environment, and only prompt (without echo) when it is missing.
# The previous hard-coded placeholder silently overwrote a real exported key.
if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass("Mistral API key: ")

# Global LlamaIndex defaults: the LLM, the embedding model, and the chunking
# parameters used when documents are split for indexing.
Settings.llm = MistralAI(model="mistral-small-latest")
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en")
Settings.chunk_size = 512
Settings.chunk_overlap = 64

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core import (StorageContext, load_index_from_storage,)

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

import nest_asyncio

# Patch asyncio so an event loop can be re-entered while the notebook's own loop
# is running (presumably required by the sub-question engine used later — confirm).
nest_asyncio.apply()

In [None]:
# Try to reuse the indices persisted under ./storage by a previous run.
# `index_loaded` tells the next cell whether the indices still have to be built.
try:
    storage_context = StorageContext.from_defaults(persist_dir="./storage/lyft")
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(persist_dir="./storage/uber")
    uber_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(persist_dir="./storage/paul_graham")
    paul_graham_index = load_index_from_storage(storage_context)

    index_loaded = True
except Exception as load_error:
    # Catch Exception rather than using a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate, and surface the reason instead of hiding it.
    print(f"Could not load persisted indices ({load_error!r}); they will be rebuilt.")
    index_loaded = False

In [None]:
# Build the three vector indices from the downloaded files when nothing could be
# loaded from ./storage, then persist them so later runs can skip this step.
if not index_loaded:
    lyft_docs = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"]).load_data()
    uber_docs = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"]).load_data()
    # Fixed: this previously re-read the Uber PDF, so the "paul_graham" index
    # actually contained Uber's filing instead of the essay.
    paul_graham_docs = SimpleDirectoryReader(
        input_files=["./data/paul_graham/paul_graham_essay.txt"]
    ).load_data()

    lyft_index = VectorStoreIndex.from_documents(lyft_docs)
    uber_index = VectorStoreIndex.from_documents(uber_docs)
    # Named `paul_graham_index` to match the variable defined when the index is
    # loaded from storage, so both code paths leave the same names in scope.
    paul_graham_index = VectorStoreIndex.from_documents(paul_graham_docs)

    lyft_index.storage_context.persist(persist_dir="./storage/lyft")
    uber_index.storage_context.persist(persist_dir="./storage/uber")
    paul_graham_index.storage_context.persist(persist_dir="./storage/paul_graham")

In [None]:
# Both engines share the same retrieval setting: the 5 most similar chunks per query.
engine_options = {"similarity_top_k": 5}
lyft_engine = lyft_index.as_query_engine(**engine_options)
uber_engine = uber_index.as_query_engine(**engine_options)

In [None]:
from llama_index.core.evaluation import RelevancyEvaluator

# One evaluator instance, shared by both EvalQueryEngineTool entries defined below.
# No LLM is passed explicitly; presumably it falls back to Settings.llm — confirm.
evaluator = RelevancyEvaluator()

In [None]:
from llama_index.core.tools import ToolMetadata
from llama_index.core.tools.eval_query_engine import EvalQueryEngineTool

# One evaluated query-engine tool per company. The tools differ only in their
# name, the company mentioned in the description, and the underlying engine.
query_engine_tools = [
    EvalQueryEngineTool(
        evaluator=evaluator,
        query_engine=company_engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=(
                f"Provides information about {company}'s financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    )
    for tool_name, company, company_engine in (
        ("lyft", "Lyft", lyft_engine),
        ("uber", "Uber", uber_engine),
    )
]

In [None]:
# Sub-question engine built on top of the Lyft and Uber tools.
s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

In [None]:
# First comparison question, sent through the sub-question engine.
segments_question = (
    "Compare and contrast the customer segments and geographies that grew the fastest"
)
response = s_engine.query(segments_question)

In [None]:
# Show the answer to the customer-segments / geographies question.
print(response)

In [None]:
# Second comparison question; `response` is overwritten with the new answer.
revenue_question = "Compare revenue growth of Uber and Lyft from 2020 to 2021"
response = s_engine.query(revenue_question)

In [None]:
# Show the answer to the revenue-growth question.
print(response)