### SubQueries from multiple documents
This query type gathers sub queries from multiple documents and combineds the results together to produce the final synthesis output from the the model

In [1]:
import os

from dotenv import load_dotenv
from llama_index.readers.file.base import SimpleDirectoryReader
from llama_index.vector_stores import SimpleVectorStore
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.storage import StorageContext
from llama_index.service_context import ServiceContext
from llama_index.llms.openai import OpenAI

In [2]:
load_dotenv()

True

In [3]:
llm = OpenAI(model="togethercomputer/CodeLlama-34b-Instruct", temperature=0.1)

In [4]:
from pprint import pprint

In [5]:
pprint(llm.to_json())

('{"system_prompt": null, "pydantic_program_mode": "default", "model": '
 '"togethercomputer/CodeLlama-34b-Instruct", "temperature": 0.1, "max_tokens": '
 'null, "additional_kwargs": {}, "max_retries": 3, "timeout": 60.0, '
 '"default_headers": null, "reuse_client": true, "api_base": '
 '"https://api.together.xyz/v1", "api_version": "", "class_name": '
 '"openai_llm"}')


In [6]:
# 10K analysis
service_context = ServiceContext.from_defaults(llm=llm)

### Load the data from data sources
We will be loadin the Uber and Lyft financials seperately

In [7]:
uber_docs = SimpleDirectoryReader(input_files=["./data/uber10k2021.pdf"]).load_data()
lyft_docs = SimpleDirectoryReader(input_files=["./data/lyft10k2021.pdf"]).load_data()

### Build the indexes
We will now be building the indexes for the Uber and Lyft documents

In [8]:
uber_index = VectorStoreIndex.from_documents(uber_docs)
lyft_index = VectorStoreIndex.from_documents(lyft_docs)

In [9]:
uber_query_engine = uber_index.as_query_engine()
lyft_query_engine = lyft_index.as_query_engine()

In [10]:
response = uber_query_engine.query("What company is discussed in the document?")
pprint(response.response)

('Uber is the company discussed in the document. The text provides information '
 "about Uber's mission, values, and various aspects of its operations.")


In [11]:
response = lyft_query_engine.query("What was the total earnings value of lyft in 2021")
pprint(response.response)

('Based on the context provided, the document discusses the recognition of a '
 'pre-tax gain of $119.3 million for the divestiture of certain assets related '
 'to the Level 5 division. However, it does not mention the total earnings or '
 'revenue of Lyft for the year 2021.')


In [21]:
pprint(response)

Response(response='Based on the context provided, the document discusses the '
                  'recognition of a pre-tax gain of $119.3 million for the '
                  'divestiture of certain assets related to the Level 5 '
                  'division. However, it does not mention the total earnings '
                  'or revenue of Lyft for the year 2021.',
         source_nodes=[NodeWithScore(node=TextNode(id_='ba94fd96-e3cd-464c-ac1d-ba14b248c458', embedding=None, metadata={'page_label': '81', 'file_name': 'lyft10k2021.pdf', 'file_path': 'data/lyft10k2021.pdf', 'file_type': 'application/pdf', 'file_size': 2029635, 'creation_date': '2024-02-09', 'last_modified_date': '2024-02-09', 'last_accessed_date': '2024-02-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], rela

In [12]:
from llama_index.tools import QueryEngineTool, ToolMetadata

In [13]:
query_engine_tools = [
    QueryEngineTool(
        query_engine=uber_query_engine,
        metadata=ToolMetadata(
            description="Retreive relevant information about uber 2021 financial reporting data",
            name="uber_2021_query"
        )
    ),
    QueryEngineTool(
        query_engine=lyft_query_engine,
        metadata=ToolMetadata(
            description="Retreive relevant information about lyft 2021 financial reporting data",
            name="lyft_2021_query"
        )
    )
]

In [14]:
from llama_index.query_engine import SubQuestionQueryEngine

In [15]:
s_query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools, service_context=service_context)

In [26]:
response = s_query_engine.query("what is the total revenue of uber in 2021?")
pprint(response.response)

KeyboardInterrupt: 

In [22]:
response = s_query_engine.query("Compare the revenue growth for Uber and Lyft from 2020 to 2021")
pprint(response.response)

ValueError: Expected tool_calls in ai_message.additional_kwargs, but none found.

In [19]:
load_dotenv()

True

In [21]:
import os
import json
import openai

client = openai.OpenAI(
    base_url="https://api.together.xyz/v1"
)

tools = [
  {
    "type": "function",
    "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA"
          },
          "unit": {
            "type": "string",
            "enum": [
              "celsius",
              "fahrenheit"
            ]
          }
        }
      }
    }
  }
]

messages = [
    {"role": "system", "content": "You are a helpful assistant that can access external functions. The responses from these function calls will be appended to this dialogue. Please provide responses based on the information from these function calls."},
    {"role": "user", "content": "What is the current temperature of New York, San Francisco and Chicago?"}
]
    
response = client.chat.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)

print(json.dumps(response.choices[0].message.model_dump()['tool_calls'], indent=2))

[
  {
    "id": "call_toq40iiw3rici7fy6kftlly1",
    "function": {
      "arguments": "{\"location\":\"New York, NY\",\"unit\":\"fahrenheit\"}",
      "name": "get_current_weather"
    },
    "type": "function"
  },
  {
    "id": "call_7weax9pyoxzpq75y68jfrvwa",
    "function": {
      "arguments": "{\"location\":\"San Francisco, CA\",\"unit\":\"fahrenheit\"}",
      "name": "get_current_weather"
    },
    "type": "function"
  },
  {
    "id": "call_ts99pihcky07ie4gplmf5qi2",
    "function": {
      "arguments": "{\"location\":\"Chicago, IL\",\"unit\":\"fahrenheit\"}",
      "name": "get_current_weather"
    },
    "type": "function"
  }
]
