In [2]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
import nest_asyncio
nest_asyncio.apply()

In [4]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
import numexpr as ne 

In [None]:
import os
os.environ["OPENAI_API_KEY"] = "sk-xxx"
import openai
openai.api_key = "sk-xxx"

In [5]:
from llama_hub.youtube_transcript import YoutubeTranscriptReader

loader=YoutubeTranscriptReader()

youtube_documents = loader.load_data(ytlinks=['https://www.youtube.com/watch?v=jITPOcBQQW8', 'https://www.youtube.com/watch?v=xFfnJhZeL_Y', 'https://www.youtube.com/watch?v=g_LNu6Aaxvk'])

In [6]:
from llama_index import VectorStoreIndex

vector_indices = {}
vector_query_engines = {}

breeds = ["Savanah", "Ragdol", "Maine Coon"]

for breed, youtube in zip(breeds, youtube_documents):
    vector_index = VectorStoreIndex.from_documents([youtube])
    query_engine = vector_index.as_query_engine(similarity_top_k=3)
    vector_indices[breed] = vector_index
    vector_query_engines[breed] = query_engine

In [7]:
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata


query_engine_tools = []
for breed in breeds:
    query_engine = vector_query_engines[breed]

    query_engine_tool = QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=breed,
            description=f"Provides information about the cat breed {breed}"
        ),
    )
    query_engine_tools.append(query_engine_tool)

subquestion_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)    

In [8]:
from sqlalchemy import create_engine

engine = create_engine('sqlite:///databases/cats.db', future=True)
with engine.connect() as connection:
    result = connection.exec_driver_sql("SELECT * FROM cat_breeds")
    print(result.fetchall())

[('Savannah', 'USA', 'high energy', 'Large', 'sociable'), ('Ragdoll', 'USA', 'gentle', 'Large', 'sociable'), ('Maine Coon', 'USA (Maine)', 'friendly', 'Very Large', 'sociable')]


In [9]:
from llama_index import SQLDatabase
sql_database = SQLDatabase(engine, include_tables=["cat_breeds"])

In [15]:
from llama_index.indices.struct_store import NLSQLTableQueryEngine
sql_query_engine = NLSQLTableQueryEngine(
    sql_database,
    context_query_kwargs={"cat_breeds": (
        "The only columns available: name,origin,temperament,size,social_behavior. Do not use other columns and foreign keys. \n"
        "Do not attempt to run a query if the column is not among available columns. Do not use unexistant colums in a WHERE part of the query. \n"
        "Never return a query: SELECT name FROM cat_breeds WHERE eye_color = 'blue' AND eye_shape = 'almond' \n"
        "When the question is Which cat has the blue almond-shaped eyes? return SELECT name FROM cat_breeds \n"
        "When the column does not exist return an answer: SELECT name FROM cat_breeds\\n"
        )
    })

In [21]:
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        'Useful for translating a natural language query into a SQL query over a table containing: '
        ' name/origin/temperament/size/social_behaviour of each cat breed.'
        'Use when you need information about the cat origin, temperament, size and social behaviour.'
        'Do NOT use for other characteristics except origin, temperament, size and social behaviour.'
    )
)

In [12]:
subquery_vector_engine_tool = QueryEngineTool.from_defaults(
    query_engine=subquestion_engine,
    description=f"Useful for answering semantic questions about different cat breeds",
)

In [22]:
from llama_index.query_engine.sql_join_query_engine import SQLJoinQueryEngine
query_engine = SQLJoinQueryEngine(sql_tool, subquery_vector_engine_tool)

In [20]:
openai.log = "debug"

In [23]:
response=query_engine.query("Tell me about Maine Coon size and it's life span")

message='Request to OpenAI API' method=post path=https://api.openai.com/v1/chat/completions
api_version=None data='{"messages": [{"role": "user", "content": "Some choices are given below. It is provided in a numbered list (1 to 2),where each item in the list corresponds to a summary.\\n---------------------\\n(1) Useful for translating a natural language query into a SQL query over a table containing:  name/origin/temperament/size/social_behaviour of each cat breed.Use when you need information about the cat origin, temperament, size and social behaviour.Do NOT use for other characteristics except origin, temperament, size and social behaviour.\\n\\n(2) Useful for answering semantic questions about different cat breeds\\n---------------------\\nUsing only the choices above and not prior knowledge, generate the selection object and reason that is most relevant to the question: \'Tell me about Maine Coon size and it\'s life span\'\\n"}], "stream": false, "model": "gpt-3.5-turbo", "temper

[36;1m[1;3mQuerying SQL database: The question is asking about the size of a specific cat breed, which falls under the category of information provided in choice 1.
[0mINFO:llama_index.query_engine.sql_join_query_engine:> Querying SQL database: The question is asking about the size of a specific cat breed, which falls under the category of information provided in choice 1.
> Querying SQL database: The question is asking about the size of a specific cat breed, which falls under the category of information provided in choice 1.
INFO:llama_index.indices.struct_store.sql_query:> Table desc str: Table 'cat_breeds' has columns: name (VARCHAR(16)), origin (VARCHAR(16)), temperament (VARCHAR(16)), size (VARCHAR(16)), social_behavior (VARCHAR(16)), and foreign keys: . The table description is: The only columns available: name,origin,temperament,size,social_behavior. Do not use other columns and foreign keys. 
Do not attempt to run a query if the column is not among available columns. Do not 

message='OpenAI API response' path=https://api.openai.com/v1/chat/completions processing_ms=1771 request_id=7eff1ed2adc42180289b3d66416c2768 response_code=200
body='{\n  "id": "chatcmpl-7xu0FpuH26yII0iM6CEkyZa74uMVn",\n  "object": "chat.completion",\n  "created": 1694510983,\n  "model": "gpt-3.5-turbo-0613",\n  "choices": [\n    {\n      "index": 0,\n      "message": {\n        "role": "assistant",\n        "content": null,\n        "function_call": {\n          "name": "SingleSelection",\n          "arguments": "{\\n  \\"index\\": 1,\\n  \\"reason\\": \\"The question is asking about the size of a specific cat breed, which falls under the category of information provided in choice 1.\\"\\n}"\n        }\n      },\n      "finish_reason": "stop"\n    }\n  ],\n  "usage": {\n    "prompt_tokens": 209,\n    "completion_tokens": 40,\n    "total_tokens": 249\n  }\n}\n' headers='{\'Date\': \'Tue, 12 Sep 2023 09:29:45 GMT\', \'Content-Type\': \'application/json\', \'Transfer-Encoding\': \'chunked

OperationalError: (sqlite3.OperationalError) no such column: life_span
[SQL: SELECT size, life_span FROM cat_breeds WHERE name = 'Maine Coon']
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [37]:
print(response)

The Savannah cat typically has a longer lifespan compared to the Ragdoll cat.
