In [1]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
import numexpr as ne

In [4]:
import os
os.environ["OPENAI_API_KEY"] = "<Your API Key"
import openai
openai.api_key = "<Your API Key"

In [5]:
from llama_hub.youtube_transcript import YoutubeTranscriptReader

loader=YoutubeTranscriptReader()

youtube_documents = loader.load_data(ytlinks=['https://www.youtube.com/watch?v=jITPOcBQQW8', 'https://www.youtube.com/watch?v=xFfnJhZeL_Y', 'https://www.youtube.com/watch?v=g_LNu6Aaxvk'])

In [6]:
from llama_index import VectorStoreIndex

vector_indices = {}
vector_query_engines = {}

breeds = ["Savanah", "Ragdol", "Maine Coon"]

for breed, youtube in zip(breeds, youtube_documents):
    vector_index = VectorStoreIndex.from_documents([youtube])
    query_engine = vector_index.as_query_engine(similarity_top_k=3)
    vector_indices[breed] = vector_index
    vector_query_engines[breed] = query_engine

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [7]:
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata



query_engine_tools = []

#Create a list of QueryEngineTool instances for each breed:
for breed in breeds:
    query_engine = vector_query_engines[breed]

    query_engine_tool = QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name=breed,
            description=f"Provides information about the cat breed {breed}"
        ),
    )
    query_engine_tools.append(query_engine_tool)

#Create a SubQuestionQueryEngine using the list of query engine tools.
subquestion_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)    

In [8]:
from sqlalchemy import create_engine

engine = create_engine('sqlite:///databases/cats.db', future=True)
with engine.connect() as connection:
    result = connection.exec_driver_sql("SELECT * FROM cat_breeds")
    print(result.fetchall())

[('Savannah', 'USA', 'high energy', 'Large', 'sociable'), ('Ragdoll', 'USA', 'gentle', 'Large', 'sociable'), ('Maine Coon', 'USA (Maine)', 'friendly', 'Very Large', 'sociable')]


In [9]:
from llama_index import SQLDatabase
sql_database = SQLDatabase(engine, include_tables=["cat_breeds"])

In [10]:
from llama_index.indices.struct_store import NLSQLTableQueryEngine

# Import the NLSQLTableQueryEngine class and create an instance with context query kwargs:
sql_query_engine = NLSQLTableQueryEngine(
    sql_database,
    context_query_kwargs={"cat_breeds": (
        "The only columns available: name,origin,temperament,size,social_behavior. Do not use other columns and foreign keys. \n"
        "Do not attempt to run a query if the column is not among available columns. Do not use unexistant colums in a WHERE part of the query. \n"
        "Never return a query: SELECT name FROM cat_breeds WHERE eye_color = 'blue' AND eye_shape = 'almond' \n"
        "When the question is Which cat has the blue almond-shaped eyes? return SELECT name FROM cat_breeds \n"
        "When the column does not exist return an answer: SELECT name FROM cat_breeds\\n"
        )
    })

In [11]:
# Create a QueryEngineTool for the SQL query engine. Useful for translating a natural language query into a SQL query

sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        'Useful for translating a natural language query into a SQL query over a table containing: '
        ' name/origin/temperament/size/social_behaviour of each cat breed.'
        'Use when you need information about the cat origin, temperament, size and social behaviour.'
        'Do NOT use for other characteristics except origin, temperament, size and social behaviour.'
    )
)

In [12]:
# Create a subquery vector engine tool for answering semantic questions.

subquery_vector_engine_tool = QueryEngineTool.from_defaults(
    query_engine=subquestion_engine,
    description=f"Useful for answering semantic questions about different cat breeds",
)

In summary, the subquestion_engine is a specialized query engine designed to handle subquestions related to different cat breeds. It leverages a set of tools, each tailored to a specific cat breed, to provide detailed information in response to natural language queries about those breeds. This approach allows for a more nuanced and specific handling of queries, improving the system's ability to answer questions about cat breeds in a targeted manner.

In [13]:
from llama_index.query_engine.sql_join_query_engine import SQLJoinQueryEngine

# Perform a query using the joined query engine:
query_engine = SQLJoinQueryEngine(sql_tool, subquery_vector_engine_tool)

In [14]:
openai.log = "debug"

In [15]:
response=query_engine.query("Tell me about Maine Coon size and it's life span")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;34mQuerying SQL database: The question is asking about the size of a specific cat breed (Maine Coon) and this choice is specifically for obtaining information about the size of cat breeds.
[0mINFO:llama_index.query_engine.sql_join_query_engine:> Querying SQL database: The question is asking about the size of a specific cat breed (Maine Coon) and this choice is specifically for obtaining information about the size of cat breeds.
> Querying SQL database: The question is asking about the size of a specific cat breed (Maine Coon) and this choice is specifically for obtaining information about the size of cat breeds.
INFO:llama_index.indices.struct_store.sql_retriever:> Table desc str: Table 'cat_breeds' has columns: name (VARCHAR(16)), origin (VARCHAR(16)), temperament (VARCHAR(16)), size (VARCHAR(16)), social_beha

In [16]:
print(response)

I'm sorry, but there seems to be an error with the query. Could you please try again?
