In [1]:
# Apply the nest_asyncio patch before anything else runs.
# Presumably needed because the notebook kernel already runs an event loop
# and later llama_index calls start their own — TODO confirm.
import nest_asyncio
nest_asyncio.apply()

In [2]:
# Install llama_index's "simple" global handler.
# The "** Prompt: **" dump in the query output further down presumably comes from it.
import llama_index
llama_index.set_global_handler("simple")

In [3]:
import os

# Placeholder only -- never commit a real key here.
# setdefault() keeps a key that is already exported in the environment
# instead of clobbering it with the placeholder value.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

import logging
import sys

# Send log records to stdout at INFO level so they show up in cell output
# (swap in logging.DEBUG for more verbose output).
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    SimpleKeywordTableIndex
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore
from llama_index.llms import OpenAI

from IPython.display import Markdown, display
from llama_index.llms.palm import PaLM
from llama_index.embeddings import GooglePaLMEmbedding


from llama_index.callbacks import (
    CallbackManager,
    LlamaDebugHandler
)


from llama_index.retrievers import (
    KeywordTableSimpleRetriever
)

from llama_index import Document, SummaryIndex
from llama_index.query_engine import PandasQueryEngine, RetrieverQueryEngine
from llama_index.retrievers import RecursiveRetriever
from llama_index.schema import IndexNode
from llama_hub.file.pymu_pdf.base import PyMuPDFReader
from pathlib import Path
from typing import List
from llama_index.readers import WikipediaReader

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    SQLDatabase,
)

In [4]:
# Debug handler created with print_trace_on_end=True; the "Trace: ..." blocks
# in the cell outputs below presumably come from it.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
# Callback manager holding the debug handler; it is passed to the ServiceContext below.
callback_manager = CallbackManager([llama_debug])

In [5]:
# Read the PaLM API key from the environment instead of hard-coding it:
# a key embedded in a notebook leaks to everyone who can read the file.
palm_api_key = os.environ.get("PALM_API_KEY")
if not palm_api_key:
    raise RuntimeError(
        "Set the PALM_API_KEY environment variable before running this cell."
    )

# PaLM is the LLM used for response generation.
model = PaLM(api_key=palm_api_key)

# PaLM embedding model used to embed both documents and queries.
model_name = "models/embedding-gecko-001"
embed_model = GooglePaLMEmbedding(model_name=model_name, api_key=palm_api_key)

# Shared service context: every index and query engine built with it uses the
# PaLM LLM and embeddings, 512-sized chunks, and the debug callback manager.
service_context = ServiceContext.from_defaults(
    llm=model,
    embed_model=embed_model,
    chunk_size=512,
    callback_manager=callback_manager,
)

In [6]:
## SQL database 
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
    column,
)

In [7]:
# In-memory SQLite database (the ":memory:" URL), so nothing is written to disk.
engine = create_engine("sqlite:///:memory:", future=True)
# MetaData object that the table definition below is registered on.
metadata_obj = MetaData()

In [8]:
# Declare the city_stats table -- city name (primary key), population and a
# mandatory country -- then create every table registered on metadata_obj.
table_name = "city_stats"
city_stats_columns = [
    Column("city_name", String(16), primary_key=True),
    Column("population", Integer),
    Column("country", String(16), nullable=False),
]
city_stats_table = Table(table_name, metadata_obj, *city_stats_columns)

metadata_obj.create_all(engine)

In [9]:
# Show the names of the tables registered on metadata_obj
# (only city_stats is expected at this point).
metadata_obj.tables.keys()

dict_keys(['city_stats'])

In [10]:
from sqlalchemy import insert

rows = [
    {"city_name": "Toronto", "population": 2930000, "country": "Canada"},
    {"city_name": "Tokyo", "population": 13960000, "country": "Japan"},
    {"city_name": "Berlin", "population": 3645000, "country": "Germany"},
]

# Insert every row in ONE transaction with a single bulk execute: either all
# cities are stored or none are, and only one connection is opened.
# Note: city_name is the primary key, so re-running this cell against the same
# engine will fail on the duplicate keys.
with engine.begin() as connection:
    connection.execute(insert(city_stats_table), rows)

In [11]:
# Read the whole table back with a raw driver-level query to confirm the insert.
with engine.connect() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM city_stats")
    fetched_rows = cursor.fetchall()
print(fetched_rows)

[('Toronto', 2930000, 'Canada'), ('Tokyo', 13960000, 'Japan'), ('Berlin', 3645000, 'Germany')]


In [12]:
# Load one Wikipedia page per city.
# A later cell pairs `cities` with `wiki_docs` by position, which assumes
# load_data returns the documents in the order requested -- TODO confirm.
cities = ["Toronto", "Berlin", "Tokyo"]
wiki_docs = WikipediaReader().load_data(pages=cities)

In [13]:
# Sanity check: expect one document per requested city (3).
len(wiki_docs)

3

In [14]:
# Build one vector index per city from that city's Wikipedia document.
# zip() silently truncates to the shorter input, so fail loudly if a page
# did not load and the two lists no longer line up.
assert len(cities) == len(wiki_docs), "expected one Wikipedia document per city"

vector_indices = {}

for city, wiki_doc in zip(cities, wiki_docs):
    vector_index = VectorStoreIndex.from_documents(
        [wiki_doc], service_context=service_context
    )
    # Keyed by city name; the per-city query engines are created from these
    # indices in a later cell.
    vector_indices[city] = vector_index

**********
Trace: index_construction
    |_node_parsing ->  0.164847 seconds
      |_chunking ->  0.15884 seconds
    |_embedding ->  2.32182 seconds
    |_embedding ->  1.032292 seconds
    |_embedding ->  1.106384 seconds
    |_embedding ->  1.067382 seconds
    |_embedding ->  1.123629 seconds
    |_embedding ->  0.481284 seconds
**********
**********
Trace: index_construction
    |_node_parsing ->  0.110866 seconds
      |_chunking ->  0.102881 seconds
    |_embedding ->  1.469821 seconds
    |_embedding ->  1.054815 seconds
    |_embedding ->  1.142529 seconds
    |_embedding ->  1.025215 seconds
    |_embedding ->  0.953844 seconds
**********
**********
Trace: index_construction
    |_node_parsing ->  0.089082 seconds
      |_chunking ->  0.085072 seconds
    |_embedding ->  1.079916 seconds
    |_embedding ->  1.735675 seconds
    |_embedding ->  1.010579 seconds
    |_embedding ->  0.566932 seconds
**********


In [15]:
# Inspect the mapping of city name -> VectorStoreIndex.
vector_indices

{'Toronto': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce998e16a0>,
 'Berlin': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce9979cdf0>,
 'Tokyo': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce9965b850>}

In [16]:
# define index nodes
# One IndexNode per city. Each node's index_id is the city name, which is also
# the key under which that city's query engine is registered in
# query_engine_dict. The summaries are generated from `cities` so the node
# text, the index_id and the engine key cannot drift out of sync.
summaries = [
    f"This node provides information about the city {city}/{city.lower()}"
    for city in cities
]

city_nodes = [
    IndexNode(text=summary, index_id=city)
    for city, summary in zip(cities, summaries)
]

# City name -> query engine over that city's vector index.
query_engine_dict = {
    city: vector_indices[city].as_query_engine() for city in cities
}


In [19]:
# Inspect the mapping of city name -> query engine.
# NOTE(review): this cell's execution count (19) comes before the next cell's (17),
# so the notebook was run out of order; re-run top to bottom before sharing.
query_engine_dict

{'Toronto': <llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1ce99906070>,
 'Berlin': <llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1ce99906760>,
 'Tokyo': <llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x1ce99906a30>}

In [17]:
# construct top-level vector index + retriever
# The index holds only the city summary nodes; similarity_top_k=1 makes the
# retriever return the single best-matching city node for a query.
# NOTE: `vector_index` is rebound here -- it previously held the last per-city index.
vector_index = VectorStoreIndex(city_nodes, service_context = service_context)
vector_retriever = vector_index.as_retriever(similarity_top_k=1)

**********
Trace: index_construction
    |_embedding ->  0.949512 seconds
**********


In [18]:
# Smoke-test the top-level retriever: a Toronto question should come back
# with the Toronto summary node (index_id='Toronto').
vector_retriever.retrieve("tell me about  arts and culture of toronto")

**********
Trace: query
    |_retrieve ->  0.611131 seconds
      |_embedding ->  0.609598 seconds
**********


[NodeWithScore(node=IndexNode(id_='7a51e2ac-8482-4d14-b42c-c8a182eab0fc', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='a54567ce6633c4cca6895756c737b2706663380a95d82be0f24ad6c489bb9617', text='This node provides information about the city Toronto/toronto', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', index_id='Toronto'), score=0.7260854508592473)]

In [23]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

# Retrieval starts at the "vector" retriever (the top-level index over the city
# summary nodes). When a retrieved IndexNode's index_id matches a key in
# query_engine_dict, the query is handed to that city's query engine.
root_retrievers = {"vector": vector_retriever}
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict=root_retrievers,
    query_engine_dict=query_engine_dict,
    verbose=True,
)

# No explicit response synthesizer is passed; one could be built with
# get_response_synthesizer(service_context=service_context, response_mode="compact")
# and supplied here, but the default is used instead.
query_engine = RetrieverQueryEngine.from_args(
    recursive_retriever,
    service_context=service_context,
)


In [24]:
# Run an end-to-end query through the recursive retriever; the output below
# shows it entering the Berlin node before the prompt is built.
res = query_engine.query("tell me about  arts and culture of berlin")

[1;3;34mRetrieving with query id None: tell me about  arts and culture of berlin
[0m[1;3;38;5;200mRetrieved node with id, entering: Berlin
[0m[1;3;34mRetrieving with query id Berlin: tell me about  arts and culture of berlin
[0m** Prompt: **
Context information is below.
---------------------
Partygoers in Germany often toast the New Year with a glass of sparkling wine.


=== Performing arts ===

Berlin is home to 44 theaters and stages. The Deutsches Theater in Mitte was built in 1849–50 and has operated almost continuously since then. The Volksbühne at Rosa-Luxemburg-Platz was built in 1913–14, though the company had been founded in 1890. The Berliner Ensemble, famous for performing the works of Bertolt Brecht, was established in 1949. The Schaubühne was founded in 1962 and moved to the building of the former Universum Cinema on Kurfürstendamm in 1981. With a seating capacity of 1,895 and a stage floor of 2,854 square meters (30,720 sq ft), the Friedrichstadt-Palast in Berlin M

In [None]:
from llama_index.query_engine import RetrieverQueryEngine

In [None]:
# NOTE(review): rebuilds the same query engine as the earlier
# RetrieverQueryEngine.from_args cell; this cell has no execution count (never run).
query_engine = RetrieverQueryEngine.from_args(recursive_retriever, service_context = service_context)

In [None]:
# Same Berlin query as the executed cell above, stored in `response` instead of `res`.
response = query_engine.query("tell me about  arts and culture of berlin")

In [None]:
# Display the response object returned by the query.
response