In [1]:
# Apply the nest_asyncio patch before anything else runs.
# NOTE(review): presumably needed so async calls made by llama_index can run
# inside the notebook's already-running event loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [2]:
# Install llama_index's "simple" global handler.
# NOTE(review): the "** Prompt: **" dump in the query output further down
# appears to come from this handler — confirm.
import llama_index
llama_index.set_global_handler("simple")

In [3]:
import logging
import os
import sys

# "sk-..." is a placeholder value for the OpenAI credential; the assignment
# overwrites whatever OPENAI_API_KEY the process environment already holds.
os.environ["OPENAI_API_KEY"] = "sk-..."

# Send log records to stdout at INFO level; switch the level to logging.DEBUG
# for more verbose output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    SimpleKeywordTableIndex
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore
from llama_index.llms import OpenAI

from IPython.display import Markdown, display
from llama_index.llms.palm import PaLM
from llama_index.embeddings import GooglePaLMEmbedding


from llama_index.callbacks import (
    CallbackManager,
    LlamaDebugHandler
)


from llama_index.retrievers import (
    KeywordTableSimpleRetriever
)

from llama_index import Document, SummaryIndex
from llama_index.query_engine import PandasQueryEngine, RetrieverQueryEngine
from llama_index.retrievers import RecursiveRetriever
from llama_index.schema import IndexNode
from llama_hub.file.pymu_pdf.base import PyMuPDFReader
from pathlib import Path
from typing import List
from llama_index.readers import WikipediaReader

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    SQLDatabase,
)

In [4]:
# Debug handler that prints a timing trace when an operation ends (the
# "Trace: index_construction" / "Trace: query" blocks in the cell outputs),
# wrapped in the callback manager that is later passed to the service context.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

In [5]:
# Read the PaLM API key from the environment rather than committing it to the
# notebook; fall back to an interactive prompt when the variable is unset.
# NOTE(review): a literal key used to be hard-coded in this cell — treat it as
# compromised and revoke/rotate it.
palm_api_key = os.environ.get("PALM_API_KEY")
if not palm_api_key:
    from getpass import getpass

    palm_api_key = getpass("PaLM API key: ")

# LLM used for response synthesis. This assignment must be live: `model` is
# passed as `llm=` below, and leaving it commented out raises NameError on a
# fresh kernel.
model = PaLM(api_key=palm_api_key)

# Embedding model used when building and querying the vector indices.
model_name = "models/embedding-gecko-001"
embed_model = GooglePaLMEmbedding(model_name=model_name, api_key=palm_api_key)

# Shared service context: PaLM LLM + PaLM embeddings, 512-sized chunks, and the
# debug callback manager so index/query operations print their traces.
service_context = ServiceContext.from_defaults(
    llm=model,
    embed_model=embed_model,
    chunk_size=512,
    callback_manager=callback_manager,
)

In [6]:
## SQL database 
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
    column,
)

In [7]:
# SQLAlchemy engine on an in-memory SQLite database (the ":memory:" URL), plus
# the MetaData object that the table definitions below are registered on.
engine = create_engine("sqlite:///:memory:", future=True)
metadata_obj = MetaData()

In [8]:
# Define the city_stats table on metadata_obj and create it in the database.
table_name = "city_stats"

# Column layout: city_name is the primary key, country is required,
# population may be NULL.
city_stats_columns = (
    Column("city_name", String(16), primary_key=True),
    Column("population", Integer),
    Column("country", String(16), nullable=False),
)
city_stats_table = Table(table_name, metadata_obj, *city_stats_columns)

# Emit CREATE TABLE for everything registered on metadata_obj.
metadata_obj.create_all(engine)

In [9]:
# Show the names of the tables registered on metadata_obj (last expression in
# the cell, so the notebook displays it).
metadata_obj.tables.keys()

dict_keys(['city_stats'])

In [10]:
from sqlalchemy import insert

# Seed data for city_stats: one dict per row, keyed by column name.
rows = [
    {"city_name": "Toronto", "population": 2930000, "country": "Canada"},
    {"city_name": "Tokyo", "population": 13960000, "country": "Japan"},
    {"city_name": "Berlin", "population": 3645000, "country": "Germany"},
]

# Insert every row inside a single transaction: passing the list of parameter
# dicts to execute() runs the INSERT once per dict, and engine.begin() commits
# on success or rolls the whole batch back on error, so the table is never
# left partially seeded.
with engine.begin() as connection:
    connection.execute(insert(city_stats_table), rows)

In [11]:
# Read back every row of city_stats with a raw driver-level query and print
# the resulting list of row tuples.
with engine.connect() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM city_stats")
    fetched_rows = cursor.fetchall()
    print(fetched_rows)

[('Toronto', 2930000, 'Canada'), ('Tokyo', 13960000, 'Japan'), ('Berlin', 3645000, 'Germany')]


In [12]:
# Cities covered by the notebook, and the Wikipedia pages loaded for them.
# NOTE(review): later cells zip `cities` with `wiki_docs`, so the loader is
# assumed to return one document per page in the same order — confirm.
cities = ["Toronto", "Berlin", "Tokyo"]
wiki_docs = WikipediaReader().load_data(pages=cities)

In [13]:
# Number of documents loaded; the recorded output is 3, one per city.
len(wiki_docs)

3

In [14]:
# Build one VectorStoreIndex per city page, keyed by city name.

# zip() would silently drop cities if a page failed to load, so fail loudly.
if len(wiki_docs) != len(cities):
    raise ValueError(
        f"expected {len(cities)} wiki documents, got {len(wiki_docs)}"
    )

vector_indices = {}

# Kept as a plain loop so `vector_index` remains bound to the last index built
# once the loop finishes, exactly as before. The per-iteration query engine
# that used to be created here was never used and has been dropped.
for city, wiki_doc in zip(cities, wiki_docs):
    vector_index = VectorStoreIndex.from_documents(
        [wiki_doc], service_context=service_context
    )
    vector_indices[city] = vector_index

**********
Trace: index_construction
    |_node_parsing ->  0.258769 seconds
      |_chunking ->  0.235069 seconds
    |_embedding ->  3.361789 seconds
    |_embedding ->  1.808109 seconds
    |_embedding ->  1.205807 seconds
    |_embedding ->  1.144962 seconds
    |_embedding ->  1.327186 seconds
    |_embedding ->  0.511395 seconds
**********
**********
Trace: index_construction
    |_node_parsing ->  0.198188 seconds
      |_chunking ->  0.198188 seconds
    |_embedding ->  1.523649 seconds
    |_embedding ->  1.182593 seconds
    |_embedding ->  1.881586 seconds
    |_embedding ->  1.813584 seconds
    |_embedding ->  1.03271 seconds
**********
**********
Trace: index_construction
    |_node_parsing ->  0.108496 seconds
      |_chunking ->  0.107497 seconds
    |_embedding ->  1.168175 seconds
    |_embedding ->  1.219408 seconds
    |_embedding ->  1.265574 seconds
    |_embedding ->  0.554646 seconds
**********


In [16]:
# Sanity-check retrieval against a single per-city index. The index is looked
# up by name instead of through the leftover `vector_index` variable, which a
# later cell rebinds to the top-level routing index; "Tokyo" is the index that
# variable pointed at when this cell's recorded output was produced.
vector_indices["Tokyo"].as_retriever().retrieve("arts and culture")

**********
Trace: query
    |_retrieve ->  0.729894 seconds
      |_embedding ->  0.714111 seconds
**********


[NodeWithScore(node=TextNode(id_='87603540-6fb7-419f-91b1-08b387dc0d75', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d41f08a9-6135-433f-b005-5a83a1ba344f', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='01ed7006f770776f8a2f6eadc310be74c60a37a49b9a419c1fe33677eda49d87'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='b41ea881-d5ee-4ab0-8d35-b6334fcd6d20', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='e584863f47529968d0d314a53a0d14c40e689b66d645fadcad5790aa4a640529'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='bae30b73-2b6e-4246-a7ab-d3394139e252', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='4408a75433cd560cbc7b45a54621d80fa983b36b1d188605c9330559c6019a10')}, hash='03cf9cb3b8f74d7cb7c35461358e083c2c67295b30a730ac61b124dee521d80a', text="== Culture ==\nTokyo has many museums. In Ueno Park, there is the Tokyo National Mus

In [42]:
# Display the city name -> VectorStoreIndex mapping built earlier.
vector_indices

{'Toronto': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce9987b910>,
 'Berlin': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce99906790>,
 'Tokyo': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1ce9a48ee50>}

In [43]:
# Define the routing layer: one IndexNode per city plus a retriever per city.

# Short descriptions embedded in the top-level index; listed in the same order
# as `cities` because they are paired positionally below.
summaries = [
    "This node provides information about the city Toronto/toronto",
    "This node provides information about the city Berlin/berlin",
    "This node provides information about the city Tokyo/tokyo",
]

# Each IndexNode carries the city name as its index_id, which is also the key
# under which that city's retriever is stored in retriever_dict.
city_nodes = []
for city_name, summary in zip(cities, summaries):
    city_nodes.append(IndexNode(text=summary, index_id=str(city_name)))

# City name -> retriever over that city's vector index.
retriever_dict = {
    city_name: vector_indices[city_name].as_retriever() for city_name in cities
}


In [44]:
# Construct the top-level vector index over the city IndexNodes and a retriever
# that returns the 2 most similar nodes.
# NOTE: this rebinds `vector_index`, which up to this point referred to the
# last per-city index built in the index-construction loop.
vector_index = VectorStoreIndex(city_nodes, service_context = service_context)
vector_retriever = vector_index.as_retriever(similarity_top_k=2)

**********
Trace: index_construction
    |_embedding ->  0.933471 seconds
**********


In [45]:
# Query the top-level retriever directly: it returns the city IndexNodes with
# similarity scores (see the output below), not the underlying page text.
vector_retriever.retrieve("tell me about  arts and culture of toronto")

**********
Trace: query
    |_retrieve ->  0.613113 seconds
      |_embedding ->  0.611118 seconds
**********


[NodeWithScore(node=IndexNode(id_='a33c439e-51d8-42e9-b13a-c81afbd10bfa', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='a54567ce6633c4cca6895756c737b2706663380a95d82be0f24ad6c489bb9617', text='This node provides information about the city Toronto/toronto', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', index_id='Toronto'), score=0.7260854508592473),
 NodeWithScore(node=IndexNode(id_='e24c0e78-800a-4ff6-924c-893b76ba7660', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='570a2a80593ebd9a966b54b92c469fe2dcec855ac3278d2c765ca1a8bfbbd735', text='This node provides information about the city Tokyo/tokyo', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', index_id

In [46]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

# Retriever registry: the top-level retriever under the root id "vector",
# followed by the per-city retrievers keyed by city name (a city entry would
# win over "vector" if the keys ever collided).
all_retrievers = {"vector": vector_retriever}
all_retrievers.update(retriever_dict)

# Start at "vector"; verbose=True prints each retrieval hop.
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict=all_retrievers,
    verbose=True,
)

In [47]:
# Run the recursive retriever on its own; the verbose log below shows it
# entering the "Berlin" retriever and returning text from that city's page.
res = recursive_retriever.retrieve("tell me about  arts and culture of berlin")

[1;3;34mRetrieving with query id None: tell me about  arts and culture of berlin
[0m[1;3;38;5;200mRetrieved node with id, entering: Berlin
[0m[1;3;34mRetrieving with query id Berlin: tell me about  arts and culture of berlin
[0m[1;3;38;5;200mRetrieving text node: Partygoers in Germany often toast the New Year with a glass of sparkling wine.


=== Performing arts ===

Berlin is home to 44 theaters and stages. The Deutsches Theater in Mitte was built in 1849–50 and has operated almost continuously since then. The Volksbühne at Rosa-Luxemburg-Platz was built in 1913–14, though the company had been founded in 1890. The Berliner Ensemble, famous for performing the works of Bertolt Brecht, was established in 1949. The Schaubühne was founded in 1962 and moved to the building of the former Universum Cinema on Kurfürstendamm in 1981. With a seating capacity of 1,895 and a stage floor of 2,854 square meters (30,720 sq ft), the Friedrichstadt-Palast in Berlin Mitte is the largest show pala

In [48]:
from llama_index.query_engine import RetrieverQueryEngine

In [49]:
# Query engine that retrieves through the recursive retriever and synthesizes
# answers using the shared service context.
query_engine = RetrieverQueryEngine.from_args(recursive_retriever, service_context = service_context)

In [50]:
# End-to-end query: recursive retrieval followed by an LLM call (the trace
# below shows the retrieve hops, the prompt, and the synthesize/llm timings).
response = query_engine.query("tell me about  arts and culture of berlin")

[1;3;34mRetrieving with query id None: tell me about  arts and culture of berlin
[0m[1;3;38;5;200mRetrieved node with id, entering: Berlin
[0m[1;3;34mRetrieving with query id Berlin: tell me about  arts and culture of berlin
[0m[1;3;38;5;200mRetrieving text node: Partygoers in Germany often toast the New Year with a glass of sparkling wine.


=== Performing arts ===

Berlin is home to 44 theaters and stages. The Deutsches Theater in Mitte was built in 1849–50 and has operated almost continuously since then. The Volksbühne at Rosa-Luxemburg-Platz was built in 1913–14, though the company had been founded in 1890. The Berliner Ensemble, famous for performing the works of Bertolt Brecht, was established in 1949. The Schaubühne was founded in 1962 and moved to the building of the former Universum Cinema on Kurfürstendamm in 1981. With a seating capacity of 1,895 and a stage floor of 2,854 square meters (30,720 sq ft), the Friedrichstadt-Palast in Berlin Mitte is the largest show pala

[0m** Prompt: **
Context information is below.
---------------------
Partygoers in Germany often toast the New Year with a glass of sparkling wine.


=== Performing arts ===

Berlin is home to 44 theaters and stages. The Deutsches Theater in Mitte was built in 1849–50 and has operated almost continuously since then. The Volksbühne at Rosa-Luxemburg-Platz was built in 1913–14, though the company had been founded in 1890. The Berliner Ensemble, famous for performing the works of Bertolt Brecht, was established in 1949. The Schaubühne was founded in 1962 and moved to the building of the former Universum Cinema on Kurfürstendamm in 1981. With a seating capacity of 1,895 and a stage floor of 2,854 square meters (30,720 sq ft), the Friedrichstadt-Palast in Berlin Mitte is the largest show palace in Europe. For Berlin's independent dance and theatre scene, venues such as the Sophiensäle in Mitte and the three houses of the Hebbel am Ufer (HAU) in Kreuzberg are important. Most productions the

Trace: query
    |_query ->  11.379221 seconds
      |_synthesize ->  10.765511 seconds
        |_templating ->  0.0 seconds
        |_llm ->  10.74843 seconds
**********


In [1]:
from llama_index.llms import HuggingFaceLLM

In [None]:

import torch

model_id = 'Rijgersberg/GEITje-7B-chat-v2'

# `query_wrapper_prompt` was passed to HuggingFaceLLM below without ever being
# defined, so the cell failed with NameError. Define it as a plain
# pass-through of the query text.
# NOTE(review): replace with the chat template this model expects — TODO confirm.
query_wrapper_prompt = "{query_str}"

# Local Hugging Face LLM: tokenizer and weights both come from `model_id`,
# loaded onto CUDA in float16.
llm = HuggingFaceLLM(
    context_window=8000,
    max_new_tokens=500,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=model_id,
    model_name=model_id,
    device_map="cuda",
    # float16 weights to reduce GPU memory usage
    model_kwargs={"torch_dtype": torch.float16}
)