In [2]:
from llama_index import SimpleDirectoryReader, WikipediaReader
from IPython.display import Markdown, display
from langchain import HuggingFaceHub

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hhh"

In [5]:
wiki_docs = WikipediaReader().load_data(pages=['Toronto', 'Berlin', 'Tokyo'])

# Create db schema

In [6]:
from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column

In [7]:
engine = create_engine("sqlite:///:memory:")
metadata_obj = MetaData()

In [8]:
# create city SQL table
table_name = "city_stats"
city_stats_table = Table(
    table_name,
    metadata_obj,
    Column("city_name", String(16), primary_key=True),
    Column("population", Integer),
    Column("country", String(16), nullable=False),
)
metadata_obj.create_all(engine)

In [10]:
llm_hf=HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.2, "max_length":512})

In [11]:
from llama_index import SQLDatabase, ServiceContext, GPTSQLStructStoreIndex
from llama_index import LLMPredictor

In [12]:
llm_predictor = LLMPredictor(llm=llm_hf)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

ImportError: `transformers` package not found, please run `pip install transformers`

In [18]:
sql_database = SQLDatabase(engine, include_tables=["city_stats"])

In [19]:
sql_database.table_info

'\nCREATE TABLE city_stats (\n\tcity_name VARCHAR(16) NOT NULL, \n\tpopulation INTEGER, \n\tcountry VARCHAR(16) NOT NULL, \n\tPRIMARY KEY (city_name)\n)\n\n/*\n3 rows from city_stats table:\ncity_name\tpopulation\tcountry\n\n*/'

In [21]:
# NOTE: the table_name specified here is the table that you
# want to extract into from unstructured documents.
# index = GPTSQLStructStoreIndex.from_documents(
#     wiki_docs, 
#     sql_database=sql_database, 
#     table_name="city_stats",
#     service_context=service_context
# )

index = GPTSQLStructStoreIndex(
    [],
    sql_database=sql_database, 
    table_name="city_stats",
)

ValidationError: 1 validation error for OpenAI
__root__
  Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass  `openai_api_key` as a named parameter. (type=value_error)

In [None]:
# view current table
stmt = select(
    city_stats_table.c["city_name", "population", "country"]
).select_from(city_stats_table)

with engine.connect() as connection:
    results = connection.execute(stmt).fetchall()
    print(results)

In [None]:
query_engine = index.as_query_engine(
    query_mode="sql"
)
response = query_engine.query("SELECT city_name from city_stats")

In [None]:
display(Markdown(f"<b>{response}</b>"))

In [None]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(
    query_mode="nl"
)
response = query_engine.query("Which city has the lowest population?")

In [None]:
display(Markdown(f"<b>{response}</b>"))

In [None]:
# you can also fetch the raw result from SQLAlchemy! 
response.extra_info["result"]