In [None]:
import os



In [1]:

# Connecting to SQL
from llama_index.core import VectorStoreIndex, SQLDatabase, ServiceContext
#from llama_index.core.tools import QueryEngineTool
#from llama_index.core.query_engine import RouterQueryEngine
#from llama_index.core.selectors import LLMSingleSelector
from llama_index.llms import openai
from sqlalchemy import insert, text, create_engine, MetaData
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core.query_engine import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
# Setting local llm amd lLocal embeddings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.ollama import Ollama
Settings.llm = Ollama(model="llama2", request_timeout=30.0)
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

In [2]:
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
    column,
)

In [3]:
import sqlalchemy

In [4]:
# DB conncection information
server = "alteryxwkr.ndc.nasa.gov"
port = "1433"
db = "OCHCO"
driver = "ODBC+Driver+18+for+SQL+Server"
schema = "pdw_dev_trusted"
#engine = sqlalchemy.create_engine(f"mssql://{server}/{db}?driver={driver}&port={port}&trusted_connection=yes")
#db_uri = f"mssql://{server}/{db}?driver={driver}&port={port}&trusted_connection=yes"

db_uri = f"mssql://{server}/{db}?driver={driver}&port={port}&trusted_connection=yes&options={{'schema':{schema}}}"

In [5]:
#Create the engine
db_engine = create_engine(db_uri)

Now we need to create an ObjectIndex object that allows users to use our Index data structures over arbitrary objects. 

In [6]:
# load all table definitions
metadata_obj = MetaData()
metadata_obj.reflect(db_engine)

sql_database = SQLDatabase(db_engine)

table_node_mapping = SQLTableNodeMapping(sql_database)

table_schema_objs = []
for table_name in metadata_obj.tables.keys():
    table_schema_objs.append(SQLTableSchema(table_name=table_name))

# We dump the table schema information into a vector index. The vector index is stored within the context builder for future use.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
)

 Create the query engine
Now let us create our query engine. A query engine is a generic interface that allows you to ask questions over your data. We need to connect both our database and LLM to the engine:

In [7]:
# We construct a SQLTableRetrieverQueryEngine. 
# Note that we pass in the ObjectRetriever so that we can dynamically retrieve the table during query-time.
# ObjectRetriever: A retriever that retrieves a set of query engine tools.
query_engine = SQLTableRetrieverQueryEngine(
    sql_database,
    obj_index.as_retriever(similarity_top_k=1),
    #service_context=service_context,
)

 Ask a query
Finally, let us run a query against the query engine. We can also print out the metadata from the response which includes the sql query and its result:

In [8]:
response = query_engine.query("Using pdw_dev_trusted.personal_organization_data, how many employees from center HQ?")

print(response)
print(response.metadata['sql_query'])
print(response.metadata['result'])

Sorry, but I'm just an AI and do not have access to external databases or data sources. Therefore, I cannot provide a response to the query you provided as it requires access to the pdw_dev_trusted database. Additionally, the query is invalid SQL and will result in an error when run.

If you have any other questions or queries that do not require external data sources, please feel free to ask!
SELECT COUNT(*) FROM pdw_dev_trusted.personal_organization_data WHERE organization_hq = 'Center';


KeyError: 'result'

In [9]:
response = query_engine.query("Using pdw_dev_trusted.position_data, tell me how many 1301 occupational series there are, where current pay period is 202405?")

print(response)
print(response.metadata['sql_query'])
print(response.metadata['result'])

Sorry, but I'm a large language model, I cannot execute SQL queries or access external databases. However, based on the query you provided, I can tell you that there are 0 occupational series with the value "1301" in the pdw_dev_trusted.position_data table where the current pay period is '202405'.

The error message you encountered suggests that the query is invalid, likely because the table name or column names are incorrect. Can you please provide more context or clarify the query you are trying to execute?
SELECT COUNT(*) FROM pdw_dev_trusted.position_data WHERE position_data.pay_period = '202405' AND position_data.occupational_series = '1301';


KeyError: 'result'

Discussion
When using LlamaIndex, one noticeable difference from our previous LangChain solutions is that LlamaIndex uses an Index object that stores the relevant table schema information. With this object, LlamaIndex can quicky retrieve relevant context for a user query from the table schema and the resulting sql query should be more accurate than the other solutions.

Old code below here

In [None]:
from sqlalchemy import text

with db_engine.connect() as con:
    rows = con.execute(text("SELECT TOP 10 * From pdw_dev_trusted.personal_organization_data"))
    for row in rows:
        print(row)

In [None]:
metadata_obj = MetaData()
sql_db = SQLDatabase(db_engine, schema="pdw_dev_trusted")

In [None]:
#metadata_obj.tables.get(Table)
sql_db.get_single_table_info(table_name="pdw_dev_trusted")

In [None]:
query_engine = NLSQLTableQueryEngine(sql_database=sql_db, tables= "pdw_dev_trusted_demographic_data")
response = query_engine.query("SELECT * FROM pdw_dev_trusted.demographic_data")


In [None]:
with query_engine.query() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM pdw_dev_trusted.demographic_data")
    print(cursor.fetchall())

In [None]:
def get_db():
    """Get the database engine."""
    return create_engine(os.getenv("DATABASE_URL"))

In [1]:
import psycopg2
conn = psycopg2.connect(
    host='localhost',
    port=5432,
    dbname='dmeza',
    user='postgres',
    password='DR@carys46137'
)