In [1]:
import setup

# Bootstrap Django before any model import; `blog.models` is imported in a
# later cell. NOTE(review): presumably this sets DJANGO_SETTINGS_MODULE and
# calls django.setup() — confirm in the local `setup` module.
setup.init_django()

In [2]:
from decouple import config

In [3]:
from blog.models import BlogPost
from rag import db as rag_db, settings as rag_settings

In [4]:
from sqlalchemy import (
    create_engine,
    inspect,
)

from llama_index.core import SQLDatabase
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core.retrievers import NLSQLRetriever

In [5]:
# Initialize the project's default LlamaIndex settings (`rag.settings.init`).
rag_settings.init()
# Fetch the database URL with pooling requested. Per the original note this is
# the pooled Neon connection string sourced from .env / environment variables.
# NOTE(review): despite the `vector_` prefix, this URL is used below for plain
# SQL access to the Django tables — presumably the same database; confirm.
vector_database_url = rag_db.get_database_url(use_pooling=True)

In [6]:
# SQLAlchemy engine over the same database; shared by the schema inspection
# and the LlamaIndex SQLDatabase wrapper in the cells below.
engine = create_engine(vector_database_url)

In [7]:
# List the tables reachable through this engine to confirm the exact table
# names (e.g. the blog table) before restricting what the LLM may query.
inspect(engine).get_table_names()

['django_migrations',
 'django_content_type',
 'auth_permission',
 'auth_group',
 'auth_group_permissions',
 'auth_user',
 'auth_user_groups',
 'auth_user_user_permissions',
 'django_admin_log',
 'django_session',
 'blog_blogpost',
 'products_embedding',
 'products_product']

In [8]:
# Resolve each Django model to its backing table name so the SQL tooling is
# limited to tables we explicitly choose; add more models to `models` to
# expose more tables.
models = [BlogPost]
# `_meta.db_table` is the table name Django uses for the model. A
# comprehension keeps loop temporaries out of the kernel namespace.
tables = [model._meta.db_table for model in models]

In [9]:
# Display the resolved table names as a sanity check.
tables

['blog_blogpost']

In [10]:
# Wrap the engine for LlamaIndex. `include_tables` restricts the wrapper to the
# listed tables rather than every table in the database.
# NOTE(review): the truncated `self._metadata.reflect(` output below looks like
# a SQLAlchemy reflection warning — confirm it is benign.
sql_database = SQLDatabase(engine, include_tables=tables)

  self._metadata.reflect(


In [11]:
# Natural-language-to-SQL query engine over the wrapped database, scoped to
# the same table list.
sql_query_engine = NLSQLTableQueryEngine(sql_database=sql_database, tables=tables)

In [18]:
# Ask a question in plain English; the engine generates SQL, runs it, and
# synthesizes an answer. The generated statement is available under
# `response.metadata["sql_query"]` (visible in the repr below).
response = sql_query_engine.query("What is my most recent blog post id?")
# Bare last expression so the notebook displays the full Response repr.
response

Response(response='Your most recent blog post ID is 37.', source_nodes=[NodeWithScore(node=TextNode(id_='71c339cc-6c0a-4a8c-a872-021415e117e8', embedding=None, metadata={'sql_query': 'SELECT id FROM blog_blogpost ORDER BY timestamp DESC LIMIT 1;', 'result': [(37,)], 'col_keys': ['id']}, excluded_embed_metadata_keys=['sql_query', 'result', 'col_keys'], excluded_llm_metadata_keys=['sql_query', 'result', 'col_keys'], relationships={}, text='[(37,)]', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=None)], metadata={'71c339cc-6c0a-4a8c-a872-021415e117e8': {'sql_query': 'SELECT id FROM blog_blogpost ORDER BY timestamp DESC LIMIT 1;', 'result': [(37,)], 'col_keys': ['id']}, 'sql_query': 'SELECT id FROM blog_blogpost ORDER BY timestamp DESC LIMIT 1;', 'result': [(37,)], 'col_keys': ['id']})

In [19]:
# Each source node wraps the raw SQL result; print its text content to see
# the rows the answer was based on.
for node in response.source_nodes:
    print(node.node.get_content())

[(37,)]


In [26]:
# Retriever variant: turns the question into SQL and returns the result as
# nodes, without the answer-synthesis step of the query engine above.
# NOTE(review): `return_raw=True` appears to yield a single node whose text is
# the raw result list (see the output below) — confirm against the docs.
nl_sql_retriever = NLSQLRetriever(
    sql_database=sql_database,
    tables=tables,
    return_raw=True,
)

# Ask for the oldest post explicitly; contradictory wording such as
# "least most recent" leaves the generated SQL up to the model's guess.
r = nl_sql_retriever.retrieve("What is my oldest blog post?")

In [27]:
# Dump the whole result list first, then each node's summary and metadata
# (generated SQL, raw rows, and column keys).
print(r)
for node in r:
    print(node, node.metadata, sep="\n")

[NodeWithScore(node=TextNode(id_='ff5bb486-d8c8-4973-adf1-0c38bad839ad', embedding=None, metadata={'sql_query': 'SELECT id, title, timestamp FROM blog_blogpost ORDER BY timestamp ASC LIMIT 1;', 'result': [(29, 'New Blog Post', datetime.datetime(2024, 7, 31, 18, 19, 27, 623803, tzinfo=datetime.timezone.utc))], 'col_keys': ['id', 'title', 'timestamp']}, excluded_embed_metadata_keys=['sql_query', 'result', 'col_keys'], excluded_llm_metadata_keys=['sql_query', 'result', 'col_keys'], relationships={}, text="[(29, 'New Blog Post', datetime.datetime(2024, 7, 31, 18, 19, 27, 623803, tzinfo=datetime.timezone.utc))]", mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=None)]
Node ID: ff5bb486-d8c8-4973-adf1-0c38bad839ad
Text: [(29, 'New Blog Post', datetime.datetime(2024, 7, 31, 18, 19,
27, 623803, tzinfo=datetime.timezone.utc))]
Score: None

{'sql_query': 'SELECT id, titl