In [1]:
import setup

# Runs before the `rag` package is imported in the next cell.
# NOTE(review): presumably this configures Django so project modules can be
# imported from the notebook — confirm in setup.py.
setup.init_django()

In [2]:
from rag import (
    db as rag_db, 
    engines as rag_engines,
    settings as rag_settings, 
    updaters as rag_updaters,
    patches as rag_patches,
)

In [3]:
from typing import Optional, Union
from sqlalchemy import create_engine, text

In [4]:
# One-time initialisation, in this order: settings, then the vector DB,
# then the LlamaIndex documents.
# NOTE(review): presumably each step relies on the previous one — confirm
# before reordering or splitting these calls.
rag_settings.init()
rag_db.init_vector_db()
# NOTE(review): use_saved_embeddings=True presumably reuses stored embeddings
# instead of recomputing them — verify in rag.updaters.
rag_updaters.update_llama_index_documents(use_saved_embeddings=True)

In [5]:
# Build the retrieval components; the retriever and the SQL engine are wrapped
# as tools in the following cells.
# NOTE(review): vector_index is not referenced again in the visible cells.
vector_index = rag_engines.get_semantic_query_index()
semantic_query_retriever = rag_engines.get_semantic_query_retriever_engine()
sql_query_engine = rag_engines.get_sql_query_engine()

In [6]:
# Show which vector database and table the settings point at.
print(f"{rag_settings.VECTOR_DB_NAME} {rag_settings.VECTOR_DB_TABLE_NAME}")

vector_db blogpost


In [7]:
from llama_index.core.tools import QueryEngineTool

# Wrap the semantic retriever as a tool. The description is the text the
# combined query engine echoes back in its routing rationale, so keep it
# short and specific.
vector_tool = QueryEngineTool.from_defaults(
    query_engine=semantic_query_retriever,
    # Plain literal: the text has no placeholders, so no f-string prefix.
    description="Useful for answering semantic questions about different blog posts",
)

In [8]:
# Wrap the SQL engine as a tool. The description is what the combined query
# engine reads when deciding whether a question should go to SQL, so it has
# to be a well-formed sentence.
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: blog posts and page views for each blog post"
    ),
)

In [11]:
# Combine both tools into one engine; the SQL tool is passed first, the vector tool second.
query_engine = rag_patches.MySQLAutoVectorQueryEngine(sql_tool, vector_tool)

In [12]:
# A purely semantic question about post content.
response = query_engine.query("What kind of org is discussed?")

Querying other query engine: The question 'What kind of org is discussed?' is a semantic question about the content of blog posts. Therefore, choice (2) is the most relevant as it is useful for answering semantic questions about different blog posts.

In [13]:
# Bare last expression: shows the answer text of the most recent query.
response.response

'The discussion contrasts two types of entities: an organization and an organism. An organization is structured, with systems, charts, and approval processes, while an organism is dynamic, constantly changing, and adapting to its environment. The text suggests that engaging with a culture as part of an organism can lead to better understanding and resilience compared to a traditional organization.'

In [14]:
# Two-part question: a ranking by page views plus a summary of the posts' content.
response = query_engine.query(
    "What are the top 5 most viewed blog posts? What keywords do their content have?"
)

Querying SQL database: The question requires translating a natural language query into a SQL query to retrieve the top 5 most viewed blog posts and analyze their content for keywords. Choice (1) is relevant as it deals with translating queries into SQL over a table containing blog posts and page views.
SQL query: SELECT
    blog_blogpost.id, 
    blog_blogpost.title, 
    blog_blogpost.content, 
    COUNT(analytics_pageview.id) AS view_count
FROM 
    blog_blogpost
JOIN 
    analytics_pageview ON blog_blogpost.id = analytics_pageview.post_id
GROUP BY 
    blog_blogpost.id, blog_blogpost.title, blog_blogpost.content
ORDER BY 
    view_count DESC
LIMIT 5;
[0m[1;3;33mSQL response: The top 5 most viewed blog posts and their content keywords are as follows:

1. **Title:** Taking it very seriously
   - **View Count:** 2493
   - **Content Keywords:** April first, greeting, New Year’s, internet, smile, pretend, apocalypse

2. **Title:** “But we were comfortable”
   - *

In [15]:
from IPython.display import Markdown, display

# The answer text contains Markdown (bold titles, nested lists), so render it
# as Markdown rather than showing the raw string.
display(Markdown(response.response))

The top 5 most viewed blog posts and their content keywords are as follows:

1. **Title:** Taking it very seriously
   - **View Count:** 2493
   - **Content Keywords:** April first, greeting, New Year’s, internet, smile, pretend, apocalypse

2. **Title:** “But we were comfortable”
   - **View Count:** 2490
   - **Content Keywords:** shift to digital, unwanted, risk, lonely, powerful, efficient, comfortable, follow, stay

3. **Title:** All models are wrong, some models are useful
   - **View Count:** 2471
   - **Content Keywords:** model, map, territory, approximation, problem, organization, opportunity, simplified version

4. **Title:** The Fremen principle
   - **View Count:** 2391
   - **Content Keywords:** new resources, limited resources, population, alternatives, Harvard, distance learning, in-person lectures, tenure, accreditation, waiting list

5. **Title:** Portfolio school: Get better clients
   - **View Count:** 2383
   - **Content Keywords:** tragedy, health, economy, panic, focus, overwhelmed, health care workers, burning the candle

These blog posts cover a range of topics from digital transformation and resource management to health and economic challenges, each resonating with a significant number of readers.

In [20]:
# Date-scoped ranking question; the raw answer text is printed as-is.
response = query_engine.query("What are the top 5 least viewed blog posts from today?")
print(response.response)

Querying SQL database: The question requires translating a natural language query into a SQL query to retrieve the top 5 least viewed blog posts from today. Choice (1) is relevant as it is useful for translating natural language queries into SQL queries over a table containing blog posts and page views.
SQL query: SELECT bp.id, bp.title, COUNT(pv.id) AS view_count
FROM blog_blogpost bp
LEFT JOIN analytics_pageview pv ON bp.id = pv.post_id
WHERE pv.timestamp::date = CURRENT_DATE
GROUP BY bp.id, bp.title
ORDER BY view_count ASC
LIMIT 5;
[0m[1;3;33mSQL response: Based on today's data, here are the top 5 least viewed blog posts:

1. **Monopoly and network effects** - 525 views
2. **A Sunday book reading** - 558 views
3. **Helping leaders in college reboot** - 624 views
4. **Is everything going to be okay?** - 648 views
5. **You’re surrounded** - 654 views

These posts have received the fewest views today.
[0m[1;3;34mTransformed query given SQL response: None
[0mB

In [22]:
# Render the most recent answer as Markdown (uses the names imported from
# IPython.display in an earlier cell).
display(Markdown(response.response))

Based on today's data, here are the top 5 least viewed blog posts:

1. **Monopoly and network effects** - 525 views
2. **A Sunday book reading** - 558 views
3. **Helping leaders in college reboot** - 624 views
4. **Is everything going to be okay?** - 648 views
5. **You’re surrounded** - 654 views

These posts have received the fewest views today.