In [1]:
import setup

# Initialise Django through the project-local `setup` helper. This runs
# before the `rag` package is imported in the next cell (presumably those
# modules need configured Django settings — confirm in setup.py).
setup.init_django()

In [2]:
from rag import (
    db as rag_db, 
    engines as rag_engines,
    settings as rag_settings, 
    updaters as rag_updaters,
    patches as rag_patches,
)

In [3]:
from typing import Optional, Union
from sqlalchemy import create_engine, text

In [4]:
# Bootstrap the RAG stack. Keep the calls in this order: settings, then the
# vector database, then the document sync (assumed dependency chain based on
# the helper names — confirm in the rag package).
rag_settings.init()
rag_db.init_vector_db()
# use_saved_embeddings=True presumably reuses stored embeddings instead of
# recomputing them — TODO confirm in rag.updaters.
rag_updaters.update_llama_index_documents(use_saved_embeddings=True)

In [5]:
# Build the retrieval components that the tool cells below wrap:
# `semantic_query_retriever` feeds the vector tool and `sql_query_engine`
# feeds the SQL tool.
# NOTE(review): `vector_index` is not referenced again in the visible cells.
vector_index = rag_engines.get_semantic_query_index()
semantic_query_retriever = rag_engines.get_semantic_query_retriever_engine()
sql_query_engine = rag_engines.get_sql_query_engine()

In [6]:
# Sanity check: show which vector database and table the settings point at.
print(f"{rag_settings.VECTOR_DB_NAME} {rag_settings.VECTOR_DB_TABLE_NAME}")

vector_db blogpost


In [7]:
from llama_index.core.tools import QueryEngineTool

# Wrap the semantic retriever as a tool. The description is the text the
# router quotes when it explains which engine it picked for a question.
vector_tool = QueryEngineTool.from_defaults(
    query_engine=semantic_query_retriever,
    # Plain string literal: there are no placeholders to interpolate, so an
    # f-prefix is unnecessary here.
    description=(
        "Useful for answering semantic questions about different blog posts"
    ),
)

In [8]:
# Wrap the SQL query engine as a tool. The description is read by the router
# when it decides whether a question should be answered with SQL, so it is
# worded as a complete, unambiguous phrase.
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: blog posts and page views for each blog post"
    ),
)

In [9]:
# Combine both tools into one engine. Argument order is SQL tool first, then
# the vector tool; the logged output of later cells shows each question being
# routed to either the SQL database or the other query engine.
query_engine = rag_patches.MySQLAutoVectorQueryEngine(sql_tool, vector_tool)

In [15]:
# Open-ended question with no tabular angle; the logged routing decision
# shows it being sent to the non-SQL engine.
response = query_engine.query("What do you make?")

[1;3;34mQuerying other query engine: The question 'What do you make?' is a semantic question about the content or purpose of blog posts, which aligns with choice (2) that is useful for answering semantic questions about different blog posts.
[0m

In [16]:
# Show the answer text of the most recent query (the bare last expression is
# rendered as the cell output).
response.response

'If your job is to make decisions, doing it more productively and with less hassle or drama could be beneficial. Improving the quality of your decisions is crucial, as it is a common goal for professionals in any field. Making better decisions can lead to more effective outcomes and greater success in your work.'

In [17]:
# Two-part question: a ranking by page views plus a content summary for the
# ranked posts.
response = query_engine.query(
    "What are the top 5 most viewed blog posts? What keywords do their content have?"
)

[1;3;34mQuerying SQL database: The question requires translating a natural language query into a SQL query to retrieve the top 5 most viewed blog posts and analyze their content for keywords. This aligns with the functionality described in choice 1.
[0m[1;3;33mSQL query: SELECT 
    blog_blogpost.title, 
    blog_blogpost.content, 
    COUNT(analytics_pageview.id) AS view_count
FROM 
    blog_blogpost
JOIN 
    analytics_pageview ON blog_blogpost.id = analytics_pageview.post_id
GROUP BY 
    blog_blogpost.id
ORDER BY 
    view_count DESC
LIMIT 5;
[0m[1;3;33mSQL response: Here are the top 5 most viewed blog posts along with the keywords that their content includes:

1. **Title: "Taking it very seriously"**
   - **View Count:** 2493
   - **Keywords:** April first, greeting, Happy, internet, apocalypse, pretend, smile

2. **Title: "“But we were comfortable”"**
   - **View Count:** 2490
   - **Keywords:** digital shift, unwanted, risk, lonely, powerful, efficient, comfortable, follow


In [18]:
from IPython.display import Markdown, display

# The answer text contains Markdown (numbered list, bold), so render it
# instead of showing the raw string.
display(Markdown(response.response))

Here are the top 5 most viewed blog posts along with the keywords that their content includes:

1. **Title: "Taking it very seriously"**
   - **View Count:** 2493
   - **Keywords:** April first, greeting, Happy, internet, apocalypse, pretend, smile

2. **Title: "“But we were comfortable”"**
   - **View Count:** 2490
   - **Keywords:** digital shift, unwanted, risk, lonely, powerful, efficient, comfortable, follow

3. **Title: "All models are wrong, some models are useful"**
   - **View Count:** 2471
   - **Keywords:** model, map, territory, approximation, problem, organization, opportunity, simplified

4. **Title: "The Fremen principle"**
   - **View Count:** 2391
   - **Keywords:** new resources, limited resources, population, alternatives, distance learning, in-person lectures, tenure, accreditation

5. **Title: "Portfolio school: Get better clients"**
   - **View Count:** 2383
   - **Keywords:** tragedy, health, economy, panic, focus, overwhelmed, health care workers

These blog posts cover a range of topics from digital transformation and resource management to health and economic challenges, each resonating with a significant number of readers.

In [25]:
# Ask for the least-viewed posts within a date range and print the raw
# answer text.
# NOTE(review): in the SQL logged for this cell, the WHERE filter on
# analytics_pageview.timestamp drops rows where the LEFT JOIN found no page
# view, so posts with zero views in the range cannot appear in the result.
# Treat the ranking as "least viewed among posts that were viewed".
response = query_engine.query(
    "What are the top 5 least viewed blog posts in the year 2024 to 2025?"
)
print(response.response)

[1;3;34mQuerying SQL database: The question requires translating a natural language query into a SQL query to retrieve the top 5 least viewed blog posts in the specified time frame.
[0m[1;3;33mSQL query: SELECT 
    blog_blogpost.id, 
    blog_blogpost.title, 
    COUNT(analytics_pageview.id) AS view_count
FROM 
    blog_blogpost
LEFT JOIN 
    analytics_pageview ON blog_blogpost.id = analytics_pageview.post_id
WHERE 
    analytics_pageview.timestamp BETWEEN '2024-01-01' AND '2025-12-31'
GROUP BY 
    blog_blogpost.id, blog_blogpost.title
ORDER BY 
    view_count ASC
LIMIT 5;
[0m[1;3;33mSQL response: Based on the query results, the top 5 least viewed blog posts from the year 2024 to 2025 are as follows:

1. **"Monopoly and network effects"** with 525 views.
2. **"A Sunday book reading"** with 558 views.
3. **"Helping leaders in college reboot"** with 624 views.
4. **"Is everything going to be okay?"** with 648 views.
5. **"You’re surrounded"** with 654 views.
[0m[1;3;34mTransfor

In [26]:
# Render the same answer as Markdown so the numbered list and bold titles
# are formatted.
display(Markdown(response.response))

Based on the query results, the top 5 least viewed blog posts from the year 2024 to 2025 are as follows:

1. **"Monopoly and network effects"** with 525 views.
2. **"A Sunday book reading"** with 558 views.
3. **"Helping leaders in college reboot"** with 624 views.
4. **"Is everything going to be okay?"** with 648 views.
5. **"You’re surrounded"** with 654 views.