In [None]:
import json
import logging
import os
from datetime import datetime
from pprint import pprint

# For tabular display of query results
import pandas as pd

# For database connection
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

# Import the verified_query module
from verified_query import (
    VerifiedQuery,
    Question,
    get_verified_query,
    get_verified_queries_by_vector_search,
    get_best_query,
    get_query_recommendations,
    get_follow_up_queries
)
import config

# For LLM service
from llm_service import LLMService

# Set up logging. The root logger stays at DEBUG so this notebook and the
# project modules keep emitting their own diagnostics.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# At DEBUG the HTTP transport and the LLM SDK log raw response headers and full
# request payloads into the cell output, which buries the results and bloats
# the saved notebook. Only surface their warnings and errors.
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("anthropic").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)

# Configure database connections:
#   engine              -> application database (used to look up verified queries)
#   insurance_db_engine -> business database (used to execute the adjusted SQL)
engine = create_engine(config.APPLICATION_DB_CONNECTION_STRING)
insurance_db_engine = create_engine(config.BUSINESS_DB_CONNECTION_STRING)

# Initialize LLM service
llm_service = LLMService()

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
with Session(engine) as db:
    # Ask which verified query to work with; an empty answer selects the default ID.
    query_id = (
        input("Enter a query ID to get recommendations for (or press Enter for default): ")
        or "vq_top_agencies_premium"
    )

    # Look the query up and either show it or report that nothing matched.
    vq = get_verified_query(query_id, db)
    if vq:
        print(f"\nSelected query: {vq.name}")
        print(f"SQL:\n{vq.sql}")
    else:
        print(f"No query found with ID: {query_id}")


Selected query: Top Performing Agencies by Premium
SQL:
SELECT 
  a.agency_id,
  a.agency_name,
  a.region,
  a.tier,
  COUNT(p.policy_id) AS policy_count,
  SUM(p.premium_amount) AS total_premium,
  AVG(p.premium_amount) AS avg_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.agency_id, a.agency_name, a.region, a.tier
ORDER BY 
  total_premium DESC
LIMIT 10;


In [None]:
# Collect the question the recommendations should be tailored to.
# An empty reply falls back to a canned example, which is echoed to the output.
user_question = input("\nEnter user question for tailoring recommendations: ")
if user_question == "":
    user_question = "Which agencies in my region generated the most premium in current quarter?"
    print(f"Using default question: '{user_question}'")

Using default question: 'Which agencies in my region generated the most premium in current quarter?'


In [None]:
# Sample context passed to the recommendation step alongside the user's question:
# calendar information, the user's profile, and a note about the session so far.
context = dict(
    user_question=user_question,
    calendar_context="Current quarter: 2025 Q2, Previous quarter: 2025 Q1",
    user_profile="Region: Northeast, LOB: Personal Insurance",
    session_context="Previous question was about overall performance",
)

In [None]:
# Ask the LLM how the selected verified query should be adapted to the context.
print("\nGetting recommendations from LLM...")
recommendations = get_query_recommendations(vq, context, llm_service)

# Summary: whether changes are needed, and the LLM's reasoning.
print("\nRecommendations:")
print(f"Modifications needed: {recommendations.get('modifications_needed', False)}")
print("\nExplanation:", recommendations.get('explanation', 'No explanation provided'))

# List each suggested modification, numbered from 1, when any were returned.
modifications = recommendations.get('modifications')
if modifications:
    print("\nRecommended modifications:")
    for i, mod in enumerate(modifications, start=1):
        print(f"  {i}. Type: {mod.get('type')}")
        print(f"     Description: {mod.get('description')}")
        print(f"     SQL Impact: {mod.get('sql_impact')}")

2025-04-30 13:55:49,516 - verified_query - DEBUG - User prompt for LLM: SQL:
SELECT 
  a.agency_id,
  a.agency_name,
  a.region,
  a.tier,
  COUNT(p.policy_id) AS policy_count,
  SUM(p.premium_amount) AS total_premium,
  AVG(p.premium_amount) AS avg_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.agency_id, a.agency_name, a.region, a.tier
ORDER BY 
  total_premium DESC
LIMIT 10;

Explanation:
This query calculates the total premium amount generated by each agency.  It joins the agencies, agents, and policies tables to aggregate premium amounts for active policies, then sorts them by total premium in descending order and limits result to 10 records.


This SQL query is designed to answer the following questions:
[
  "Which agencies generate the most premium?",
  "What are our top agencies by premium revenue?",
  "Which agencies have the highest 


Getting recommendations from LLM...
SQL:
SELECT 
  a.agency_id,
  a.agency_name,
  a.region,
  a.tier,
  COUNT(p.policy_id) AS policy_count,
  SUM(p.premium_amount) AS total_premium,
  AVG(p.premium_amount) AS avg_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.agency_id, a.agency_name, a.region, a.tier
ORDER BY 
  total_premium DESC
LIMIT 10;

Explanation:
This query calculates the total premium amount generated by each agency.  It joins the agencies, agents, and policies tables to aggregate premium amounts for active policies, then sorts them by total premium in descending order and limits result to 10 records.


This SQL query is designed to answer the following questions:
[
  "Which agencies generate the most premium?",
  "What are our top agencies by premium revenue?",
  "Which agencies have the highest premium amounts?",
  "Show me the a

2025-04-30 13:55:54,216 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 30 Apr 2025 18:55:54 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'anthropic-ratelimit-input-tokens-limit', b'50000'), (b'anthropic-ratelimit-input-tokens-remaining', b'50000'), (b'anthropic-ratelimit-input-tokens-reset', b'2025-04-30T18:55:50Z'), (b'anthropic-ratelimit-output-tokens-limit', b'10000'), (b'anthropic-ratelimit-output-tokens-remaining', b'10000'), (b'anthropic-ratelimit-output-tokens-reset', b'2025-04-30T18:55:54Z'), (b'anthropic-ratelimit-requests-limit', b'50'), (b'anthropic-ratelimit-requests-remaining', b'49'), (b'anthropic-ratelimit-requests-reset', b'2025-04-30T18:55:51Z'), (b'anthropic-ratelimit-tokens-limit', b'60000'), (b'anthropic-ratelimit-tokens-remaining', b'60000'), (b'anthropic-ratelimit-tokens-reset', b'2025-04-30T18:55:50Z'), (b'request-id', b'


Recommendations:
Modifications needed: True

Explanation: The original SQL query is designed to provide a general overview of agency performance across all regions and tiers. However, the user's specific question is focused on the top agencies in the Northeast region during the current quarter (2025 Q2). To tailor the query to this specific need, we need to add filters for region and policy start date, remove unnecessary columns, and keep the sorting and limiting as in the original query.

Recommended modifications:
  1. Type: filter
     Description: Filter policies to only include those with 'active' status
     SQL Impact: Modify the WHERE clause to filter policies with status = 'active'
  2. Type: filter
     Description: Filter agencies to only include those in the Northeast region
     SQL Impact: Add a WHERE clause condition to filter agencies with region = 'Northeast'
  3. Type: filter
     Description: Filter policies to only include those from the current quarter (2025 Q2)
 

In [None]:
system_prompt = """You are an expert SQL developer for PostgreSQL. Your task is to analyze and modify SQL based on specific requirements. Your response will be a valid SQL query."""

# Read the suggested modifications with .get(), the same way the display cell
# does, so a response without the key is treated as "nothing to change"
# instead of raising KeyError.
modifications = recommendations.get('modifications') or []

# User prompt with original SQL and modifications
user_prompt = f"""Original SQL:
        {vq.sql}

        Modification instructions:
        {modifications}

        Column Alias Guidelines:
        - Change only if necessary
        - Match the verb from user's question
        - Keep prefixes if present
        - Maintain quote style and capitalization

        Return only the modified SQL query. Do NOT include any other text or explanations.
        """

# Drop any value left over from an earlier run of this cell, so a failure
# below can never leave stale SQL behind for the execution cell.
modsql = None

if not modifications:
    # Nothing was recommended: keep the verified SQL as it is and skip the LLM call.
    modsql = vq.sql.strip()
    print("\nNo modifications recommended; using the original SQL:")
    print(modsql)
else:
    try:
        logger.info("Adjusting SQL query")

        # Get modified SQL from LLM (temperature=0 as in the original request)
        modified_sql = llm_service.generate_text(
            prompt=user_prompt,
            system_prompt=system_prompt,
            temperature=0
        )

        modsql = modified_sql.strip()

        print("\nModified SQL:")
        print(modsql)

    except Exception as e:
        # Report the failure, then re-raise: continuing would leave modsql
        # unset and only move the error into the next cell.
        print("Error generating modified SQL:", e)
        raise

2025-04-30 13:56:45,413 - __main__ - INFO - Adjusting SQL query
2025-04-30 13:56:45,414 - anthropic._base_client - DEBUG - Request options: {'method': 'post', 'url': '/v1/messages', 'timeout': Timeout(connect=5.0, read=600, write=600, pool=600), 'files': None, 'idempotency_key': 'stainless-python-retry-c4b1aaf6-ad5b-46b6-8e3d-22ba6ca388e0', 'json_data': {'max_tokens': 2000, 'messages': [{'role': 'user', 'content': 'Original SQL:\n        SELECT \n  a.agency_id,\n  a.agency_name,\n  a.region,\n  a.tier,\n  COUNT(p.policy_id) AS policy_count,\n  SUM(p.premium_amount) AS total_premium,\n  AVG(p.premium_amount) AS avg_premium\nFROM \n  agencies a\n  JOIN agents ag ON a.agency_id = ag.agency_id\n  JOIN policies p ON ag.agent_id = p.agent_id\nWHERE \n  p.status = \'active\' -- active, lapsed, cancelled\nGROUP BY \n  a.agency_id, a.agency_name, a.region, a.tier\nORDER BY \n  total_premium DESC\nLIMIT 10;\n\n        Modification instructions:\n        [{\'type\': \'filter\', \'description\': "


Modified SQL:
SELECT 
  a.agency_id,
  a.agency_name,
  COUNT(p.policy_id) AS policy_count,
  SUM(p.premium_amount) AS total_premium,
  AVG(p.premium_amount) AS avg_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active'
  AND a.region = 'Northeast'
  AND p.start_date BETWEEN '2025-04-01' AND '2025-06-30'
GROUP BY 
  a.agency_id, a.agency_name
ORDER BY 
  total_premium DESC
LIMIT 10;


In [None]:
# Execute modified SQL against the business database and show the rows as a DataFrame.
# NOTE(review): modsql is LLM-generated text executed verbatim; confirm that the
# business DB connection uses a read-only role before running this on real data.
with Session(insurance_db_engine) as db:
    try:
        # Run the query
        result = db.execute(text(modsql))
        rows = result.fetchall()
        columns = list(result.keys())
        data = [dict(zip(columns, r)) for r in rows]
        # Convert to DataFrame for better visualization. An empty result set
        # still gets its column headers instead of a frame with no columns.
        df_data = pd.DataFrame(data) if data else pd.DataFrame(columns=columns)
    except Exception as e:
        # Report and re-raise: swallowing the error would leave df_data
        # undefined (or stale from an earlier run) for the display line below.
        print("Error executing query:", e)
        raise
df_data

Unnamed: 0,agency_id,agency_name,policy_count,total_premium,avg_premium
0,7,Capital Benefits Agency,1,1500.0,1500.0
