# Smart Query Assistant 

In [None]:
import logging
from dotenv import load_dotenv

# Configure root logging once for the notebook: INFO and above, with
# timestamp, logger name and level in every record.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Load variables from a .env file into the process environment.
# NOTE(review): the project imports below are placed after load_dotenv(),
# presumably because those modules read environment configuration at import
# time -- confirm before hoisting them to the top of the cell.
load_dotenv()

from smart_assistant import SmartAssistant
from llm_service import LLMService

# Create the LLM service used by the assistant and report its provider.
llm_service = LLMService()
print(f"Using LLM provider: {llm_service.provider}")

Using LLM provider: claude


In [5]:
# Build the assistant from the verified-query YAML file, with debug mode on.
# No database connection is supplied at this stage.
assistant = SmartAssistant(
    yaml_file_path="verified_queries.yaml",
    llm_service=llm_service,
    debug_mode=True,
)

# List every verified query the assistant loaded, numbered from 1.
print("Verified queries:")
for number, query in enumerate(assistant.verified_queries, start=1):
    name = query.get('name')
    print(f"\n{number}. {name}")
    question = query.get('question')
    print(f"   Question: {question}")

2025-04-28 13:54:28,474 - root - INFO - Loaded 5 verified queries from verified_queries.yaml
2025-04-28 13:54:28,475 - root - INFO - Loaded 5 verified queries


Verified queries:

1. Top Performing Agencies by Premium
   Question: Which agencies generate the most premium?

2. Distribution Channel Performance Analysis
   Question: How are our different distribution channels performing?

3. Agent Performance Report
   Question: How are our agents performing in terms of sales?

4. Claims Analysis by Policy Type
   Question: What are our loss ratios by policy type?

5. Regional Agency Performance
   Question: How are our agencies performing by region?


In [3]:
# Sample question for the matching demo: a region-filtered variant
# ("Northeast") of an agency-premium question.
test_question: str = "Which agencies in the Northeast region generate the most premium?"

In [4]:
# Ask the assistant for the verified query that best matches the test question.
print(f"Testing question: '{test_question}'")
match_info = assistant.find_matching_query(test_question)

if not match_info:
    # Nothing matched: there is no SQL to show.
    print("\nNo match found. Debugging required.")
else:
    print("\nMatch found!")
    matched_query = match_info['verified_query']
    print(f"Matched query: {matched_query.get('name')}")
    similarity = match_info['similarity']
    print(f"Similarity: {similarity}%")
    needs_modification = match_info['modification_needed']
    print(f"Needs modification: {needs_modification}")

    if needs_modification:
        requested_changes = match_info['modifications']
        print(f"Modifications: {requested_changes}")

        # Show the verified SQL first, then the version the assistant
        # produces after applying the requested modifications.
        original_sql = matched_query.get('sql', '')
        print("\nOriginal SQL:")
        print(original_sql)

        adjusted_sql = assistant.adjust_sql(original_sql, requested_changes)
        print("\nAdjusted SQL:")
        print(adjusted_sql)

2025-04-28 13:51:57,229 - root - INFO - Created prompt with 5 queries
2025-04-28 13:51:57,230 - root - DEBUG - Full queries text: Query 1:
Name: Top Performing Agencies by Premium
Question: Which agencies generate the most premium?
SQL: SELECT 
  a.agency_id,
  a.agency_name,
  a.region,
  a.tier,
  SUM(p.premium_amount) AS total_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.agency_id, a.agency_name, a.region, a.tier
ORDER BY 
  total_premium DESC
LIMIT 10;

Explanation: This query calculates the total premium amount generated by each agency. It joins the agencies, agents, and policies tables to aggregate premium amounts for active policies, then sorts them by total premium in descending order.

Query 2:
Name: Distribution Channel Performance Analysis
Question: How are our different distribution channels performing?
SQL: SELECT 
  dc.channel_

Testing question: 'Which agencies in the Northeast region generate the most premium?'


2025-04-28 13:51:57,436 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x10fcbee90>
2025-04-28 13:51:57,437 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10f9f7ad0> server_hostname='api.anthropic.com' timeout=5.0
2025-04-28 13:51:57,459 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1068fa3d0>
2025-04-28 13:51:57,459 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2025-04-28 13:51:57,460 - httpcore.http11 - DEBUG - send_request_headers.complete
2025-04-28 13:51:57,461 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'POST']>
2025-04-28 13:51:57,461 - httpcore.http11 - DEBUG - send_request_body.complete
2025-04-28 13:51:57,461 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'POST']>
2025-04-28 13:51:58,423 - httpcore.http11 


Match found!
Matched query: Regional Agency Performance
Similarity: 90%
Needs modification: True
Modifications: Change region from 'Northeast' to 'Northeast' in WHERE clause

Original SQL:
SELECT 
  a.region,
  COUNT(DISTINCT a.agency_id) AS agency_count,
  SUM(p.premium_amount) AS total_premium,
  SUM(p.premium_amount) / COUNT(DISTINCT a.agency_id) AS avg_premium_per_agency,
  COUNT(p.policy_id) AS policy_count
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.region
ORDER BY 
  total_premium DESC;



2025-04-28 13:52:00,637 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Mon, 28 Apr 2025 18:52:00 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'anthropic-ratelimit-input-tokens-limit', b'50000'), (b'anthropic-ratelimit-input-tokens-remaining', b'49000'), (b'anthropic-ratelimit-input-tokens-reset', b'2025-04-28T18:52:00Z'), (b'anthropic-ratelimit-output-tokens-limit', b'10000'), (b'anthropic-ratelimit-output-tokens-remaining', b'10000'), (b'anthropic-ratelimit-output-tokens-reset', b'2025-04-28T18:52:00Z'), (b'anthropic-ratelimit-requests-limit', b'50'), (b'anthropic-ratelimit-requests-remaining', b'48'), (b'anthropic-ratelimit-requests-reset', b'2025-04-28T18:51:59Z'), (b'anthropic-ratelimit-tokens-limit', b'60000'), (b'anthropic-ratelimit-tokens-remaining', b'59000'), (b'anthropic-ratelimit-tokens-reset', b'2025-04-28T18:52:00Z'), (b'request-id', b'


Adjusted SQL:
Here is the modified SQL query based on the instructions:

SELECT 
  a.region,
  COUNT(DISTINCT a.agency_id) AS total_agencies,
  SUM(p.premium_amount) AS total_premium,
  SUM(p.premium_amount) / COUNT(DISTINCT a.agency_id) AS avg_premium_per_agency,
  COUNT(p.policy_id) AS total_policies
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  a.region = 'Northeast' -- active, lapsed, cancelled
  AND p.status IN ('active', 'lapsed', 'cancelled')
GROUP BY 
  a.region
ORDER BY 
  total_premium DESC;


In [None]:
# Question used to exercise the end-to-end process_question() path.
# NOTE(review): the "_neg" suffix suggests this was meant as a case with no
# verified match -- confirm the expected outcome against the assistant's output.
test_question_neg = "What is the average claim amount for auto policies?"

print(f"Processing question: '{test_question_neg}'")
# Pass the question defined in this cell. The previous code referenced
# `test_question_4`, which is not defined in the notebook and raises
# NameError when run on a fresh kernel.
response = assistant.process_question(test_question_neg)

print(f"\nMatch found: {response.get('match_found', False)}")
print(f"Source: {response.get('source', 'unknown')}")

# Only when no verified query matched: show the SQL produced by the LLM and,
# if the response carries one, its explanation.
if not response.get('match_found', False):
    print("\nAI-generated SQL:")
    print(response.get('sql', ''))

    if 'ai_generated' in response and 'query_explanation' in response['ai_generated']:
        print("\nExplanation:")
        print(response['ai_generated']['query_explanation'])

2025-04-28 13:11:09,867 - smart_assistant - INFO - Processing question: What is the average claim amount for auto policies?
2025-04-28 13:11:09,867 - INFO - Processing question: What is the average claim amount for auto policies?
2025-04-28 13:11:09,869 - smart_assistant - INFO - Created prompt with 5 queries
2025-04-28 13:11:09,869 - INFO - Created prompt with 5 queries
2025-04-28 13:11:09,869 - smart_assistant - DEBUG - Full queries text: Query 1:
Name: Top Performing Agencies by Premium
Question: Which agencies generate the most premium?
SQL: SELECT 
  a.agency_id,
  a.agency_name,
  a.region,
  a.tier,
  SUM(p.premium_amount) AS total_premium
FROM 
  agencies a
  JOIN agents ag ON a.agency_id = ag.agency_id
  JOIN policies p ON ag.agent_id = p.agent_id
WHERE 
  p.status = 'active' -- active, lapsed, cancelled
GROUP BY 
  a.agency_id, a.agency_name, a.region, a.tier
ORDER BY 
  total_premium DESC
LIMIT 10;

Explanation: This query calculates the total premium amount generated by eac

Processing question: 'What is the average claim amount for auto policies?'


2025-04-28 13:11:10,912 - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
2025-04-28 13:11:10,916 - smart_assistant - INFO - Received response from LLM service
2025-04-28 13:11:10,916 - INFO - Received response from LLM service
2025-04-28 13:11:10,918 - smart_assistant - DEBUG - Raw LLM response: {
  "match": true,
  "query_number": 4,
  "similarity": 90,
  "modification_needed": true,
  "modifications": "Change policy_type from 'auto' to 'auto' in WHERE clause"
}
2025-04-28 13:11:10,918 - DEBUG - Raw LLM response: {
  "match": true,
  "query_number": 4,
  "similarity": 90,
  "modification_needed": true,
  "modifications": "Change policy_type from 'auto' to 'auto' in WHERE clause"
}
2025-04-28 13:11:10,919 - smart_assistant - INFO - Match found! Query #4 with 90% similarity
2025-04-28 13:11:10,919 - INFO - Match found! Query #4 with 90% similarity
2025-04-28 13:11:10,919 - smart_assistant - INFO - Found matching query: Claims Analysis by Policy Type wi


Match found: True
Source: verified
