In [1]:
#!/usr/bin/env python3
"""
Example usage of DuckDBService with LlamaIndex integration.

This example demonstrates:
1. Loading sample data into DuckDB
2. Running SQL queries directly
3. Running natural language queries with LlamaIndex integration
"""

import pandas as pd

from assortment_chatbot.services.duckdb_service import EnhancedDuckDBService

"""Example usage of EnhancedDuckDBService."""
# Create sample data
data = {
    "product": ["Laptop", "Smartphone", "Tablet", "Headphones", "Monitor"],
    "category": ["Electronics", "Electronics", "Electronics", "Audio", "Electronics"],
    "price": [1200, 800, 500, 150, 300],
    "stock": [25, 50, 35, 100, 20],
    "rating": [4.5, 4.2, 3.9, 4.7, 4.1],
}

In [2]:
df = pd.DataFrame(data)
print("Sample data created:")
print(df.head())
print("-" * 50)

Sample data created:
      product     category  price  stock  rating
0      Laptop  Electronics   1200     25     4.5
1  Smartphone  Electronics    800     50     4.2
2      Tablet  Electronics    500     35     3.9
3  Headphones        Audio    150    100     4.7
4     Monitor  Electronics    300     20     4.1
--------------------------------------------------


In [3]:
# Initialize the enhanced DuckDB service (in-memory database)
db_service = EnhancedDuckDBService()

In [4]:
# Load data into the service
success = db_service.load_dataframe(df, "products")

[32m2025-04-23 02:33:21[0m | [1mINFO    [0m | [36massortment_chatbot.services.duckdb_service[0m:[36m_sync_sqlite_from_duckdb[0m:[36m197[0m - [1mSynced table products to SQLite for LlamaIndex integration[0m
[32m2025-04-23 02:33:21[0m | [1mINFO    [0m | [36massortment_chatbot.services.duckdb_service[0m:[36minitialize[0m:[36m181[0m - [1mInitialized LlamaIndex query engines[0m


In [5]:
# Get schema information
schema_info = db_service.get_schema_info()
print("\nSchema information:")
print(f"Tables: {schema_info['tables']}")
print(f"Columns in products: {schema_info['columns']['products']}")
print("-" * 50)


Schema information:
Tables: ['products']
Columns in products: ['product', 'category', 'price', 'stock', 'rating']
--------------------------------------------------


In [6]:
# Direct SQL query
print("\nRunning direct SQL query:")
sql_query = "SELECT * FROM products WHERE price > 500 ORDER BY price DESC"
print(f"SQL: {sql_query}")
results = db_service.execute_query(sql_query)
print(results)
print("-" * 50)


Running direct SQL query:
SQL: SELECT * FROM products WHERE price > 500 ORDER BY price DESC
      product     category  price  stock  rating
0      Laptop  Electronics   1200     25     4.5
1  Smartphone  Electronics    800     50     4.2
--------------------------------------------------


In [8]:
# Natural language query using LlamaIndex integration
print("\nRunning natural language queries:")

# Simple query
nl_query = "What is the most expensive product?"
print(f"\nQuestion: {nl_query}")
result = db_service.process_query(
    query=nl_query, query_type="natural_language", complexity="simple"
)


Running natural language queries:

Question: What is the most expensive product?


In [9]:
result

{'success': False,
 'error': 'LlamaIndex query engines not initialized. Please load data first.',
 'query_type': 'natural_language'}

In [None]:
# More complex query
nl_query = "What is the average price of electronics products with rating above 4?"
print(f"\nQuestion: {nl_query}")
result = db_service.process_query(
    query=nl_query, query_type="natural_language", complexity="advanced"
)

if result["success"]:
    print(f"Answer: {result['data']}")
    print(f"Generated SQL: {result['sql_query']}")
else:
    print(f"Error: {result.get('error', 'Unknown error')}")