SETUP

In [1]:
import pandas as pd
import sqlite3
import os
import warnings
# Silence every warning so the export log below stays readable.
# NOTE(review): this is a blanket filter; it also hides deprecation notices.
warnings.filterwarnings('ignore')

print("EXPORTING QUERY RESULTS FOR DASHBOARDS")

# Keep each location in one place so the directory that is created and the
# directory that is reported in the status message cannot drift apart.
EXPORT_DIR = '../outputs/dashboard_data'
DB_PATH = '../data/processed/ecommerce.db'

# exist_ok=True makes this cell safe to re-run.
os.makedirs(EXPORT_DIR, exist_ok=True)

# sqlite3.connect() creates a new, empty database when the file is missing,
# which would only surface later as "no such table" errors. Fail early with
# a clear message instead.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(f"SQLite database not found: {DB_PATH}")

# Shared connection used by every export cell below.
conn = sqlite3.connect(DB_PATH)

print("\nDatabase connected")
print(f"Export directory created: {EXPORT_DIR}/")

EXPORTING QUERY RESULTS FOR DASHBOARDS

Database connected
Export directory created: ../ouputs/dashboard_data/


In [15]:
# Export 1 - Monthly Revenue Trend
# One output row per (order_year, order_month): distinct order and customer
# counts, plus the summed and averaged total_payment_value of the joined
# payment rows, each rounded to 2 decimals.
monthly_revenue_sql = """
SELECT 
    order_year,
    order_month,
    COUNT(DISTINCT o.order_id) as total_orders,
    COUNT(DISTINCT o.customer_id) as unique_customers,
    ROUND(SUM(p.total_payment_value), 2) as total_revenue,
    ROUND(AVG(p.total_payment_value), 2) as avg_order_value
FROM orders o
JOIN order_payments p ON o.order_id = p.order_id
GROUP BY order_year, order_month
ORDER BY order_year, order_month
"""

monthly_revenue_csv = '../outputs/dashboard_data/monthly_revenue.csv'

# Run the aggregation on the shared connection and write it out without the
# DataFrame index.
monthly_revenue = pd.read_sql_query(monthly_revenue_sql, conn)
monthly_revenue.to_csv(monthly_revenue_csv, index=False)

monthly_revenue_rows = len(monthly_revenue)
print("Exported: monthly_revenue.csv")
print(f"   Rows: {monthly_revenue_rows}")

Exported: monthly_revenue.csv
   Rows: 22


In [16]:
# Export 3 - Category Performance
# One output row per English product category: distinct orders, item count,
# summed/averaged item price, summed freight and the average
# freight_pct_of_price, sorted by total revenue (highest first).
category_performance_sql = """
SELECT 
    pr.product_category_name_english as category,
    COUNT(DISTINCT oi.order_id) as total_orders,
    COUNT(oi.product_id) as items_sold,
    ROUND(SUM(oi.price), 2) as total_revenue,
    ROUND(AVG(oi.price), 2) as avg_item_price,
    ROUND(SUM(oi.freight_value), 2) as total_freight,
    ROUND(AVG(oi.freight_pct_of_price), 2) as avg_freight_pct
FROM order_items oi
JOIN products pr ON oi.product_id = pr.product_id
GROUP BY pr.product_category_name_english
ORDER BY total_revenue DESC
"""

category_performance_csv = '../outputs/dashboard_data/category_performance.csv'

# Run the aggregation on the shared connection and write it out without the
# DataFrame index.
category_performance = pd.read_sql_query(category_performance_sql, conn)
category_performance.to_csv(category_performance_csv, index=False)

category_performance_rows = len(category_performance)
print("Exported: category_performance.csv")
print(f"   Rows: {category_performance_rows}")

Exported: category_performance.csv
   Rows: 72


In [18]:
# Export 4 - Delivery Performance by State
# One output row per customer_state, restricted to orders that have a
# delivery_time_days value: order count, mean delivery time, mean delay and
# the mean of on_time_delivery expressed as a percentage. Slowest states
# (highest average delivery days) come first.
delivery_performance_sql = """
SELECT
    c.customer_state,
    COUNT(o.order_id) as total_orders,
    ROUND(AVG(o.delivery_time_days), 1) as avg_delivery_days,
    ROUND(AVG(o.delivery_delay_days), 1) as avg_delay_days,
    ROUND(AVG(CAST(o.on_time_delivery AS FLOAT)) * 100, 1) as on_time_pct
FROM orders o
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.delivery_time_days IS NOT NULL
GROUP BY c.customer_state
ORDER BY avg_delivery_days DESC
"""

delivery_performance_csv = '../outputs/dashboard_data/delivery_performance.csv'

# Run the aggregation on the shared connection and write it out without the
# DataFrame index.
delivery_performance = pd.read_sql_query(delivery_performance_sql, conn)
delivery_performance.to_csv(delivery_performance_csv, index=False)

delivery_performance_rows = len(delivery_performance)
print("Exported: delivery_performance.csv")
print(f"    Rows: {delivery_performance_rows}")

Exported: delivery_performance.csv
    Rows: 27


In [22]:
# Export 5 - Customer Segments
# The inner query builds one row per customer_unique_id with the number of
# distinct orders and the summed total_payment_value (lifetime_value).
# The outer query buckets those customers by order count:
#   1 order    -> 'One-time'
#   2-3 orders -> 'Repeat'
#   otherwise  -> 'Loyal'
# and reports, per bucket, the customer count plus the average and total
# lifetime value, ordered by average lifetime value (highest first).
query = """
SELECT
    CASE
        WHEN order_count = 1 THEN 'One-time'
        WHEN order_count BETWEEN 2 AND 3 THEN 'Repeat'
        ELSE 'Loyal'
    END as customer_segment,
    COUNT(*) as customer_count,
    ROUND(AVG(lifetime_value), 2) as avg_lifetime_value,
    ROUND(SUM(lifetime_value), 2) as total_segment_revenue
FROM (
    SELECT
        c.customer_unique_id,
        COUNT(DISTINCT o.order_id) as order_count,
        SUM(p.total_payment_value) as lifetime_value
    FROM customers c
    JOIN orders o ON c.customer_id = o.customer_id
    JOIN order_payments p ON o.order_id = p.order_id
    GROUP BY c.customer_unique_id
) customer_stats
GROUP BY customer_segment
ORDER BY avg_lifetime_value DESC
"""

customer_segments = pd.read_sql_query(query, conn)
# index=False keeps the CSV schema consistent with the other dashboard
# exports; without it pandas writes the RangeIndex as an extra unnamed
# leading column.
customer_segments.to_csv('../outputs/dashboard_data/customer_segments.csv', index=False)

print("Exported: customer_segments.csv")
print(f"Rows: {len(customer_segments)}")

Exported: customer_segments.csv
Rows: 3
