In [1]:
import sqlite3
import pandas as pd

# Spin up a throwaway SQLite database that lives entirely in memory.
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()

# Schema of the demo table queried throughout this notebook.
create_sales_table_sql = '''
CREATE TABLE sales (
    sale_id INTEGER PRIMARY KEY AUTOINCREMENT,
    product_name TEXT NOT NULL,
    category TEXT NOT NULL,
    quantity INTEGER NOT NULL,
    price REAL NOT NULL,
    sale_date TEXT NOT NULL
)
'''
cursor.execute(create_sales_table_sql)

# Sample rows, one tuple per sale:
# (product_name, category, quantity, price, sale_date as 'YYYY-MM-DD' text).
# sale_id is omitted on purpose; SQLite assigns it automatically.
sales_data = [
    ('Laptop Pro', 'Electronics', 2, 1200.00, '2023-01-15'),
    ('Wireless Mouse', 'Electronics', 5, 25.00, '2023-01-16'),
    ('USB-C Cable', 'Electronics', 8, 15.00, '2023-01-17'),
    ('Office Desk', 'Furniture', 1, 350.00, '2023-01-18'),
    ('Ergonomic Chair', 'Furniture', 3, 200.00, '2023-01-19'),
    ('Monitor 27"', 'Electronics', 2, 400.00, '2023-01-20'),
    ('Keyboard Mechanical', 'Electronics', 4, 120.00, '2023-01-21'),
    ('Desk Lamp', 'Furniture', 6, 50.00, '2023-01-22'),
    ('Laptop Stand', 'Electronics', 3, 60.00, '2023-01-23'),
    ('Filing Cabinet', 'Furniture', 1, 280.00, '2023-01-24')
]

# Parameterized bulk insert: each tuple fills the five "?" placeholders in order.
insert_sale_sql = 'INSERT INTO sales (product_name, category, quantity, price, sale_date) VALUES (?, ?, ?, ?, ?)'
cursor.executemany(insert_sale_sql, sales_data)
conn.commit()

print("✓ Sales database created successfully")
print(f"✓ {len(sales_data)} sales records inserted")

✓ Sales database created successfully
✓ 10 sales records inserted


## COUNT Function

Counts the number of rows or non-NULL values in a column.

```sql
SELECT COUNT(*) FROM sales;          -- Total rows
SELECT COUNT(DISTINCT category) FROM sales;  -- Unique categories
```

In [2]:
# COUNT(*) tallies every row in the sales table.
total_sales_sql = 'SELECT COUNT(*) as total_sales FROM sales'
result = pd.read_sql_query(total_sales_sql, conn)
# The trailing "" yields the blank separator line before the next result.
print("Total number of sales:", result, "", sep="\n")

# COUNT(DISTINCT col) tallies each distinct category value only once.
unique_categories_sql = 'SELECT COUNT(DISTINCT category) as unique_categories FROM sales'
result = pd.read_sql_query(unique_categories_sql, conn)
print("Number of unique categories:", result, sep="\n")

Total number of sales:
   total_sales
0           10

Number of unique categories:
   unique_categories
0                  2


## SUM and AVG Functions

Calculate total and average values.

```sql
SELECT SUM(price * quantity) as total_revenue FROM sales;
SELECT AVG(price) as average_price FROM sales;
```

In [3]:
# Revenue of one sale is price * quantity; SUM adds that product over all rows.
total_revenue_sql = 'SELECT SUM(price * quantity) as total_revenue FROM sales'
result = pd.read_sql_query(total_revenue_sql, conn)
# The trailing "" yields the blank separator line before the next result.
print("Total Revenue:", result, "", sep="\n")

# AVG(price) is the plain mean of the price column; quantity plays no part in it.
average_price_sql = 'SELECT AVG(price) as average_price FROM sales'
result = pd.read_sql_query(average_price_sql, conn)
print("Average Price:", result, sep="\n")

Total Revenue:
   total_revenue
0         5635.0

Average Price:
   average_price
0          270.0


## MIN and MAX Functions

Find minimum and maximum values in data.

```sql
SELECT MIN(price) as cheapest, MAX(price) as most_expensive FROM sales;
SELECT MIN(sale_date) as earliest_sale FROM sales;
```

In [4]:
# Lowest and highest unit price, fetched with a single query.
# Note: the "cheapest_product" column holds a price, not a product name.
price_range_sql = 'SELECT MIN(price) as cheapest_product, MAX(price) as most_expensive FROM sales'
result = pd.read_sql_query(price_range_sql, conn)
# The trailing "" yields the blank separator line before the next result.
print("Price Range:", result, "", sep="\n")

# MIN/MAX also work on text: sale_date is stored as 'YYYY-MM-DD' strings,
# which sort in chronological order.
date_range_sql = 'SELECT MIN(sale_date) as earliest_sale, MAX(sale_date) as latest_sale FROM sales'
result = pd.read_sql_query(date_range_sql, conn)
print("Sales Date Range:", result, sep="\n")

Price Range:
   cheapest_product  most_expensive
0              15.0          1200.0

Sales Date Range:
  earliest_sale latest_sale
0    2023-01-15  2023-01-24


## GROUP BY Clause

Groups rows by one or more columns and applies aggregates to each group.

```sql
SELECT category, COUNT(*) as product_count FROM sales GROUP BY category;
SELECT category, SUM(quantity * price) as category_revenue FROM sales GROUP BY category;
```

In [5]:
# One output row per category with its number of sales, largest group first.
sales_count_sql = 'SELECT category, COUNT(*) as product_count FROM sales GROUP BY category ORDER BY product_count DESC'
result = pd.read_sql_query(sales_count_sql, conn)
# The trailing "" yields the blank separator line before the next result.
print("Sales Count by Category:", result, "", sep="\n")

# Same grouping, but summing quantity * price to get revenue per category.
category_revenue_sql = 'SELECT category, SUM(quantity * price) as category_revenue FROM sales GROUP BY category ORDER BY category_revenue DESC'
result = pd.read_sql_query(category_revenue_sql, conn)
print("Revenue by Category:", result, sep="\n")

Sales Count by Category:
      category  product_count
0  Electronics              6
1    Furniture              4

Revenue by Category:
      category  category_revenue
0  Electronics            4105.0
1    Furniture            1530.0


## Multiple Aggregates with GROUP BY

Calculate multiple statistics per group.

```sql
SELECT category, 
       COUNT(*) as item_count,
       AVG(price) as avg_price,
       MIN(price) as min_price,
       MAX(price) as max_price
FROM sales 
GROUP BY category;
```

In [6]:
# Several aggregates computed per category in one query: row count, price
# statistics and total revenue, with the highest-revenue category first.
# ROUND(..., 2) keeps the average and the revenue to two decimal places.
category_analysis_sql = '''SELECT category, 
              COUNT(*) as item_count,
              ROUND(AVG(price), 2) as avg_price,
              MIN(price) as min_price,
              MAX(price) as max_price,
              ROUND(SUM(quantity * price), 2) as total_revenue
       FROM sales 
       GROUP BY category
       ORDER BY total_revenue DESC'''
result = pd.read_sql_query(category_analysis_sql, conn)
print("Comprehensive Category Analysis:", result, sep="\n")

Comprehensive Category Analysis:
      category  item_count  avg_price  min_price  max_price  total_revenue
0  Electronics           6     303.33       15.0     1200.0         4105.0
1    Furniture           4     220.00       50.0      350.0         1530.0


## HAVING Clause

Filters grouped results (WHERE filters rows, HAVING filters groups).

```sql
SELECT category, COUNT(*) as item_count 
FROM sales 
GROUP BY category 
HAVING COUNT(*) > 2;
```

In [7]:
# HAVING runs after GROUP BY, so it can filter on an aggregate such as COUNT(*).
# Keep only categories that have more than 3 sales rows.
# (With the sample data both categories pass, so no group is dropped.)
busy_categories_sql = '''SELECT category, COUNT(*) as item_count, 
              ROUND(SUM(quantity * price), 2) as total_revenue
       FROM sales 
       GROUP BY category 
       HAVING COUNT(*) > 3
       ORDER BY item_count DESC'''
result = pd.read_sql_query(busy_categories_sql, conn)
# The trailing "" yields the blank separator line before the next result.
print("Categories with more than 3 items:", result, "", sep="\n")

# Same idea with a different aggregate: keep categories whose revenue exceeds 1000.
# (Again, both sample categories satisfy the condition.)
high_revenue_categories_sql = '''SELECT category, COUNT(*) as item_count, 
              ROUND(SUM(quantity * price), 2) as total_revenue
       FROM sales 
       GROUP BY category 
       HAVING SUM(quantity * price) > 1000
       ORDER BY total_revenue DESC'''
result = pd.read_sql_query(high_revenue_categories_sql, conn)
print("Categories with revenue > $1000:", result, sep="\n")

Categories with more than 3 items:
      category  item_count  total_revenue
0  Electronics           6         4105.0
1    Furniture           4         1530.0

Categories with revenue > $1000:
      category  item_count  total_revenue
0  Electronics           6         4105.0
1    Furniture           4         1530.0


## GROUP BY Multiple Columns

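Group by multiple columns (or expressions) for deeper insights. The SQL below groups by category and month; the code cell that follows applies the same idea to category and a computed price range.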

```sql
SELECT category, SUBSTR(sale_date, 1, 7) as month, 
       COUNT(*) as sales_count,
       SUM(quantity * price) as monthly_revenue
FROM sales 
GROUP BY category, SUBSTR(sale_date, 1, 7);
```

In [8]:
# Two grouping keys: the category column and a price bucket derived with CASE.
# Buckets: price < 50 -> 'Low', 50 <= price < 300 -> 'Medium', otherwise 'High'.
# GROUP BY refers to the bucket through its alias, price_range.
price_range_breakdown_sql = '''SELECT category,
              CASE 
                WHEN price < 50 THEN 'Low'
                WHEN price < 300 THEN 'Medium'
                ELSE 'High'
              END as price_range,
              COUNT(*) as item_count,
              ROUND(AVG(price), 2) as avg_price
       FROM sales 
       GROUP BY category, price_range
       ORDER BY category, avg_price'''
result = pd.read_sql_query(price_range_breakdown_sql, conn)
print("Products by Category and Price Range:", result, sep="\n")

Products by Category and Price Range:
      category price_range  item_count  avg_price
0  Electronics         Low           2      20.00
1  Electronics      Medium           2      90.00
2  Electronics        High           2     800.00
3    Furniture      Medium           3     176.67
4    Furniture        High           1     350.00


## Aggregate Function Summary Table

| Function | Purpose | Example | Notes |
|----------|---------|---------|-------|
| COUNT(*) | Total rows | COUNT(*) | Includes NULL values |
| COUNT(col) | Non-NULL values in column | COUNT(price) | Excludes NULL |
| SUM(col) | Sum of numeric values | SUM(quantity * price) | Returns NULL if no rows |
| AVG(col) | Average of numeric values | AVG(price) | Excludes NULL values |
| MIN(col) | Minimum value | MIN(price) | Works on numbers, dates, strings |
| MAX(col) | Maximum value | MAX(sale_date) | Works on numbers, dates, strings |
| GROUP_CONCAT(col) | Concatenate values | GROUP_CONCAT(product_name) | Available in SQLite and MySQL; other databases use STRING_AGG or LISTAGG |
| GROUP BY | Group results | GROUP BY category | Combines with aggregates |
| HAVING | Filter groups | HAVING COUNT(*) > 2 | Applied after grouping |

In [9]:
# Dump the whole (10-row) sales table in insertion order so the aggregate
# results above can be checked by hand.
all_sales_sql = 'SELECT * FROM sales ORDER BY sale_id'
result = pd.read_sql_query(all_sales_sql, conn)
print("Complete Sales Data:", result, sep="\n")

Complete Sales Data:
   sale_id         product_name     category  quantity   price   sale_date
0        1           Laptop Pro  Electronics         2  1200.0  2023-01-15
1        2       Wireless Mouse  Electronics         5    25.0  2023-01-16
2        3          USB-C Cable  Electronics         8    15.0  2023-01-17
3        4          Office Desk    Furniture         1   350.0  2023-01-18
4        5      Ergonomic Chair    Furniture         3   200.0  2023-01-19
5        6          Monitor 27"  Electronics         2   400.0  2023-01-20
6        7  Keyboard Mechanical  Electronics         4   120.0  2023-01-21
7        8            Desk Lamp    Furniture         6    50.0  2023-01-22
8        9         Laptop Stand  Electronics         3    60.0  2023-01-23
9       10       Filing Cabinet    Furniture         1   280.0  2023-01-24
