# Day 7: SQL Basics and SELECT Queries

## Create Dummy Tables

In [2]:

import sqlite3
import pandas as pd

# Create an in-memory SQLite database. Every call to connect(":memory:") opens a
# brand-new, empty database, so this cell can be re-run without "table already
# exists" errors.
conn = sqlite3.connect(":memory:")

# SQLite ignores FOREIGN KEY clauses unless enforcement is switched on for the
# connection; without this pragma, orders could reference customers that do not exist.
conn.execute("PRAGMA foreign_keys = ON")
cursor = conn.cursor()

# Create sample customers table
cursor.execute('''
CREATE TABLE customers (
    customer_id INTEGER PRIMARY KEY,
    first_name TEXT,
    last_name TEXT,
    country TEXT
)
''')

# Create sample orders table; each order points at one customer
cursor.execute('''
CREATE TABLE orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    region TEXT,
    sales REAL,
    amount REAL,
    FOREIGN KEY(customer_id) REFERENCES customers(customer_id)
)
''')

# Insert dummy data. Column lists are spelled out so the inserts do not silently
# depend on the column order of the CREATE TABLE statements above.
# Customers go in first because orders reference them.
cursor.executemany(
    "INSERT INTO customers (customer_id, first_name, last_name, country) "
    "VALUES (?, ?, ?, ?)",
    [
        (1, 'Alice', 'Smith', 'USA'),
        (2, 'Bob', 'Jones', 'Canada'),
        (3, 'Charlie', 'Brown', 'USA'),
    ],
)
cursor.executemany(
    "INSERT INTO orders (order_id, customer_id, region, sales, amount) "
    "VALUES (?, ?, ?, ?, ?)",
    [
        (101, 1, 'North', 750.5, 1200),
        (102, 2, 'South', 300, 450),
        (103, 1, 'East', 950, 1300),
        (104, 3, 'North', 100, 80),
    ],
)
conn.commit()


## SELECT * and Specific Columns

In [3]:

# Select all from customers.
# A notebook cell only renders its LAST expression, so this first result must be
# shown explicitly with display(); otherwise it is computed and thrown away.
display(pd.read_sql_query("SELECT * FROM customers", conn))

# Select specific columns from orders (last expression, rendered as the cell output)
pd.read_sql_query("SELECT order_id, region, amount FROM orders", conn)


Unnamed: 0,order_id,region,amount
0,101,North,1200.0
1,102,South,450.0
2,103,East,1300.0
3,104,North,80.0


## Filtering with WHERE

In [4]:

# Orders whose amount exceeds 500.
# Only the last expression of a cell is rendered automatically, so the first
# query result is passed to display() to make it visible.
display(pd.read_sql_query("SELECT * FROM orders WHERE amount > 500", conn))

# Customers located in the USA (last expression, rendered as the cell output)
pd.read_sql_query("SELECT * FROM customers WHERE country = 'USA'", conn)


Unnamed: 0,customer_id,first_name,last_name,country
0,1,Alice,Smith,USA
1,3,Charlie,Brown,USA


## Sorting and Limiting Results

In [5]:

# Three largest orders by amount: sort descending, then keep the first three rows.
top_orders_sql = "SELECT * FROM orders ORDER BY amount DESC LIMIT 3"
pd.read_sql_query(top_orders_sql, conn)


Unnamed: 0,order_id,customer_id,region,sales,amount
0,103,1,East,950.0,1300.0
1,101,1,North,750.5,1200.0
2,102,2,South,300.0,450.0


## Aggregation + GROUP BY

In [7]:

# Total sales per region.
# Shown with display() because a cell only renders its last expression; without
# it this aggregate would be computed and silently discarded.
# NOTE(review): total_sales sums the `amount` column, while the table also has a
# separate `sales` column (used by the average below) - confirm which is intended.
display(pd.read_sql_query("SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region", conn))

# Average sales in North (last expression, rendered as the cell output)
pd.read_sql_query("SELECT AVG(sales) AS avg_sales FROM orders WHERE region = 'North'", conn)


Unnamed: 0,avg_sales
0,425.25
