# Day 8: SQL & Data Integration

## Step 1: Create Dummy SQLite Database

In [2]:

import sqlite3
import pandas as pd

# Connect to (or create) the SQLite database file used by this notebook.
conn = sqlite3.connect("day8_dummy.db")

# SQLite parses FOREIGN KEY clauses but only enforces them when this pragma is
# switched on for the connection. It has no effect inside an open transaction,
# so it is issued immediately after connecting.
conn.execute("PRAGMA foreign_keys = ON")
cursor = conn.cursor()

# Create sample customers table
cursor.execute('''
CREATE TABLE IF NOT EXISTS customers (
    customer_id INTEGER PRIMARY KEY,
    name TEXT,
    country TEXT
)
''')

# Create sample orders table; every order points at a row in customers
cursor.execute('''
CREATE TABLE IF NOT EXISTS orders (
    order_id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    region TEXT,
    sales REAL,
    amount REAL,
    FOREIGN KEY(customer_id) REFERENCES customers(customer_id)
)
''')

# Reset both tables so re-running this cell always produces the same data.
# Child rows (orders) are removed before their parents (customers) so the
# foreign-key constraint is not violated during the reset.
cursor.execute("DELETE FROM orders")
cursor.execute("DELETE FROM customers")

# Insert sample data using bound parameters and explicit column lists.
# Python None is stored as SQL NULL (Bob has no country yet).
cursor.executemany(
    "INSERT INTO customers (customer_id, name, country) VALUES (?, ?, ?)",
    [
        (1, "Alice", "India"),
        (2, "Bob", None),
        (3, "Charlie", "USA"),
    ],
)
cursor.executemany(
    "INSERT INTO orders (order_id, customer_id, region, sales, amount) "
    "VALUES (?, ?, ?, ?, ?)",
    [
        (101, 1, "North", 750.5, 1200),
        (102, 2, "South", 300, 450),
        (103, 1, "East", 950, 1300),
        (104, 3, "North", 100, 50),
    ],
)

conn.commit()


## Step 2: Advanced Filtering and Aggregation

In [3]:

# SQL: count orders and average sales per region, keeping only orders with
# sales > 500 and excluding the 'West' region.
# ORDER BY pins the row order; without it SQLite is free to return the
# grouped rows in any order, so the displayed table would not be reproducible.
query1 = '''
SELECT region, COUNT(*) as total_orders, AVG(sales) as avg_sales
FROM orders
WHERE sales > 500 AND region != 'West'
GROUP BY region
ORDER BY region
'''
pd.read_sql_query(query1, conn)


Unnamed: 0,region,total_orders,avg_sales
0,East,1,950.0
1,North,1,750.5


## Step 3: Subquery Example

In [4]:

# Subquery: the inner SELECT collects the customer_ids that appear on at least
# one order with amount > 1000; the outer SELECT returns those customers' names.
query2 = '''
SELECT name FROM customers
WHERE customer_id IN (
    SELECT customer_id FROM orders WHERE amount > 1000
)
'''
pd.read_sql_query(sql=query2, con=conn)


Unnamed: 0,name
0,Alice


## Step 4: Load SQL Results into Pandas

In [5]:

# Let SQLite do the filtering, then materialise the matching rows as a
# DataFrame and preview the first few of them.
query3 = "SELECT * FROM orders WHERE amount > 1000"
df_orders = pd.read_sql_query(sql=query3, con=conn)
df_orders.head(n=5)


Unnamed: 0,order_id,customer_id,region,sales,amount
0,101,1,North,750.5,1200.0
1,103,1,East,950.0,1300.0


## Step 5: Modify Data in SQL

In [6]:

# Update and delete operations, run as one transaction. The connection's
# context manager commits when the block completes and rolls back if either
# statement raises, so the tables are never left half-modified.
with conn:
    # Fill in a country for customers that have none recorded
    conn.execute("UPDATE customers SET country = 'India' WHERE country IS NULL")
    # Remove orders whose amount is below 100
    conn.execute("DELETE FROM orders WHERE amount < 100")

# View updated customers table
pd.read_sql_query("SELECT * FROM customers", conn)


Unnamed: 0,customer_id,name,country
0,1,Alice,India
1,2,Bob,India
2,3,Charlie,USA


## Step 6: SQL-style Join using Pandas

In [7]:

# Pull each table into its own DataFrame
df_customers = pd.read_sql_query(sql="SELECT * FROM customers", con=conn)
df_orders = pd.read_sql_query(sql="SELECT * FROM orders", con=conn)

# Inner join on customer_id (the pandas counterpart of SQL's INNER JOIN):
# only customers that have at least one matching order appear in the result.
merged_df = df_customers.merge(df_orders, how='inner', on='customer_id')
merged_df


Unnamed: 0,customer_id,name,country,order_id,region,sales,amount
0,1,Alice,India,101,North,750.5,1200.0
1,1,Alice,India,103,East,950.0,1300.0
2,2,Bob,India,102,South,300.0,450.0
