# Graves' Greenery — SQL (DuckDB via ipython-sql)

This notebook runs **`%sql` / `%%sql`** queries, rendered with **PrettyTable**, on top of **DuckDB** (no database server required).

- CSVs are loaded from the repo's `data/` folder into `outputs/graves_greenery.duckdb`
- You can commit the DB file if you want, or rebuild each session

In [None]:
# --- CONFIG ---
# GitHub coordinates of the project repository.
REPO_USER = "danielsgraves"
REPO_NAME = "Graves_Greenery_Analysis"  # doubles as the folder name after clone

# Locations inside the cloned repo (which lives under /content).
DATA_DIR = "/content/{}/data".format(REPO_NAME)  # CSVs live here
DB_FILE = "/content/{}/outputs/graves_greenery.duckdb".format(REPO_NAME)  # DuckDB file

# True: (re)load the tables from the CSVs; False: keep existing tables.
LOAD_FROM_CSV = True


In [None]:
# --- SYNC REPO: clone if missing, else pull latest ---
import os, subprocess

def run(cmd):
    """Run *cmd* through the shell, echo its output, and fail on a non-zero exit.

    stderr is merged into stdout, so everything the command wrote is printed
    in one stream before the exit status is checked.

    Args:
        cmd: Command line, passed to the shell as a single string.

    Raises:
        RuntimeError: If the command exits with a non-zero return code.
    """
    result = subprocess.run(
        cmd,
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(result.stdout)
    if result.returncode == 0:
        return
    raise RuntimeError(f"Command failed: {cmd}")

# Where the checkout lives in this runtime.
_repo_dir = f"/content/{REPO_NAME}"

if os.path.exists(_repo_dir):
    # Existing checkout: refresh it from inside the working tree.
    os.chdir(_repo_dir)
    run("git fetch --all --prune")
    run("git pull --rebase")
else:
    # First run in this runtime: fetch a fresh copy.
    run(f"git clone https://github.com/{REPO_USER}/{REPO_NAME}.git {_repo_dir}")

# Make sure the output folder exists, then work from the repo root.
os.makedirs(os.path.join(_repo_dir, "outputs"), exist_ok=True)
os.chdir(_repo_dir)
print("Working dir:", os.getcwd())
run("ls -la")


In [None]:
# --- INSTALL: ipython-sql + PrettyTable + DuckDB (via SQLAlchemy) ---
# Quiet (-q) install of the SQL magic, PrettyTable, DuckDB with its SQLAlchemy
# driver (duckdb-engine), and pandas, which the CSV-loading cell uses.
%pip -q install ipython-sql prettytable duckdb duckdb-engine sqlalchemy pandas

# Load SQL magic
# (%reload_ext rather than %load_ext, so re-running this cell reloads the extension)
%reload_ext sql

# Prefer PrettyTable rendering (you can switch to DataFrame by setting autopandas=True)
%config SqlMagic.autopandas = False
# Disable SqlMagic's feedback messages
# NOTE(review): presumably the "rows affected" / "Done" lines — confirm against the ipython-sql docs
%config SqlMagic.feedback = False

# Pick a PrettyTable style that exists in this runtime.
#
# ipython-sql resolves SqlMagic.style by key in the prettytable module
# namespace, so a style is only usable if its name is a real entry there.
# hasattr() is not a reliable test: newer prettytable releases answer the
# legacy constant names through a deprecation shim, which hasattr() sees but a
# namespace lookup does not, and keep the actual module-level values under
# `_DEPRECATED_*` names. We therefore check namespace membership and try the
# `_DEPRECATED_*` alias of each preferred style right after the plain name.
import prettytable as pt

preferred = (
    'MARKDOWN', 'PLAIN_COLUMNS', 'MSWORD_FRIENDLY', 'DOUBLE_BORDER', 'SINGLE_BORDER', 'DEFAULT',
)
# Preference order is kept: plain name first, then its deprecated alias.
candidates = [name for style in preferred for name in (style, f"_DEPRECATED_{style}")]
pt_namespace = vars(pt)
avail = [name for name in candidates if name in pt_namespace]
if avail:
    chosen = avail[0]
    get_ipython().run_line_magic('config', f"SqlMagic.style = '{chosen}'")
    print(f"PrettyTable style set to: {chosen}")
else:
    # Fallback to DataFrame rendering if no style is available for some reason
    get_ipython().run_line_magic('config', "SqlMagic.autopandas = True")
    print("No PrettyTable styles detected; switched to DataFrame rendering.")


In [None]:
# --- CONNECT %sql TO DUCKDB (SQLAlchemy URL) ---
%sql duckdb:///%s
# (Note: Colab prints a harmless warning string, but the connection works.)
# The path above is replaced at runtime by formatting with DB_FILE below.


In [None]:
# Bind the %sql connection to the actual DB file path (DB_FILE)
from IPython.display import display
db_url = f"duckdb:///{DB_FILE}"
%sql $db_url
print("Connected:", db_url)


In [None]:
# --- LOAD ALL CSVs INTO DUCKDB TABLES (via pandas -> SQLAlchemy) ---
import os, glob, pandas as pd
from sqlalchemy import create_engine


def _sql_name(raw):
    """Trim *raw* and replace spaces with underscores for a safer SQL identifier."""
    return raw.strip().replace(" ", "_")


# Separate SQLAlchemy engine on the same URL the %sql magic is connected to.
engine = create_engine(db_url)

if LOAD_FROM_CSV:
    # Sorted so tables are (re)built in a stable, alphabetical order.
    files = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
    if not files:
        print(f"⚠️ No CSVs found in {DATA_DIR}")
    for path in files:
        # Table name = file name without extension, normalized the same way as
        # the column names so e.g. "my table.csv" does not need quoting in SQL.
        table = _sql_name(os.path.splitext(os.path.basename(path))[0])
        df = pd.read_csv(path)
        # light normalization for safer SQL identifiers
        df.columns = [_sql_name(c) for c in df.columns]
        # "replace" drops and recreates the table, so re-running is repeatable.
        df.to_sql(table, con=engine, if_exists="replace", index=False)
        print(f"Loaded {table} ({len(df):,} rows)")

print("Ready.")


In [None]:
%%sql
SHOW TABLES;


## SQL Sandbox
Use `%%sql` below to run queries directly against DuckDB (PrettyTable output by default).

In [None]:
%%sql
-- Examples (replace with your schema):
-- SELECT COUNT(*) AS n FROM dim_customers;
-- SELECT category, COUNT(*) AS n FROM dim_plants GROUP BY category ORDER BY n DESC LIMIT 10;


# Project Overview

# Problem Statement

# Data Cleaning and Preparation

# Exploratory Data Analysis (EDA)

# Solution and Implementation

# Recommendations and Next Steps