# 🌿 Graves Greenery — Colab (DuckDB + `%sql conn`)

This notebook uses a **single DuckDB connection** for both loading and querying to avoid configuration conflicts.

**What it does**
1) Installs deps and clones your public GitHub repo (`danielsgraves/Graves_Greenery_Analysis`)
2) Opens one DuckDB connection and binds `%sql` to it, so every `%%sql` cell runs on that connection and renders its result as PrettyTable output
3) Loads every CSV under the repo's `data/` directory (searched recursively) as a DuckDB table
4) Lists tables and runs a simple test query


In [None]:
!pip -q install --upgrade duckdb duckdb-engine "sqlalchemy>=2.0" ipython-sql jupysql

import os, subprocess
REPO_USER = "danielsgraves"          # <-- correct owner
REPO_NAME = "Graves_Greenery_Analysis"
REPO_DIR  = f"/content/{REPO_NAME}"

if not os.path.exists(REPO_DIR):
    subprocess.run(
        f"git clone --depth 1 https://github.com/{REPO_USER}/{REPO_NAME}.git {REPO_DIR}",
        shell=True, check=True
    )
else:
    subprocess.run(f"git -C {REPO_DIR} pull --ff-only", shell=True, check=True)

print("Repo ready at:", REPO_DIR)
print("CSV root:", f"{REPO_DIR}/data")

In [None]:
import duckdb
# Path of the on-disk DuckDB database file that `conn` below is opened on.
DB_PATH = "/content/graves_greenery.duckdb"  # one file, used everywhere

# Open a single DBAPI connection (read/write)
# `conn` is reused by the CSV-loading cell, so loading and querying share one connection.
conn = duckdb.connect(DB_PATH)

# Bind JupySQL/ipython-sql to THIS connection (no URL parsing)
# Order matters: the `sql` extension must be (re)loaded before `%sql conn` can hand it
# the already-open connection object.
%reload_ext sql
%sql conn
# Echo the connection object and file path as a visual confirmation of the binding.
print("Bound %sql to:", conn, "→", DB_PATH)

In [None]:
%%sql
-- Sanity check: list the databases visible on the connection bound to %sql.
SELECT * FROM pragma_database_list();

In [None]:
import os, re, glob
from pathlib import Path

# Settings for the CSV loader below.
REPO_DIR = "/content/Graves_Greenery_Analysis"  # local clone of the repo (same path the setup cell clones into)
CSV_GLOB = "data/**/*.[cC][sS][vV]"  # case-insensitive .csv
INCLUDE_PARENT_IN_TABLE = False  # True -> prefix each table name with its parent folder name

def slugify_table_name(path, include_parent=False):
    """Derive a SQL-friendly table name from a CSV file path.

    The file stem is lower-cased, every run of characters outside
    ``[a-z0-9_]`` is collapsed to a single underscore, and leading/trailing
    underscores are stripped.

    Args:
        path: Path (str or path-like) of the source file.
        include_parent: When true and the file has a real parent directory,
            prefix the name with the slugified parent directory name.

    Returns:
        A non-empty name made only of ``[a-z0-9_]`` that does not start with a
        digit. A name that would start with a digit is prefixed with ``t_``;
        a name that would be empty (no ASCII letters or digits in the stem)
        becomes ``"unnamed"``.
    """
    def _slug(text):
        # Lower-case, squash anything non [a-z0-9_] into "_", trim edge underscores.
        return re.sub(r'[^a-z0-9_]+', '_', text.lower()).strip('_')

    p = Path(path)
    name = _slug(p.stem)
    # `p.parent != p.parent.parent` is false for "." and "/", i.e. when there is
    # no real parent directory whose name could be used as a prefix.
    if include_parent and p.parent != p.parent.parent:
        parent = _slug(p.parent.name)
        if parent:  # skip an empty prefix so the name never starts with "_"
            name = f"{parent}_{name}" if name else parent
    if not name:
        # An empty identifier is not a usable table name; fall back to a placeholder.
        name = "unnamed"
    if re.match(r'^\d', name):
        name = 't_' + name
    return name

# Discover the CSVs. Sorting makes the load order, and therefore the winner of any
# table-name collision, deterministic instead of dependent on filesystem order.
csv_pattern = os.path.join(REPO_DIR, CSV_GLOB)
files = sorted(glob.glob(csv_pattern, recursive=True))
print(f"Found {len(files)} CSV(s). Loading…")
if not files:
    print(f"WARNING: nothing matched {csv_pattern!r}; no tables were created.")

# Load all CSVs into tables via the SAME connection
loaded = {}  # table name -> CSV path it was loaded from
for f in files:
    tbl = slugify_table_name(f, INCLUDE_PARENT_IN_TABLE)
    if tbl in loaded:
        # CREATE OR REPLACE would otherwise drop the earlier file's data silently.
        print(f"WARNING: table {tbl!r} (from {loaded[tbl]}) is being replaced by {f}")
    # The table name is safe to interpolate: the slug only contains [a-z0-9_].
    # The file path is passed as a bound parameter.
    # NOTE: ignore_errors=True skips rows that fail to parse (best-effort load).
    conn.execute(
        f"""
        CREATE OR REPLACE TABLE "{tbl}" AS
        SELECT * FROM read_csv_auto(?, header=True, sample_size=-1, ignore_errors=True);
        """,
        [f]
    )
    loaded[tbl] = f
# Report the names actually created (they honour INCLUDE_PARENT_IN_TABLE).
print("Loaded a few:", list(loaded)[:5])

In [None]:
%%sql
-- Show, in alphabetical order, the names of all tables in the 'main' schema.
SELECT t.table_name
FROM information_schema.tables AS t
WHERE t.table_schema = 'main'
ORDER BY t.table_name;

In [None]:
%%sql
-- Smoke test: preview up to five rows of the `customers` table.
-- Replace 'customers' if your table has a different name from the list above
SELECT * FROM customers LIMIT 5;

In [None]:
%%sql
-- Optional: keep a dim_* alias if you prefer that naming
-- The view re-exposes every column of `customers` under the name `dim_customers`;
-- the second statement previews up to five rows through the view.
CREATE OR REPLACE VIEW dim_customers AS SELECT * FROM customers;
SELECT * FROM dim_customers LIMIT 5;