# 🌿 Graves Greenery Colab Template
This notebook creates a local DuckDB database file, clones your public GitHub repo, loads every CSV found recursively under the repo's `data/` directory (`data/**`) into its own table, and lets you query those tables with `%sql`.

In [None]:
!pip -q install --upgrade duckdb duckdb-engine "sqlalchemy>=2.0" ipython-sql jupysql

import os, subprocess
# Where the data lives on GitHub and where it is checked out locally.
REPO_USER = "danielgraves"
REPO_NAME = "Graves_Greenery_Analysis"
REPO_DIR = f"/content/{REPO_NAME}"
REPO_URL = f"https://github.com/{REPO_USER}/{REPO_NAME}.git"

# Clone on first run, fast-forward on later runs so re-running the cell is safe.
# Commands are passed as argument lists (no shell) so that spaces or shell
# metacharacters in the user/repo/path values cannot change what gets executed.
# check=True raises CalledProcessError if git fails, stopping the notebook early.
if not os.path.exists(REPO_DIR):
    subprocess.run(["git", "clone", "--depth", "1", REPO_URL, REPO_DIR], check=True)
else:
    subprocess.run(["git", "-C", REPO_DIR, "pull", "--ff-only"], check=True)

print("Repo ready at:", REPO_DIR)
print("CSV root:", f"{REPO_DIR}/data")

In [None]:
# --- Tunable settings for the rest of the notebook ---

# When True, the loader prefixes each table name with its parent folder name.
INCLUDE_PARENT_IN_TABLE = False

# Glob (relative to the cloned repo) matching CSV files at any depth under
# data/, with the extension matched case-insensitively.
CSV_GLOB = "data/**/*.[cC][sS][vV]"

# On-disk DuckDB database file; the SQL connection must point at this same path.
DB_PATH = "/content/graves_greenery.duckdb"

print("DB file:", DB_PATH)

In [None]:
%reload_ext sql
from sql.connection import Connection
try:
    if Connection.current is not None and getattr(Connection.current, 'session', None):
        try: Connection.current.session.close()
        except Exception: pass
except Exception:
    pass
Connection.connections.clear()
Connection.current = None
print('SQL magic reset.')

# Use absolute path DSN
%sql duckdb:////content/graves_greenery.duckdb

In [None]:
%%sql
-- Sanity check: list the databases attached to the current connection.
SELECT * FROM pragma_database_list();

In [None]:
import glob, re
from pathlib import Path

def slugify_table_name(path, include_parent=False):
    """Derive a SQL-friendly table name from a file path.

    The file stem is lower-cased, every run of characters outside
    ``[a-z0-9_]`` becomes a single underscore, and leading/trailing
    underscores are stripped.

    Args:
        path: Path (str or path-like) of the source file.
        include_parent: When True, prefix the name with the slugified name of
            the file's immediate parent directory (skipped when the file sits
            directly at the path root, or when the parent slug is empty).

    Returns:
        A non-empty name made only of ``[a-z0-9_]``. Names that would start
        with a digit are prefixed with ``t_``; if nothing usable remains
        (e.g. the stem is only punctuation or non-ASCII text) the fallback
        ``"unnamed"`` is returned instead of an empty string.
    """
    def _slug(text):
        # Collapse anything that is not a lowercase letter, digit or underscore.
        return re.sub(r'[^a-z0-9_]+', '_', text.lower()).strip('_')

    p = Path(path)
    parts = [_slug(p.stem)]
    # p.parent == p.parent.parent only at the top of the path ("/" or "."),
    # where there is no meaningful directory name to use as a prefix.
    if include_parent and p.parent != p.parent.parent:
        parts.insert(0, _slug(p.parent.name))

    # Drop empty pieces so the result never starts or ends with "_".
    name = "_".join(part for part in parts if part)
    if not name:
        # An empty identifier would make the CREATE TABLE statement invalid.
        name = "unnamed"
    elif re.match(r'^\d', name):
        name = 't_' + name
    return name

# Discover every CSV under the repo. glob returns files in arbitrary order, so
# sort to make the load order (and the winner of any name collision) repeatable.
files = sorted(glob.glob(os.path.join(REPO_DIR, CSV_GLOB), recursive=True))
print(f"Found {len(files)} CSV(s).")

loaded = []          # (table_name, csv_path) for every file loaded, in order
table_sources = {}   # table_name -> csv_path that most recently populated it
for f in files:
    tbl = slugify_table_name(f, include_parent=INCLUDE_PARENT_IN_TABLE)

    # CREATE OR REPLACE silently overwrites, so make collisions visible.
    if tbl in table_sources:
        print(f"WARNING: table {tbl!r} loaded from {table_sources[tbl]} is being replaced by {f}")
    table_sources[tbl] = f

    # Double any single quotes so the path stays a valid SQL string literal.
    sql_path = f.replace("'", "''")

    # sample_size=-1 and ignore_errors=True are passed straight to DuckDB's
    # read_csv_auto; NOTE(review): ignore_errors presumably drops malformed
    # rows without reporting them — confirm that is acceptable for this data.
    q = f"""
    CREATE OR REPLACE TABLE "{tbl}" AS
    SELECT * FROM read_csv_auto('{sql_path}', header=True, sample_size=-1, ignore_errors=True);
    """
    get_ipython().run_cell_magic('sql', '', q)
    loaded.append((tbl, f))

print("Loaded tables (first few):", [t for t,_ in loaded[:8]])

In [None]:
%%sql
-- List every table and view in the main schema, in alphabetical order.
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'main'
ORDER BY table_name;

In [None]:
%%sql
-- Preview five rows of the customers table (presumably loaded from a customers CSV in the repo; confirm it exists).
SELECT * FROM customers LIMIT 5;

In [None]:
%%sql
-- Optional view alias for dim_customers
CREATE OR REPLACE VIEW dim_customers AS SELECT * FROM customers;
SELECT * FROM dim_customers LIMIT 5;