# Graves' Greenery — Setup\nSQL via DuckDB (ipython-sql + PrettyTable)\n\nSingle DuckDB connection (no server). All CSVs in `/data` are exposed as VIEWS (or TABLES) without opening any secondary DuckDB connection.

In [None]:
# --- CONFIG ---
# GitHub coordinates of the project repository.
REPO_USER = "danielsgraves"
REPO_NAME = "Graves_Greenery_Analysis"  # repo folder name after clone

# Filesystem layout: everything lives under the cloned repository folder.
REPO_DIR = "/content/" + REPO_NAME
DATA_DIR = REPO_DIR + "/data"  # CSVs live here
DB_FILE = REPO_DIR + "/outputs/graves_greenery.duckdb"  # DuckDB file

# Loader / dashboard switches.
LOAD_AS_VIEWS = True  # True: CSV -> VIEW (zero copy). False: CSV -> TABLE (materialize)
PRINT_SAMPLE_ROWS = 5  # preview rows in dashboard (0 to disable)

In [None]:
# --- SYNC REPO: clone if missing, else pull latest ---
import os
import shlex
import subprocess


def run(cmd):
    """Run a command, echo its combined stdout/stderr, and fail on a non-zero exit.

    Args:
        cmd: Either a shell command string (executed through the shell, as
            before) or a sequence of arguments. A sequence is executed
            directly, without a shell, so its items are never re-parsed
            (spaces or shell metacharacters in a path stay literal).

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    via_shell = isinstance(cmd, str)
    argv = cmd if via_shell else [str(part) for part in cmd]
    p = subprocess.run(
        argv,
        shell=via_shell,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr with stdout in one stream
    )
    print(p.stdout)
    if p.returncode != 0:
        shown = cmd if via_shell else shlex.join(argv)
        raise RuntimeError(f"Command failed: {shown}")


if not os.path.exists(REPO_DIR):
    run(["git", "clone", f"https://github.com/{REPO_USER}/{REPO_NAME}.git", REPO_DIR])
else:
    # `git -C` targets the checkout explicitly instead of relying on the
    # process working directory.
    run(["git", "-C", REPO_DIR, "fetch", "--all", "--prune"])
    run(["git", "-C", REPO_DIR, "pull", "--rebase"])

# The DuckDB file is written under outputs/, so make sure the folder exists.
os.makedirs(f"{REPO_DIR}/outputs", exist_ok=True)
os.chdir(REPO_DIR)
print("Working dir:", os.getcwd())
run(["ls", "-la"])

In [None]:
# --- INSTALL: ipython-sql + PrettyTable + DuckDB SQLAlchemy driver ---\n%pip -q install ipython-sql prettytable duckdb duckdb-engine sqlalchemy pandas\n\n# Load SQL magic\n%reload_ext sql\n\n# Prefer PrettyTable by default (flip to DataFrame with autopandas=True if needed)\n%config SqlMagic.autopandas = False\n%config SqlMagic.feedback = False\n\n# Pick a PrettyTable style that exists; fallback to DataFrame rendering\nimport prettytable as pt\ncandidates = ['MARKDOWN','PLAIN_COLUMNS','MSWORD_FRIENDLY','DOUBLE_BORDER','SINGLE_BORDER','DEFAULT']\navail = [s for s in candidates if hasattr(pt, s)]\nif avail:\n    chosen = avail[0]\n    get_ipython().run_line_magic('config', f"SqlMagic.style = '{chosen}'")\n    print(f"PrettyTable style set to: {chosen}")\nelse:\n    get_ipython().run_line_magic('config', "SqlMagic.autopandas = True")\n    print("No PrettyTable styles detected; switched to DataFrame rendering.")\n

In [None]:
# --- CLOSE ANY OLD CONNECTIONS, THEN CONNECT %sql TO DUCKDB ---
# Same three %sql invocations as before, issued through the IPython API.
get_ipython().run_line_magic('sql', '--connections')
get_ipython().run_line_magic('sql', '--close all')

# SQLAlchemy-style URL pointing at the on-disk DuckDB file.
db_url = "duckdb:///" + DB_FILE
get_ipython().run_line_magic('sql', db_url)
print("Connected:", db_url)

In [None]:
# --- LOAD ALL CSVs USING THE ACTIVE %sql CONNECTION (NO SECOND DUCKDB CONNECTION) ---
import glob
import os
from IPython import get_ipython


def _quote_ident(name):
    """Return *name* as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


def _quote_literal(value):
    """Return *value* as a single-quoted SQL string literal (embedded quotes doubled)."""
    return "'" + value.replace("'", "''") + "'"


files = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
print(f"Found {len(files)} CSV(s) in {DATA_DIR}")
if not files:
    print("⚠️ No CSVs found. Put your data files under /data in the repo.")
else:
    # One relation per CSV, named after the file (without its extension).
    relation_kind = "VIEW" if LOAD_AS_VIEWS else "TABLE"
    status_prefix = "VIEW ready: " if LOAD_AS_VIEWS else "TABLE loaded: "
    for path in files:
        table = os.path.splitext(os.path.basename(path))[0]
        # Quote both the identifier and the path: file names may contain
        # dashes, spaces, leading digits or quotes that would otherwise
        # produce invalid SQL.
        sql = (
            f"CREATE OR REPLACE {relation_kind} {_quote_ident(table)} AS\n"
            f"SELECT * FROM read_csv_auto({_quote_literal(path)}, HEADER=TRUE);\n"
        )
        get_ipython().run_cell_magic('sql', '', sql)
        print(status_prefix + table)

print("Done.")

In [None]:
%%sql
-- List the relations visible on the active connection.
SHOW TABLES;

In [None]:
# --- ROW COUNT DASHBOARD (USES %sql CONNECTION ONLY) ---
import pandas as pd
from IPython import get_ipython
from IPython.display import display


def _quote_ident(name):
    """Return *name* as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


def _query_df(sql):
    """Run *sql* on the active %sql connection and return a pandas DataFrame.

    Works with SqlMagic.autopandas on (the magic already returns a DataFrame)
    or off (the magic returns a result set that is converted here), so the
    setting never has to be toggled and the user's choice is left untouched.
    """
    result = get_ipython().run_cell_magic('sql', '', sql)
    return result if isinstance(result, pd.DataFrame) else result.DataFrame()


# information_schema.tables lists base tables AND views, so the dashboard
# works in both LOAD_AS_VIEWS modes.
tables = _query_df(
    "SELECT table_name AS name "
    "FROM information_schema.tables "
    "WHERE table_schema = 'main' "
    "ORDER BY table_name"
)['name'].tolist()

rows = [
    {
        'table': t,
        'rows': int(_query_df(f"SELECT COUNT(*) AS n FROM {_quote_ident(t)}")['n'].iloc[0]),
    }
    for t in tables
]
# Explicit columns keep sort_values() valid when no relations exist yet.
counts = pd.DataFrame(rows, columns=['table', 'rows']).sort_values('table').reset_index(drop=True)
# A bare expression is only rendered when it is the last statement of a cell,
# so display the summary explicitly.
display(counts)

if PRINT_SAMPLE_ROWS and tables:
    sample_limit = int(PRINT_SAMPLE_ROWS)
    for t in tables:
        print(f"\n### {t} (first {PRINT_SAMPLE_ROWS} rows)")
        display(_query_df(f"SELECT * FROM {_quote_ident(t)} LIMIT {sample_limit}"))

## SQL Sandbox\nUse `%%sql` below to run queries directly against DuckDB.

In [None]:
%%sql
-- Scratch cell. Uncomment one of the sample queries below or write your own.
-- Examples (replace with your schema):
-- SELECT COUNT(*) AS n FROM dim_customers;
-- SELECT category, COUNT(*) AS n FROM dim_plants GROUP BY category ORDER BY n DESC LIMIT 10;

# Project Overview

# Problem Statement

# Data Cleaning and Preparation

# Exploratory Data Analysis (EDA)

# Solution and Implementation

# Recommendations and Next Steps