<a href="https://colab.research.google.com/github/danielsgraves/Graves_Greenery_Analysis/blob/main/notebook/Graves_Greenery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/danielsgraves/Graves_Greenery_Analysis/blob/main/notebook/Graves_Greenery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Graves' Greenery Dataset

> This notebook registers a **custom** SQL magic (`%sqlite` / `%%sqlite`) that runs queries against a local SQLite database file stored in the repository's `outputs/` folder.
>


In [None]:
# --- CONFIG ---
# GitHub coordinates of the project repository.
REPO_USER = "danielsgraves"
REPO_NAME = "Graves_Greenery_Analysis"   # doubles as the folder name after clone

# Every path lives under /content/<REPO_NAME>.
DATA_DIR = "/content/" + REPO_NAME + "/data"   # folder that holds the CSVs
DB_FILE = "/content/{}/outputs/graves_greenery.db".format(REPO_NAME)   # SQLite DB file

# Set to False to keep existing DB tables between sessions.
LOAD_FROM_CSV = True

In [None]:
# --- SYNC REPO: clone if missing, else pull latest ---
import os, subprocess

def run(cmd):
    """Run `cmd` through the shell, print its output, and fail on error.

    stderr is merged into stdout, so the printed text contains both streams.

    Args:
        cmd: command line, passed to the shell as a single string.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(completed.stdout)
    if completed.returncode == 0:
        return
    raise RuntimeError(f"Command failed: {cmd}")

# Refresh the checkout when the folder is already present; otherwise clone it.
if os.path.exists("/content/" + REPO_NAME):
    os.chdir("/content/" + REPO_NAME)   # the git commands below act on the cwd
    run("git fetch --all --prune")
    run("git pull --rebase")
else:
    run("git clone https://github.com/{0}/{1}.git /content/{1}".format(REPO_USER, REPO_NAME))

# Make sure the outputs folder exists, then work from the repo root.
os.makedirs("/content/" + REPO_NAME + "/outputs", exist_ok=True)
os.chdir("/content/" + REPO_NAME)
print("Working dir:", os.getcwd())
run("ls -la")

In [None]:
# --- CUSTOM %%sqlite MAGIC (no external deps beyond stdlib + pandas) ---
import sqlite3, pandas as pd
from IPython.core.magic import register_line_cell_magic

# Single shared connection for this session. Code that queries through `_conn`
# looks the name up when it runs, so rebinding `_conn` later changes which
# connection subsequent queries use.
_conn = sqlite3.connect(DB_FILE)

@register_line_cell_magic
def sqlite(line, cell=None):
    """
    Run SQL against the shared session connection (`_conn`).

    Usage:
      %sqlite SELECT 1;
      %%sqlite
      SELECT * FROM dim_customers LIMIT 5;

    Args:
        line: text that follows the magic name on the same line.
        cell: body of the cell when invoked as %%sqlite; None for %sqlite.

    Returns:
        A pandas DataFrame for statements that produce a result set. It renders
        once when it is the cell's result; to keep it in a variable use the
        line form (e.g., df = %sqlite SELECT ...), which does not auto-display.
        None for statements without a result set (CREATE, INSERT, UPDATE,
        DROP, ...); those changes are committed before returning.

    Raises:
        ValueError: if no SQL text was supplied.
    """
    sql = line if cell is None else (line + "\n" + cell)
    if not sql.strip():
        raise ValueError("No SQL provided. Usage: %sqlite SELECT 1;")

    cursor = _conn.execute(sql)
    try:
        # description is None when the statement returned no result set;
        # building a DataFrame from it would fail, so commit and stop here.
        if cursor.description is None:
            _conn.commit()
            return None
        columns = [col[0] for col in cursor.description]
        return pd.DataFrame.from_records(
            cursor.fetchall(), columns=columns, coerce_float=True
        )
    finally:
        cursor.close()

# Confirm the magic is available and show which database file is configured.
print("Custom %sqlite / %%sqlite magic registered. DB:", DB_FILE)

In [None]:
# --- LOAD ALL CSVs INTO SQLITE TABLES (idempotent) ---
import glob, os

def _db_has_no_tables(db_path):
    """Return True when `db_path` is missing or contains no tables."""
    if not os.path.exists(db_path):
        return True
    probe = sqlite3.connect(db_path)
    try:
        (table_count,) = probe.execute(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table'"
        ).fetchone()
    finally:
        probe.close()
    return table_count == 0


# Reload when requested, or when the database holds nothing yet.
# Checking os.path.exists(DB_FILE) alone is not enough: sqlite3.connect()
# creates the file when a connection is opened, so the file can exist without
# a single table ever having been loaded.
if LOAD_FROM_CSV or _db_has_no_tables(DB_FILE):
    # Reopen the shared connection so later queries through `_conn` use a fresh handle.
    try:
        _conn.close()
    except (NameError, sqlite3.Error):
        # Best effort: there was no earlier connection, or it failed to close.
        pass
    _conn = sqlite3.connect(DB_FILE)

    files = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
    if not files:
        print(f"⚠️ No CSVs found in {DATA_DIR}")
    for path in files:
        # The table is named after the CSV file, without its extension.
        table = os.path.splitext(os.path.basename(path))[0]
        df = pd.read_csv(path)
        # mild normalization for safer SQL (str() guards non-string headers)
        df.columns = [str(c).strip().replace(" ", "_") for c in df.columns]
        # Replace any existing table so re-running the cell is idempotent.
        df.to_sql(table, _conn, if_exists="replace", index=False)
        print(f"Loaded {table} ({len(df):,} rows)")

# List the tables currently in the database, sorted by name.
# This runs whether or not the CSVs were reloaded above.
%sqlite SELECT name AS table_name FROM sqlite_master WHERE type='table' ORDER BY name;

## SQL Sandbox
Use `%%sqlite` below to run queries directly against the local SQLite database.

In [None]:
%%sqlite
-- Preview the first five rows of the denormalized table.
SELECT *
FROM  graves_greenery_full_denormalized
LIMIT 5;

In [None]:
# --- Helper: save any query result to /outputs as CSV ---
def sql_to_csv(query: str, out_path: str):
    """Run `query` on the shared connection and write the result to a CSV file.

    Args:
        query: SQL statement that returns rows.
        out_path: destination CSV path. Relative paths resolve against the
            current working directory; missing parent folders are created.

    The CSV is written without the DataFrame index, and a one-line summary
    with the row count is printed.
    """
    import os  # local import keeps this helper usable on its own

    df = pd.read_sql_query(query, _conn)
    # to_csv fails when the target folder does not exist, so create it first.
    parent = os.path.dirname(out_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_csv(out_path, index=False)
    print(f"Saved {len(df):,} rows → {out_path}")

# Example:
# sql_to_csv("SELECT * FROM dim_plants LIMIT 100", "outputs/dim_plants_sample.csv")

# Project Overview

# Problem Statement

# Data Cleaning and Preparation

# Exploratory Data Analysis (EDA)

# Solution and Implementation

# Recommendations and Next Steps