<a href="https://colab.research.google.com/github/danielsgraves/Graves_Greenery_Analysis/blob/main/notebook/Graves_Greenery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Graves' Greenery Dataset

> This notebook is meant to run in Google Colab: the setup cells below clone the repository, install SQL magic (`%sql` / `%%sql`), and connect it to a local SQLite database.

In [None]:
# --- CONFIG ---
# Git remote to clone, and the folder name the clone gets under /content.
REPO_URL: str  = "https://github.com/danielsgraves/Graves_Greenery_Analysis.git"
REPO_NAME: str = "Graves_Greenery_Analysis"

# Folder scanned for *.csv files, and the SQLite file the tables are written to.
DATA_DIR: str  = f"/content/{REPO_NAME}/data"
DB_FILE: str   = f"/content/{REPO_NAME}/outputs/graves_greenery.db"

# True  -> (re)load every CSV into the database on each run.
# False -> keep the tables that are already in the database.
LOAD_FROM_CSV: bool = True

In [None]:
# --- SYNC REPO ---
import os, subprocess

def run(cmd):
    """Run ``cmd`` through the shell, echo its output, and fail loudly on error.

    stderr is merged into stdout, so the printed text contains both streams.

    Args:
        cmd: Shell command line to execute.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(completed.stdout)
    if completed.returncode == 0:
        return
    raise RuntimeError(f"Command failed: {cmd}")

# Bring an existing checkout up to date, or clone the repo on first use.
repo_dir = f"/content/{REPO_NAME}"
if os.path.exists(repo_dir):
    os.chdir(repo_dir)
    for git_cmd in ("git fetch --all --prune", "git pull --rebase"):
        run(git_cmd)
else:
    run(f"git clone {REPO_URL} {repo_dir}")

# Make the repo root the working directory and show what it contains.
os.chdir(repo_dir)
print("Working dir:", os.getcwd())
run("ls -la")

In [None]:
# --- INSTALL & LOAD SQL MAGIC (PINNED VERSIONS) ---
!pip install -q jupysql==0.10.12 prettytable==3.9.0 sqlalchemy
%load_ext sql
%config SqlMagic.autopandas = True        # return DataFrames (avoid PrettyTable rendering)
%config SqlMagic.style = 'PLAIN_COLUMNS'   # style exists in prettytable 3.9.0
%config SqlMagic.displaycon = False
%config SqlMagic.autolimit = 0

In [None]:
# --- CONNECT TO SQLITE ---
import os

# Connect through DB_FILE (set in the config cell) instead of a path relative
# to the current working directory, so the SQL magic and the CSV loader always
# open the same database file no matter where the kernel's cwd points.
os.makedirs(os.path.dirname(os.path.abspath(DB_FILE)), exist_ok=True)

# Function form of `%sql <url>`; it lets the URL be built from DB_FILE.
# With an absolute DB_FILE the URL has four slashes (sqlite:////abs/path).
get_ipython().run_line_magic("sql", f"sqlite:///{DB_FILE}")
print(f"SQLite ready → {DB_FILE}")

In [None]:
# --- LOAD CSVs INTO TABLES (first run or when toggled) ---
import os, glob, sqlite3, pandas as pd

# Make sure the folder for the database exists before SQLite opens the file.
os.makedirs(os.path.dirname(os.path.abspath(DB_FILE)), exist_ok=True)

conn = sqlite3.connect(DB_FILE)
try:
    # "First run" means the database has no tables yet. Checking only whether
    # the file exists is not enough: the file can already be present without
    # any tables in it (e.g. once a connection to it has been opened).
    db_has_tables = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' LIMIT 1"
    ).fetchone() is not None

    if LOAD_FROM_CSV or not db_has_tables:
        if os.path.exists(DATA_DIR):
            csv_paths = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
            if not csv_paths:
                print(f"⚠️ No CSV files found in {DATA_DIR}")
            for path in csv_paths:
                # The table is named after the CSV file, without its extension.
                table = os.path.splitext(os.path.basename(path))[0]
                try:
                    df = pd.read_csv(path)
                    # Replace any existing table so re-runs do not duplicate rows.
                    df.to_sql(table, conn, if_exists="replace", index=False)
                    print(f"Loaded {table} ({len(df):,} rows)")
                except Exception as e:
                    # Best-effort: report the bad file and keep loading the rest.
                    print(f"Failed to load {path}: {e}")
        else:
            print(f"⚠️ DATA_DIR not found: {DATA_DIR}")
finally:
    # Always release the connection, even if something unexpected was raised.
    conn.close()

In [None]:
%%sql
SELECT
    name AS table_name
FROM
    sqlite_master
WHERE
    type = 'table'  -- only rows whose type is 'table'
ORDER BY
    table_name;

In [None]:
# --- REFRESH REPO HELPER ---
def refresh_repo():
    """Pull the latest commits into the local clone and report the outcome.

    Changes the working directory to ``/content/{REPO_NAME}`` and runs
    ``git pull --rebase`` there. The pull stays best-effort: a failing pull
    does not raise, but it is reported as a failure instead of being
    announced as a successful refresh.

    Returns:
        None. The outcome is printed.
    """
    import subprocess, os
    os.chdir(f"/content/{REPO_NAME}")
    result = subprocess.run("git pull --rebase", shell=True, check=False)
    if result.returncode == 0:
        print("Repo refreshed.")
    else:
        # Only claim success when git actually succeeded.
        print(f"⚠️ git pull failed (exit code {result.returncode}); repo NOT refreshed.")
# refresh_repo()

## SQL Sandbox
Use `%%sql` below to run queries directly against SQLite.

In [None]:
%%sql
-- Example queries (uncomment one, or replace with your own):
-- SELECT COUNT(*) AS n FROM dim_customers;
-- SELECT * FROM fact_order_items LIMIT 5;

# Project Overview

# Problem Statement

# Data Cleaning and Preparation

# Exploratory Data Analysis (EDA)

# Solution and Implementation

# Recommendations and Next Steps