<a href="https://colab.research.google.com/github/danielsgraves/Graves_Greenery_Analysis/blob/main/Graves_Greenery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Graves' Greenery Dataset

# ***Setup***


1) Install MySQL + Python libraries

In [None]:
# Step 1: Install MySQL server and Python libraries (run once per fresh Colab runtime)
!apt-get -y update
!DEBIAN_FRONTEND=noninteractive apt-get -y install mysql-server
!pip -q install pymysql sqlalchemy ipython-sql pandas

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:5 https://cli.github.com/packages stable InRelease
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,810 kB]
Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [3,738 kB]
Get:14 https://r2u.s

2) Start MySQL & set the root password

In [None]:
# Step 2: Start MySQL, silence warning, set root pw, and health check
# NOTE: the root password is set to the literal 'root' below, and the same
# value is passed with -proot in the health checks of this cell.

# 1) Stop any running MySQL, create the data/socket directories, give them to
#    the mysql user, and point that user's home directory at the data dir.
#    Each "|| true" keeps the line from reporting failure when the command fails.
!service mysql stop || true
!mkdir -p /var/lib/mysql /run/mysqld
!chown -R mysql:mysql /var/lib/mysql /run/mysqld
!usermod -d /var/lib/mysql mysql || true   # silences "su: warning: cannot change directory to /nonexistent"

# 2) Start the MySQL service
!service mysql start

# 3) Try to set the root password (only works if password-less socket login works).
#    If the command fails, the echo prints an informational note instead.
!mysql -uroot -e "ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'root'; FLUSH PRIVILEGES;" || echo "[Info] root pw may already be set."

# 4) Health check: process list, ping, then a trivial query using the new password
print("\n[Health] mysqld processes:")
!pgrep -a mysqld || echo "mysqld not running"

print("\n[Health] mysqladmin ping:")
!mysqladmin -uroot -proot ping || echo "mysqladmin ping failed"

print("\n[Health] Version + NOW():")
!mysql -uroot -proot -e "SELECT VERSION() AS server_version, NOW() AS now;"

 * Stopping MySQL database server mysqld
   ...done.
 * Starting MySQL database server mysqld
   ...done.

[Health] mysqld processes:
8012 /bin/sh /usr/bin/mysqld_safe
8159 /usr/sbin/mysqld --basedir=/usr --datadir=/var/lib/mysql --plugin-dir=/usr/lib/mysql/plugin --log-error=/var/log/mysql/error.log --pid-file=1df2f739bf88.pid

[Health] mysqladmin ping:
mysqld is alive

[Health] Version + NOW():
+-------------------------+---------------------+
| server_version          | now                 |
+-------------------------+---------------------+
| 8.0.43-0ubuntu0.22.04.2 | 2025-10-03 19:41:01 |
+-------------------------+---------------------+


3) Mount Google Drive (persistence)

In [None]:
# Step 3: Mount Google Drive so uploads and dumps outlive the current session
import os
from google.colab import drive

# force_remount=True re-mounts even when Drive is already attached
drive.mount('/content/drive', force_remount=True)

# Working folder inside Drive (edit the path if you prefer another location).
# BASE_DIR is assigned only after the mount call has returned.
BASE_DIR = "/content/drive/MyDrive/ColabSQL"
os.makedirs(BASE_DIR, exist_ok=True)
print("Working folder:", BASE_DIR)

Mounted at /content/drive
Working folder: /content/drive/MyDrive/ColabSQL


4) Create the database & SQLAlchemy engine

In [None]:
# Step 4: Create a DB and build an engine for pandas.to_sql and %%sql
DB_NAME = "practiceDB"
!mysql -u root -proot -e "CREATE DATABASE IF NOT EXISTS {DB_NAME};"

from sqlalchemy import create_engine
engine = create_engine(f"mysql+pymysql://root:root@localhost/{DB_NAME}", pool_recycle=3600)
print("Engine ready for database:", DB_NAME)

Engine ready for database: practiceDB


5) upload CSVs from this device (phone or computer)

In [None]:
# STEP 5 — Upload CSVs from your device (phone/computer) and persist to Drive
from google.colab import files
import os

# BASE_DIR is defined by Step 3; stop early with a pointer if it is missing.
assert 'BASE_DIR' in globals(), "Run Step 3 first to mount Drive and set BASE_DIR."

uploaded = files.upload()  # opens file picker

if not uploaded:
    raise SystemExit("No files selected. Re-run this cell to choose CSVs.")

saved_paths = []
for upload_name, payload in uploaded.items():
    # Write the same bytes twice: the runtime disk first, then Drive for persistence.
    drive_copy = os.path.join(BASE_DIR, upload_name)
    for destination in (os.path.join("/content", upload_name), drive_copy):
        with open(destination, "wb") as out_file:
            out_file.write(payload)

    # Only the Drive copy is recorded; later steps read from saved_paths.
    saved_paths.append(drive_copy)
    print("Saved to Drive:", drive_copy)

print("\nFiles ready:")
for stored in saved_paths:
    print(" -", stored)

Saving stolen_vehicles.csv to stolen_vehicles.csv
Saving locations.csv to locations.csv
Saving make_details.csv to make_details.csv
Saved to Drive: /content/drive/MyDrive/ColabSQL/stolen_vehicles.csv
Saved to Drive: /content/drive/MyDrive/ColabSQL/locations.csv
Saved to Drive: /content/drive/MyDrive/ColabSQL/make_details.csv

Files ready:
 - /content/drive/MyDrive/ColabSQL/stolen_vehicles.csv
 - /content/drive/MyDrive/ColabSQL/locations.csv
 - /content/drive/MyDrive/ColabSQL/make_details.csv


6) Import the uploaded CSVs into MySQL tables

In [None]:
# STEP 6 — Robust CSV → MySQL importer
# Each path in `saved_paths` is read into a DataFrame and written to a MySQL
# table named after the file.
import os, re, csv
import pandas as pd

# Prerequisites: 'engine' (Step 4) and a non-empty 'saved_paths' (Step 5).
# Fail early with a pointer to the missing step instead of a NameError later.
assert 'engine' in globals(), "Run Step 4 first to create the SQLAlchemy engine."
assert 'saved_paths' in globals() and saved_paths, "Run Step 5 first to upload/select CSVs."

def sanitize_table_name(path: str) -> str:
    """Derive a table name from a file path.

    The file stem is lower-cased, every run of characters outside
    ``[a-z0-9_]`` becomes a single underscore, repeated underscores are
    collapsed and leading/trailing underscores are removed. If the result
    does not start with a letter it is prefixed with ``t_``.

    The leading-character check runs *after* underscores are stripped, so a
    stem such as ``-123`` yields ``t_123`` rather than ``123``.

    Args:
        path: File path or bare file name; only the stem is used.

    Returns:
        A name made of ``[a-z0-9_]`` that starts with a letter, or
        ``"t_table"`` when nothing usable remains (including an empty stem).
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    name = re.sub(r'[^a-z0-9_]+', '_', stem.strip().lower())
    name = re.sub(r'_+', '_', name).strip('_')
    if not name:
        return "t_table"
    # Check the first character only once the name is in its final form.
    if not re.match(r'^[a-z]', name):
        name = f"t_{name}"
    return name

def detect_delimiter(path: str, sample_bytes: int = 64_000) -> str:
    """Guess the field delimiter of a delimited text file.

    Reads up to ``sample_bytes`` from the start of the file (opened as UTF-8
    text with undecodable bytes ignored) and lets ``csv.Sniffer`` choose
    among comma, semicolon, tab and pipe.

    Args:
        path: File to inspect.
        sample_bytes: Size passed to ``read()`` for the sample.

    Returns:
        The sniffed delimiter, or ``','`` if the file cannot be read or the
        sniffer cannot decide.
    """
    candidates = [',', ';', '\t', '|']
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            head = handle.read(sample_bytes)
        return csv.Sniffer().sniff(head, delimiters=candidates).delimiter
    except Exception:
        # Any failure (missing file, undecidable sample, …) falls back to comma.
        return ','

def read_csv_robust(path: str) -> pd.DataFrame:
    """Load a delimited file into a DataFrame, retrying with a lenient parser.

    The delimiter comes from ``detect_delimiter``. The C parser is tried
    first with ``low_memory=False``; if it raises, the error is printed and
    the file is re-read with the Python parser, which skips malformed lines
    (``on_bad_lines="skip"``).

    Args:
        path: File to read.

    Returns:
        The parsed DataFrame. After a fallback, rows the Python parser
        considered malformed are absent.
    """
    separator = detect_delimiter(path)
    try:
        frame = pd.read_csv(path, sep=separator, engine="c", low_memory=False)
    except Exception as c_engine_error:
        print(f"   [Info] C engine failed ({type(c_engine_error).__name__}: {c_engine_error}). Falling back to Python engine …")
        # Second attempt: the Python engine does not accept low_memory.
        frame = pd.read_csv(path, sep=separator, engine="python", on_bad_lines="skip")
    return frame

def _normalize_column_names(columns) -> list:
    """Return lower-case, underscore-separated, unique column names.

    Each name is stripped, runs of non-word characters become one
    underscore, repeated underscores are collapsed, and leading/trailing
    underscores are removed. A name that ends up empty becomes
    ``col_<position>``; a name that repeats gets a numeric suffix
    (``_2``, ``_3``, …) so that no two columns share a name.
    """
    cleaned = (
        pd.Index([str(c) for c in columns])
        .str.strip()
        .str.replace(r"[^\w]+", "_", regex=True)
        .str.replace(r"_+", "_", regex=True)
        .str.strip("_")
        .str.lower()
    )
    used = set()
    unique_names = []
    for position, name in enumerate(cleaned):
        base = name or f"col_{position}"
        candidate, suffix = base, 1
        # Keep bumping the suffix until the name is not already taken.
        while candidate in used:
            suffix += 1
            candidate = f"{base}_{suffix}"
        used.add(candidate)
        unique_names.append(candidate)
    return unique_names


for path in saved_paths:
    table = sanitize_table_name(path)
    print(f"\n→ Importing {os.path.basename(path)}  →  table `{table}`")

    df = read_csv_robust(path)

    # Normalize column names; distinct headers that normalize to the same
    # text (e.g. "A B" and "a_b") are disambiguated rather than duplicated.
    df.columns = _normalize_column_names(df.columns)

    # Best-effort dtype conversion: report a failure instead of hiding it,
    # and continue with the dtypes pandas inferred while reading.
    try:
        df = df.convert_dtypes()
    except Exception as dtype_error:
        print(f"   [Info] convert_dtypes skipped ({type(dtype_error).__name__}: {dtype_error}).")

    # Write to MySQL (an existing table of the same name is replaced)
    df.to_sql(
        table,
        con=engine,
        if_exists="replace",
        index=False,
        chunksize=10_000,
        method="multi",
    )
    print(f"   ✔ {len(df):,} rows → `{table}`")


→ Importing stolen_vehicles.csv  →  table `stolen_vehicles`
   ✔ 4,553 rows → `stolen_vehicles`

→ Importing locations.csv  →  table `locations`
   ✔ 16 rows → `locations`

→ Importing make_details.csv  →  table `make_details`
   ✔ 138 rows → `make_details`


7) Enable the %%sql magic & connect (write SQL directly)

In [None]:
# STEP 7A — Enable ipython-sql and pick a PrettyTable style that exists in this runtime
# NOTE(review): the connection URL below hard-codes the database name
# practiceDB and the root/root credentials; keep it in sync with Step 4.

# 1) make sure prettytable & ipython-sql are present (quiet install; OK to re-run)
%pip -q install prettytable ipython-sql

# 2) load (or reload) the SQL magic and connect
%reload_ext sql
%sql mysql+pymysql://root:root@localhost/practiceDB

# 3) pick a style that actually exists in this runtime
import prettytable as pt

# Style names are probed in this order; the first one found on the module wins.
candidates = [
    'PLAIN_COLUMNS',
    'MSWORD_FRIENDLY',
    'MARKDOWN',
    'DOUBLE_BORDER',
    'DEFAULT',           # some versions define this, some don't
    'RANDOM'
]
# NOTE(review): probing with hasattr() may emit one warning per candidate on
# some prettytable versions — TODO confirm against the installed version.
available = [s for s in candidates if hasattr(pt, s)]

# configure ipython-sql: return query results as pandas DataFrames and
# turn off the feedback option
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False

if available:
    chosen = available[0]
    # run_line_magic is used because the style name is only known at run time
    get_ipython().run_line_magic('config', f"SqlMagic.style = '{chosen}'")
    print(f"ipython-sql connected. PrettyTable style set to: {chosen}")
else:
    # No candidate style was found: nothing is configured, only a hint is printed.
    # NOTE(review): the message refers to a "Step 7B" cell that does not appear
    # in the visible part of this notebook.
    print("No PrettyTable styles detected. If %%sql still errors, run Step 7B (pandas helper) below.")

ipython-sql connected. PrettyTable style set to: PLAIN_COLUMNS


  available = [s for s in candidates if hasattr(pt, s)]
  available = [s for s in candidates if hasattr(pt, s)]
  available = [s for s in candidates if hasattr(pt, s)]
  available = [s for s in candidates if hasattr(pt, s)]
  available = [s for s in candidates if hasattr(pt, s)]
  available = [s for s in candidates if hasattr(pt, s)]


8) quick sanity checks

In [None]:
%%sql
-- Step 8a: list every table in the connected database
-- (after Step 6 this should show one table per imported CSV)
SHOW TABLES;

 * mysql+pymysql://root:***@localhost/practiceDB


Unnamed: 0,Tables_in_practiceDB
0,locations
1,make_details
2,stolen_vehicles


In [None]:
%%sql
-- Step 8b: peek at the first rows of one table.
-- Edit the name to any table listed by Step 8a (locations, make_details, stolen_vehicles).
SELECT * FROM stolen_vehicles LIMIT 5;

UsageError: Cell magic `%%sql` not found.


9) Dump your DB to Drive (save progress)

In [None]:
# Step 9: Save full DB (schema+data) to Drive for easy restore
DUMP_PATH = os.path.join(BASE_DIR, f"{DB_NAME}_backup.sql")
!mysqldump -u root -proot {DB_NAME} > "{DUMP_PATH}"
print("Database dumped to:", DUMP_PATH)

Database dumped to: /content/drive/MyDrive/ColabSQL/practiceDB_backup.sql


10) restore later from the dump (fast resume)

In [None]:
# Step 10: Restore DB from the dump in Drive
RESTORE_PATH = os.path.join(BASE_DIR, f"{DB_NAME}_backup.sql")

import os
if not os.path.exists(RESTORE_PATH):
    raise FileNotFoundError(f"Dump not found at {RESTORE_PATH}. Run the dump step first.")

!service mysql start
!mysql -u root -proot -e "CREATE DATABASE IF NOT EXISTS {DB_NAME};"
!mysql -u root -proot {DB_NAME} < "{RESTORE_PATH}"
print("Database restored from:", RESTORE_PATH)

NameError: name 'os' is not defined

11) If the VM idles, restart MySQL & reconnect

In [None]:
# Step 11: Quick recovery after idle/disconnect
!service mysql restart
%load_ext sql
%sql mysql+pymysql://root:root@localhost/practiceDB
print("MySQL restarted and SQL magic reconnected.")

mysql: unrecognized service
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/sql/connection.py", line 45, in __init__
    engine = sqlalchemy.create_engine(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<string>", line 2, in create_engine
  File "/usr/local/lib/python3.12/dist-packages/sqlalchemy/util/deprecations.py", line 281, in warned
    return fn(*args, **kwargs)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/sqlalchemy/engine/create.py", line 617, in create_engine
    dbapi = dbapi_meth(**dbapi_args)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/sqlalchemy/dialects/mysql/pymysql.py", line 89, in import_dbapi
    return __import__("pymysql")
           ^^^^^^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'pymysql'

Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
          

# Project Overview

# Problem Statement

# Data Cleaning and Preparation

# Exploratory Data Analysis (EDA)

# Solution and Implementation

# Recommendations and Next Steps