# 02 – Feature Engineering

Build a coarse grid over the study area and create labels from MRDS (USGS Mineral Resources Data System) presence/absence.


In [1]:
# Quietly (-q) install the packages listed in ../requirements.txt.
# %pip (rather than !pip) targets the environment of the running kernel.
%pip install -q -r ../requirements.txt


Note: you may need to restart the kernel to use updated packages.


In [None]:
# Step 02a: build the polygon-masked grid and save its centroid coordinates

import os, sys
import numpy as np
import joblib
from pathlib import Path

# Put the parent of the current working directory on the import path so the
# `src` package can be imported; skip the append if it is already listed.
project_root = Path.cwd().parent
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.append(str(project_root))

from src.data_utils import state_polygon
from src.features import build_grid_from_bbox, grid_centroids

# --- Configuration ----------------------------------------------------------
STATE = "Missouri"
TARGET_EPSG = 4326  # CRS the state polygon is expected to be in (see step 1)
GRID_PATH = project_root / "data/processed/grid_gdf.joblib"
COORDS_PATH = project_root / "data/processed/X_coords.npy"
# Both artifacts live in the same directory; create it (and parents) if needed.
GRID_PATH.parent.mkdir(parents=True, exist_ok=True)

# 1) Get accurate state polygon (from geology.gpkg if available; else bbox)
poly = state_polygon(STATE)  # returns shapely Polygon/MultiPolygon in EPSG:4326

# 2) Build the grid over the polygon's bounding box
bbox = poly.bounds
grid = build_grid_from_bbox(bbox)

# 3) Normalise the grid CRS *before* the spatial mask. The polygon is in
#    EPSG:4326, so intersecting it with a grid in any other CRS would compare
#    coordinates from two different systems. When the grid is already in
#    EPSG:4326 (or has no CRS set) this ordering gives the same result as
#    masking first.
if grid.crs is None:
    grid = grid.set_crs(TARGET_EPSG)
elif grid.crs.to_epsg() != TARGET_EPSG:
    grid = grid.to_crs(TARGET_EPSG)

# 4) Keep only cells that touch the state polygon and renumber them from 0
grid = grid[grid.geometry.intersects(poly)].copy().reset_index(drop=True)
if len(grid) == 0:
    # An empty grid would be saved silently and only fail in later steps.
    raise ValueError(
        f"No grid cells intersect the {STATE} polygon; check the polygon and grid CRS"
    )

# 5) Save the masked grid
joblib.dump(grid, GRID_PATH)
print(f"Saved polygon-masked grid to {GRID_PATH} with {len(grid)} cells")

# 6) Compute coords on *this* grid and save, so rows line up with the grid
# NOTE(review): the cell's captured output ends with a fragment pointing at
# `grid.geometry.centroid`, which looks like a warning raised inside
# grid_centroids -- confirm whether centroids should be computed in a
# projected CRS instead.
coords = grid_centroids(grid)                      # shape (N,2)
np.save(COORDS_PATH, coords)
print("Saved data/processed/X_coords.npy", coords.shape)


Saved polygon-masked grid to c:\Users\angel\ore-exploration-ml\ore-exploration-ml\data\processed\grid_gdf.joblib with 7646 cells
Saved data/processed/X_coords.npy (7646, 2)



  centroids = grid.geometry.centroid
