# 05 - Validation
Propósito: validación cruzada simple + métricas.

Inputs:
- `config/project.json`
- CSV en `cfg["data_csv_path"]`

Outputs esperados:
- Consola con métricas (sin archivos nuevos).


### 1. Project setup


In [1]:
import os, sys, json, glob
from IPython.display import Image, display

# Resolve the project root: when the kernel starts inside a "notebooks"
# directory the root is its parent; otherwise the current directory is the root.
PROJECT_ROOT = os.getcwd()
if os.path.basename(PROJECT_ROOT) == "notebooks":
    PROJECT_ROOT = os.path.abspath(os.path.join(PROJECT_ROOT, os.pardir))

# Work from the root so relative paths used later resolve against it.
os.chdir(PROJECT_ROOT)

# Put the root at the front of sys.path (once) so `src.*` imports resolve.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("PROJECT_ROOT:", PROJECT_ROOT)
print("CWD:", os.getcwd())

from src.preprocess import load_and_preprocess
from src.validation import simple_cross_validation


PROJECT_ROOT: c:\Users\joelm\Documents\geostats
CWD: c:\Users\joelm\Documents\geostats


### 2. Load config


In [2]:
# Read the project configuration; the path is relative to the current
# working directory.
cfg_path = "config/project.json"
print("Config:", os.path.abspath(cfg_path))
with open(cfg_path, encoding="utf-8") as f:
    cfg = json.load(f)

print("Data CSV:", cfg["data_csv_path"])

# Short alias -> column name configured under cfg["columns"]; an entry that is
# absent from the config maps to None.
mapping = {
    alias: cfg["columns"].get(config_key)
    for alias, config_key in (
        ("x", "x"),
        ("y", "y"),
        ("z", "z"),
        ("var", "variable_objetivo"),
    )
}
print("Column mapping:", mapping)


Config: c:\Users\joelm\Documents\geostats\config\project.json
Data CSV: csv/Conminution.csv
Column mapping: {'x': 'X', 'y': 'Y', 'z': 'Z', 'var': 'Bwi_kWh_t'}


### 3. Load data


In [3]:
# Load the data via the project helper. Note that this rebinds `mapping`.
df, df_raw, mapping = load_and_preprocess(cfg)

# Percentage difference in row count between df_raw and df; the zero check
# avoids a division by zero when df_raw is empty.
rows_before = len(df_raw)
rows_after = len(df)
removed_pct = 0.0 if rows_before == 0 else (rows_before - rows_after) * 100.0 / rows_before


def _range_text(series):
    """Return "(min, max)" for a column as text.

    The bounds are formatted with str() (via the f-string) instead of the
    tuple repr, so NumPy scalars print as plain numbers rather than as
    np.float64(...) wrappers.
    """
    return f"({series.min()}, {series.max()})"


print("Shape raw:", df_raw.shape, "clean:", df.shape)
# sep="\n" keeps the first dtype row flush-left. Embedding "\n" in the label
# instead would be followed by print's default " " separator, indenting that
# row by one character and misaligning the listing.
print("Dtypes:", df.dtypes, sep="\n")
print("Rows removed (%):", f"{removed_pct:.2f}")
print("X range:", _range_text(df["x"]))
print("Y range:", _range_text(df["y"]))
# Z is optional: report it only when the column exists and has at least one
# non-null value.
if "z" in df.columns and df["z"].notna().any():
    print("Z range:", _range_text(df["z"]))
else:
    print("Z range: n/a")


Shape raw: (600, 12) clean: (600, 12)
Dtypes:
 x             float64
y             float64
z             float64
Hole ID           str
Samples_ID        str
Lote              str
domain            str
Minz              str
EM_new            str
EM                str
var           float64
Axb_SMC       float64
dtype: object
Rows removed (%): 0.00
X range: (np.float64(2363710.832), np.float64(2365405.149))
Y range: (np.float64(6485685.572631), np.float64(6487146.812069))
Z range: (np.float64(3104.889936), np.float64(3834.876096))


### 4. Validation


In [4]:
# Search parameters are read from the "kriging" section of the config, with
# fallbacks of 150.0 and 12 when the keys are absent.
search_radius = float(cfg["kriging"].get("search_radius", 150.0))
max_samples = int(cfg["kriging"].get("max_samples", 12))

# Run the cross-validation on the "x"/"y" coordinate columns and the "var"
# column of the loaded frame.
cv_df, metrics = simple_cross_validation(
    df, "x", "y", "var", radius=search_radius, max_samples=max_samples
)

print(metrics)


{'ME': -0.007291392206040331, 'MAE': 1.724424903324948, 'RMSE': 2.5190891248328855}


### 5. Artifacts generated


In [5]:
expected_figures = []
expected_tables = []
expected_models = []

def _existing(paths):
    return [p for p in paths if os.path.exists(p)]

figure_paths = _existing(expected_figures)
table_paths = _existing(expected_tables)
model_paths = _existing(expected_models)

print("Figures:", [os.path.abspath(p) for p in figure_paths])
print("Tables:", [os.path.abspath(p) for p in table_paths])
print("Models:", [os.path.abspath(p) for p in model_paths])

for p in figure_paths:
    display(Image(filename=p))


Figures: []
Tables: []
Models: []
