# 07 - Validation
Propósito: validación cruzada (CV) de kriging (LOO o K-fold espacial), métricas y swath plots por dominio.

Inputs:
- `config/project.json`
- CSV en `cfg["data_csv_path"]`

Outputs esperados:
- Tabla de métricas globales y por dominio.
- Swath plots por dominio (X, Y, ejes principales).


### 1. Project setup


In [None]:
import os, sys, json
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the project root. When the notebook is launched from a directory
# named "notebooks", the root is its parent; otherwise the launch directory
# itself is taken as the root.
launch_dir = os.getcwd()
if os.path.basename(launch_dir) == "notebooks":
    PROJECT_ROOT = os.path.abspath(os.path.join(launch_dir, ".."))
else:
    PROJECT_ROOT = launch_dir

# Work from the project root so relative paths used below resolve against it.
os.chdir(PROJECT_ROOT)

# Put the root first on sys.path so the `src` package imports below resolve.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("PROJECT_ROOT:", PROJECT_ROOT)
print("CWD:", os.getcwd())

from src.preprocess import load_and_preprocess
from src.variography import experimental_variogram_2d, fit_variogram_model
from src.geostats_pipeline.validation import (
    kriging_cross_validation,
    compute_cv_metrics,
    plot_swath_panels,
)


In [None]:
# Read the project configuration (path is relative to the project root).
cfg_path = "config/project.json"
print("Config:", os.path.abspath(cfg_path))
with open(cfg_path, "r", encoding="utf-8") as config_file:
    cfg = json.loads(config_file.read())

print("Data CSV:", cfg["data_csv_path"])

# Standardized name -> key looked up under cfg["columns"]. Missing keys map
# to None because .get() is used.
column_keys = (
    ("x", "x"),
    ("y", "y"),
    ("z", "z"),
    ("var", "variable_objetivo"),
    ("domain", "domain"),
)
columns_cfg = cfg["columns"]
mapping = {std_name: columns_cfg.get(cfg_key) for std_name, cfg_key in column_keys}
print("Column mapping:", mapping)


### 2. Load data


In [None]:
# Run the project's preprocessing on the loaded configuration. The call
# returns three values; judging by the names these are the processed frame,
# the raw frame and the resulting column mapping (confirm in src.preprocess).
preprocessed = load_and_preprocess(cfg)
df, df_raw, mapping_out = preprocessed

# Quick sanity check of what was loaded.
print(df.head())
n_rows = len(df)
print("Rows:", n_rows)


### 3. Variograma rápido (para CV de kriging)


In [None]:
# Experimental variogram on the standardized columns, driven by the
# "variogram" section of the configuration.
vario_cfg = cfg["variogram"]
exp = experimental_variogram_2d(df, "x", "y", "var", vario_cfg)

# The fit receives the variance of the target column as a plain float.
sample_variance = float(df["var"].var())
vario_model = fit_variogram_model(exp, data_variance=sample_variance)

# Last expression of the cell: shows the fitted model in the notebook.
vario_model


### 4. Kriging CV (LOO o K-fold espacial)


In [None]:
# Cross-validation options: fitted variogram, kriging parameters from the
# configuration, and a 5-split "kfold" scheme.
cv_options = {
    "vario": vario_model,
    "params": cfg["kriging"],
    "method": "kfold",
    "n_splits": 5,
}
cv_result = kriging_cross_validation(df, "x", "y", "var", **cv_options)

# The result object exposes the per-sample table and the global metrics.
cv_df, metrics = cv_result.data, cv_result.metrics

# Last expression of the cell: shows the global metrics in the notebook.
metrics


### 5. Métricas por dominio


In [None]:
# Per-domain cross-validation metrics.
#
# The metrics table must be the LAST TOP-LEVEL expression of the cell to be
# rendered: with IPython's default settings an expression nested inside an
# `if` body is evaluated but never displayed. The name is therefore
# initialised to None (which the notebook does not render) and echoed once
# at the end of the cell.
domain_col = "domain"
metrics_by_domain = None

if domain_col in cv_df.columns:
    # One row per domain; compute_cv_metrics is applied to each group and its
    # result is wrapped in a Series so the entries become columns.
    metrics_by_domain = (
        cv_df.groupby(domain_col, observed=True)
        .apply(lambda group: pd.Series(compute_cv_metrics(group, vcol="var")))
        .reset_index()
    )
else:
    print("No hay columna domain en los datos estandarizados.")

metrics_by_domain


### 6. Swath plots por dominio


In [None]:
# Swath panels need the domain column; report and skip when it is absent.
if domain_col not in cv_df.columns:
    print("No hay columna domain para swath plots.")
else:
    # 12 bins per swath, using the standardized x / y / var columns.
    fig = plot_swath_panels(cv_df, "x", "y", "var", domain_col, n_bins=12)
    plt.show()
