In [4]:
import pandas as pd
import numpy as np
import re
from pathlib import Path

# Root folder under which each step writes its outputs (PASO_xx sub-folders),
# and the folder that holds the raw measurement files.
# NOTE(review): absolute, machine-specific Windows paths — this cell only runs
# on a machine with the same drive layout.
RESULTADO_DIR = Path(r"E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\resultado")
MEAS_DIR = Path(r"E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\measurements")

# Raw yield table, loaded as-is; plot ids and numeric yield columns are derived
# from it further down in this cell.
YIELD_RAW = MEAS_DIR / "Solynta_field_2025_GT_yield.csv"
yraw = pd.read_csv(YIELD_RAW)

# --- plot_id from obsUnitId: keep ONLY the number that follows the "P" ---
def obs_to_plotid(x):
    """Extract the integer plot number from an observation-unit identifier.

    The value is converted to text and stripped. The plot number is the run of
    digits that follows a ``P``/``p`` and extends to the end of the string;
    whitespace after the letter and leading zeros are ignored, so ``P1``,
    ``P01`` and ``P001`` all yield ``1``.

    Returns the plot number as ``int``, or ``np.nan`` when ``x`` is missing or
    does not end in such a pattern.
    """
    if not pd.isna(x):
        match = re.search(r"P\s*0*(\d+)\s*$", str(x).strip(), re.IGNORECASE)
        if match is not None:
            return int(match.group(1))
    return np.nan

# Name of the raw column holding total yield (kg/m2).
yield_col = "tubwght_total_kgm-2"

# Derive the plot key and the numeric yield columns directly on the raw frame.
yraw["plot_id"] = yraw["obsUnitId"].apply(obs_to_plotid)
yraw["yield_kgm2"] = pd.to_numeric(yraw[yield_col], errors="coerce")
yraw["yield_t_ha"] = 10.0 * yraw["yield_kgm2"]  # 1 kg/m2 = 10 t/ha

# Keep only rows that have both a parsed plot_id and a numeric yield.
y2 = yraw.loc[yraw["plot_id"].notna() & yraw["yield_kgm2"].notna()].copy()
y2["plot_id"] = y2["plot_id"].astype(int)

# Summarise per plot: mean of both yield columns, ordered by plot_id.
yield_by = (
    y2.groupby("plot_id", as_index=False)[["yield_kgm2", "yield_t_ha"]]
    .mean()
    .sort_values("plot_id")
)

# Write the per-plot table into the step-10 output folder.
OUT10 = RESULTADO_DIR / "PASO_10_yield_2025"
out_yield = OUT10 / "yield_by_plot_2025.csv"
OUT10.mkdir(parents=True, exist_ok=True)
yield_by.to_csv(out_yield, index=False)

# Sanity report: where the file went, how many plots it covers, and how many
# raw rows had an obsUnitId that could not be parsed.
print("✅ Guardado:", out_yield)
print(
    "plots únicos:", yield_by["plot_id"].nunique(),
    "| rango:", yield_by["plot_id"].min(), "-", yield_by["plot_id"].max(),
)
print("plot_id NaN en yield raw:", yraw["plot_id"].isna().sum())
display(yield_by.head())


✅ Guardado: E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\resultado\PASO_10_yield_2025\yield_by_plot_2025.csv
plots únicos: 202 | rango: 1 - 202
plot_id NaN en yield raw: 0


Unnamed: 0,plot_id,yield_kgm2,yield_t_ha
0,1,3.286667,32.866667
1,2,4.936296,49.362963
2,3,2.993333,29.933333
3,4,2.379487,23.794872
4,5,4.164444,41.644444


In [5]:
import pandas as pd
from pathlib import Path

RESULTADO_DIR = Path(r"E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\resultado")

# Inputs: the wide 3-date table written by step 09 (X) and the per-plot yield
# table written by step 10 (Y).
X_FILE = RESULTADO_DIR.joinpath("PASO_09_temporal_3fechas_obsUnitId", "dataset_wide_3dates.csv")
Y_FILE = RESULTADO_DIR.joinpath("PASO_10_yield_2025", "yield_by_plot_2025.csv")

X = pd.read_csv(X_FILE)
Y = pd.read_csv(Y_FILE)

# plot_id values present in both tables; reported before the two are merged.
common = set(X["plot_id"]) & set(Y["plot_id"])
print("X plots:", X["plot_id"].nunique(), "| Y plots:", Y["plot_id"].nunique())
print("✅ Plots en común:", len(common))


X plots: 202 | Y plots: 202
✅ Plots en común: 202


In [6]:
from pathlib import Path

# Step 11: join the step-09 table (X) with the per-plot yield (Y) and export
# the combined dataset as CSV and Excel.
OUT11 = RESULTADO_DIR / "PASO_11_dataset_final"
OUT11.mkdir(parents=True, exist_ok=True)

# The inner join keeps only plots present in both tables.
# validate="one_to_one" makes pandas raise MergeError when plot_id is
# duplicated on either side, instead of silently multiplying rows in the
# final dataset.
df = X.merge(Y, on="plot_id", how="inner", validate="one_to_one")

print("✅ Dataset final:", df.shape)
print("plots con yield:", df["plot_id"].nunique())
display(df.head())

# Same file stem for both export formats.
out_csv = OUT11 / "dataset_Xy_3dates_yield2025.csv"
out_xlsx = OUT11 / "dataset_Xy_3dates_yield2025.xlsx"
df.to_csv(out_csv, index=False)
df.to_excel(out_xlsx, index=False)

print("✅ Guardado:", out_csv)
print("✅ Guardado:", out_xlsx)


✅ Dataset final: (202, 25)
plots con yield: 202


Unnamed: 0,plot_id,obsUnitId,CHLGR_mean_20251002,CHLGR_mean_20251103,CHLGR_mean_20251210,MARI_mean_20251002,MARI_mean_20251103,MARI_mean_20251210,NDVI_mean_20251002,NDVI_mean_20251103,...,SAVI2_mean_20251103,SAVI2_mean_20251210,SR_mean_20251002,SR_mean_20251103,SR_mean_20251210,VF_20251002,VF_20251103,VF_20251210,yield_kgm2,yield_t_ha
0,1,1,4.458802,5.138012,2.853492,5.038614,7.177726,4.443617,0.876604,0.88683,...,0.0039,0.003851346,15.36269,17.003988,6.728218,0.234843,0.346904,0.091534,3.286667,32.866667
1,2,2,5.206114,4.734136,3.428803,6.521187,5.317302,3.839409,0.899277,0.852209,...,0.004334,0.00495971,19.509604,12.621037,9.198384,0.156293,0.345024,0.096375,4.936296,49.362963
2,3,3,5.469518,6.041914,3.069994,5.774983,7.422002,27505.001953,0.891328,0.844865,...,0.004433,6.130523e-07,17.635601,11.901986,8.733396,0.21133,0.317738,0.134983,2.993333,29.933333
3,4,4,5.090737,2.946303,2.614598,5.251598,2.87374,24947.978516,0.883821,0.688718,...,0.004439,5.749103e-07,16.541691,5.425055,6.849193,0.158639,0.111688,0.095709,2.379487,23.794872
4,5,5,4.79189,6.231022,3.508522,4.923483,7.333121,37030.667969,0.873134,0.807354,...,0.004632,5.173079e-07,15.007159,9.394213,9.851377,0.216232,0.382168,0.121769,4.164444,41.644444


✅ Guardado: E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\resultado\PASO_11_dataset_final\dataset_Xy_3dates_yield2025.csv
✅ Guardado: E:\proyecto_agric_precision\Dataset de Imagenes\ortomosaicos_canales\resultado\PASO_11_dataset_final\dataset_Xy_3dates_yield2025.xlsx
