In [1]:
# =====================================================
# 🌳 survival_source_lookup.ipynb
# WorldTreeSystem / MonthlyReport Extension
# Objetivo: identificar la fuente temporal ("Inventory YYYY")
# de las métricas de supervivencia por contrato.
# =====================================================

from core.db import get_engine
from core.libs import pd, np, timestamp_now, Path
from core.paths import DATABASE_EXPORTS_DIR
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


In [2]:
# -----------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------
# Output folder for this notebook: an "out" directory under DATABASE_EXPORTS_DIR.
OUT_DIR = DATABASE_EXPORTS_DIR / "out"
# parents=True so a fresh checkout/machine (where DATABASE_EXPORTS_DIR itself
# may not exist yet) does not fail with FileNotFoundError; exist_ok=True keeps
# the cell idempotent on re-run.
OUT_DIR.mkdir(parents=True, exist_ok=True)
# Target CSV paths (not written by any cell visible in this part of the notebook).
OUTPUT_MAP_CSV = OUT_DIR / "survival_metric_by_etp.csv"
OUTPUT_AGG_CSV = OUT_DIR / "survival_metric_summary_by_etp.csv"

# Database handle shared by every query in the notebook.
engine = get_engine()
print(f"üß† Inicio del proceso: {timestamp_now()}")
print(f"üìÇ Directorio de salida: {OUT_DIR}")


üíª Conectado a la base de datos helloworldtree
üß† Inicio del proceso: 2025-11-06 14:55:22
üìÇ Directorio de salida: C:\Users\HeyCe\World Tree Technologies Inc\Operations - Documentos\WorldTreeSystem\DatabaseExports\out


In [3]:
# def columnas_csv(schema: str, table: str):
#     query = f"""
#     SELECT column_name
#     FROM information_schema.columns
#     WHERE table_schema = '{schema}'
#       AND table_name = '{table}'
#     ORDER BY ordinal_position
#     """
#     cols = pd.read_sql_query(query, engine)['column_name'].tolist()
#     joined = ", ".join(cols)
#     print(f"üß© {schema}.{table} ({len(cols)} columnas):\n")
#     print(joined)
#     return joined
#
# # === Ejemplo ===
# cols_sc = columnas_csv("masterdatabase", "survival_current")
# cols_cti = columnas_csv("masterdatabase", "contract_tree_information")

In [4]:
# -----------------------------------------------------
# 1. LOAD BASE TABLES
# -----------------------------------------------------
# Per-contract survival metrics; the WHERE clause skips rows that have no
# surviving-tree count.
query_sc = """
SELECT contract_code, current_survival_pct, current_surviving_trees, survival_metric_source
FROM masterdatabase.survival_current
WHERE current_surviving_trees IS NOT NULL
"""

# Contract attributes (region, ETP year, status) joined onto the metrics later.
query_cti = """
SELECT contract_code, region, etp_year, status
FROM masterdatabase.contract_tree_information
"""

# Run both queries against the shared engine, in declaration order.
sc, cti = (
    pd.read_sql(sql=statement, con=engine)
    for statement in (query_sc, query_cti)
)

# Row counts as a quick sanity check of what was loaded.
print(f"‚úÖ survival_current cargada: {len(sc)} filas")
print(f"‚úÖ contract_tree_information cargada: {len(cti)} filas")


‚úÖ survival_current cargada: 428 filas
‚úÖ contract_tree_information cargada: 453 filas


In [6]:
# -----------------------------------------------------
# 2. MERGE AND FORMAT (uses the original survival_metric_source)
# -----------------------------------------------------
# Left join: every survival_current row is kept; contracts without a match in
# contract_tree_information get NaN for region / etp_year / status.
# NOTE(review): if contract_code is not unique in contract_tree_information,
# this join duplicates survival rows and inflates the tree sums below — confirm.
df = sc.merge(cti, on="contract_code", how="left")

# Label rows without a recorded metric source instead of leaving them as NaN.
if "survival_metric_source" in df.columns:
    df["survival_metric_source"] = df["survival_metric_source"].fillna("Unknown Source")
else:
    # Fallback in case the column is missing from the loaded table.
    df["survival_metric_source"] = "Unknown Source"


# -----------------------------------------------------
# 3. AGGREGATE (one row per ETP year + region; one column set per status)
# -----------------------------------------------------
# `values` is a plain list of column names; the per-column aggregation is
# defined only by `aggfunc`. (The previous dict passed as `values` worked only
# because pandas iterates its keys — the dict's values were silently ignored.)
pivot = pd.pivot_table(
    df,
    index=["etp_year", "region"],
    columns="status",
    values=["contract_code", "current_surviving_trees", "current_survival_pct"],
    # NOTE(review): fill_value=0 is applied to the mean columns too, so a
    # status with no contracts shows 0.0 survival, indistinguishable from a
    # real 0% — confirm this is intended before consuming the mean columns.
    fill_value=0,
    aggfunc={
        "contract_code": "nunique",            # distinct contracts
        "current_surviving_trees": "sum",      # total surviving trees
        "current_survival_pct": "mean",        # unweighted mean survival %
    },
    observed=False,
)

# Flatten the (metric, status) column MultiIndex to "<metric>__<status>".
pivot.columns = [f"{m}__{s}" for m, s in pivot.columns]
pivot = pivot.reset_index()

# Status-independent totals per ETP year + region, for QA / quick consumption.
# dropna=False keeps groups whose etp_year or region is NaN.
totales = (
    df.groupby(["etp_year", "region"], dropna=False)
      .agg(
          total_contracts=("contract_code", "nunique"),
          total_surviving_trees=("current_surviving_trees", "sum"),
          mean_survival_pct=("current_survival_pct", "mean"),
      )
      .reset_index()
)

# Attach the per-status columns to the totals.
# NOTE(review): pivot_table drops NaN group keys by default while `totales`
# keeps them, so such rows would carry NaN (not 0) in the status columns after
# this left merge — confirm.
agg = totales.merge(pivot, on=["etp_year", "region"], how="left")

print("\nüìä Resumen (ETP ‚Üí fila por regi√≥n; columnas = status):")
display(agg.head(10))



üìä Resumen (ETP ‚Üí fila por regi√≥n; columnas = status):


Unnamed: 0,etp_year,region,total_contracts,total_surviving_trees,mean_survival_pct,contract_code__Active,contract_code__Out of Program,contract_code__RFT,contract_code__Rescinded,current_survival_pct__Active,current_survival_pct__Out of Program,current_survival_pct__RFT,current_survival_pct__Rescinded,current_surviving_trees__Active,current_surviving_trees__Out of Program,current_surviving_trees__RFT,current_surviving_trees__Rescinded
0,2015,Costa Rica,12,1474,0.055825,0.0,12.0,0.0,0.0,0.0,0.055825,0.0,0.0,0.0,1474.0,0.0,0.0
1,2015,USA,13,4530,0.227455,5.0,7.0,1.0,0.0,0.535383,0.04,0.0,0.0,4072.0,458.0,0.0,0.0
2,2016,Costa Rica,27,2913,0.098273,1.0,26.0,0.0,0.0,0.623377,0.078077,0.0,0.0,623.0,2290.0,0.0,0.0
3,2016,USA,18,8930,0.386551,7.0,9.0,2.0,0.0,0.762207,0.08,0.45123,0.0,6681.0,797.0,1452.0,0.0
4,2017,USA,1,6486,0.415235,1.0,0.0,0.0,0.0,0.415235,0.0,0.0,0.0,6486.0,0.0,0.0,0.0
5,2018,Costa Rica,12,7199,0.267944,1.0,10.0,1.0,0.0,0.70533,0.189,0.62,0.0,1411.0,5169.0,619.0,0.0
6,2018,Guatemala,11,9158,0.304569,4.0,7.0,0.0,0.0,0.792565,0.025714,0.0,0.0,8974.0,184.0,0.0,0.0
7,2018,Mexico,6,8160,0.198333,0.0,4.0,2.0,0.0,0.0,0.0,0.595,0.0,0.0,0.0,8160.0,0.0
8,2018,USA,20,14606,0.526448,13.0,7.0,0.0,0.0,0.809921,0.0,0.0,0.0,14606.0,0.0,0.0,0.0
9,2020,Costa Rica,35,40590,0.625429,20.0,14.0,1.0,0.0,0.8045,0.355,0.83,0.0,24784.0,12071.0,3735.0,0.0


In [8]:
# # -----------------------------------------------------
# # üß© CONTRATOS POR INVENTARIO / REGI√ìN / ETP
# # usando compute_allocation_type_contract
# # -----------------------------------------------------
# from MonthlyReport.tables_process import compute_allocation_type_contract
#
# # Calcular tipo de asignaci√≥n (COP / ETP / COP/ETP)
# df["allocation_type"] = compute_allocation_type_contract(df)
#
# # Contar contratos por grupo
# contracts_summary = (
#     df.groupby(["etp_year", "region", "allocation_type", "survival_metric_source"], dropna=False)
#       .agg(contract_count=("contract_code", "nunique"))
#       .reset_index()
#       .sort_values(["etp_year", "region", "allocation_type", "survival_metric_source"])
# )
#
# print("\nüìä Contratos por ETP / Regi√≥n / Fuente de inventario:")
# display(contracts_summary.head(10))
#
# # Export opcional
# #contracts_summary.to_csv(OUT_DIR / "contracts_by_inventory_source.csv", index=False)



üìä Contratos por ETP / Regi√≥n / Fuente de inventario:


Unnamed: 0,etp_year,region,allocation_type,survival_metric_source,contract_count
0,2015,Costa Rica,,2024 Survival% Inventory + Estimate,1
1,2015,Costa Rica,,master 31/12/2024,11
2,2015,USA,,2024 Survival% Inventory + Estimate,4
3,2015,USA,,Inventory 2025,3
4,2015,USA,,as of 31/12/2023,1
5,2015,USA,,master 31/12/2024,5
6,2016,Costa Rica,,2024 Survival% Inventory + Estimate,3
7,2016,Costa Rica,,Inventory 2025,1
8,2016,Costa Rica,,master 31/12/2024,23
9,2016,USA,,2024 Survival% Inventory + Estimate,2


In [11]:
# -----------------------------------------------------
# CONTRACTS BY INVENTORY SOURCE / REGION / ETP YEAR
# (allocation type comes from compute_allocation_type_contract)
# -----------------------------------------------------
from MonthlyReport.tables_process import compute_allocation_type_contract

# Tag every row with its allocation type (COP / ETP / COP/ETP) as returned by
# the project helper. This adds a column to `df` in place.
# NOTE(review): the output saved with this cell shows allocation_type empty on
# every row — verify which columns the helper expects to find in `df`.
df["allocation_type"] = compute_allocation_type_contract(df)

# The same keys drive both the grouping and the final ordering.
grouping_keys = ["etp_year", "region", "allocation_type", "survival_metric_source"]

# Distinct contracts per group; dropna=False keeps groups with a missing key.
contracts_by_group = df.groupby(grouping_keys, dropna=False)
contracts_summary = (
    contracts_by_group
    .agg(contract_count=pd.NamedAgg(column="contract_code", aggfunc="nunique"))
    .reset_index()
    .sort_values(grouping_keys)
)

print("\nüìä Contratos por ETP / Regi√≥n / Fuente de inventario:")
display(contracts_summary.head(10))

# Optional export (disabled)
#contracts_summary.to_csv(OUT_DIR / "contracts_by_inventory_source.csv", index=False)



üìä Contratos por ETP / Regi√≥n / Fuente de inventario:


Unnamed: 0,etp_year,region,allocation_type,survival_metric_source,contract_count
0,2015,Costa Rica,,2024 Survival% Inventory + Estimate,1
1,2015,Costa Rica,,master 31/12/2024,11
2,2015,USA,,2024 Survival% Inventory + Estimate,4
3,2015,USA,,Inventory 2025,3
4,2015,USA,,as of 31/12/2023,1
5,2015,USA,,master 31/12/2024,5
6,2016,Costa Rica,,2024 Survival% Inventory + Estimate,3
7,2016,Costa Rica,,Inventory 2025,1
8,2016,Costa Rica,,master 31/12/2024,23
9,2016,USA,,2024 Survival% Inventory + Estimate,2
