# Notas

Este notebook se ejecutó y desarrolló en SageMaker Studio.

In [None]:
import requests
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
from typing import List, Dict

# Global configuration
# Matplotlib style sheet applied to the figures drawn later in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
# Earliest date kept in the historical dataset (also the start of the demo data).
START_DATE = "2010-01-01"

# -----------------------------------------------------------------------------
# 1. GESTOR DE CONFIGURACIÓN (Mantenemos tu lógica verificada)
# -----------------------------------------------------------------------------
class SeriesConfigLoader:
    """Load the series catalogue from the working directory and expose the codes to fetch.

    On construction the first ``.csv`` file found in the current working
    directory is read; if there is none, the first ``.xlsx``/``.xls`` file is
    read instead. Column names are normalised to lower case without
    surrounding whitespace.

    Raises:
        FileNotFoundError: if the directory holds neither a CSV nor an Excel file.
    """

    def __init__(self):
        self.df_config = None
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # "first file wins" choice reproducible across runs and machines.
        files = sorted(os.listdir())
        csvs = [f for f in files if f.endswith('.csv')]
        xlsxs = [f for f in files if f.endswith(('.xlsx', '.xls'))]

        if csvs:
            try:
                # sep=None lets the python engine sniff the delimiter.
                self.df_config = pd.read_csv(csvs[0], sep=None, engine='python')
            except Exception:
                # Best-effort second attempt with an explicit delimiter and
                # encoding. `Exception` (instead of a bare except) keeps
                # KeyboardInterrupt / SystemExit from being swallowed.
                self.df_config = pd.read_csv(csvs[0], sep=';', encoding='latin-1')
            print(f"üìÇ Config cargada desde CSV: {csvs[0]}")
        elif xlsxs:
            self.df_config = pd.read_excel(xlsxs[0])
            print(f"üìÇ Config cargada desde Excel: {xlsxs[0]}")
        else:
            raise FileNotFoundError("‚ùå Faltan archivos de configuraci√≥n (.csv/.xlsx)")

        self.df_config.columns = [str(c).lower().strip() for c in self.df_config.columns]

    def get_verified_codes(self) -> Dict[str, str]:
        """Return the mapping ``series name -> series code`` used by the notebook.

        The mapping itself is fixed. When the loaded catalogue has a column
        whose name contains ``"cod"``, every code is looked up there and the
        names whose code is absent are reported on stdout; the hard-coded code
        is still returned for them, so the result never depends on the
        catalogue contents.

        Returns:
            A new dict with one entry per target series.
        """
        target_map = {
            "IPV": "F034.IPV.FLU.BCCH.2008.0.T",
            "IMACEC": "F032.IMC.IND.Z.Z.EP18.Z.Z.0.M",
            "TPM": "F022.TPM.TIN.D001.NO.Z.D",
            "CONST_ACT": "F032.PIB.FLU.R.CLP.2018.06.Z.0.M",
            "SERV_INM": "F032.PIB.FLU.R.CLP.2018.10.Z.0.M"
        }
        col_code = next((c for c in self.df_config.columns if "cod" in c), None)
        if col_code is not None:
            known_codes = set(self.df_config[col_code].astype(str).str.strip())
            # The original membership test returned the same value on both
            # branches, so a missing code went unnoticed; surface it instead.
            missing = [name for name, code in target_map.items() if code not in known_codes]
            if missing:
                print(f"   [AVISO] Codigos no encontrados en la configuracion: {', '.join(missing)}")
        return dict(target_map)

# -----------------------------------------------------------------------------
# 2. MOTOR DE INGESTA (API)
# -----------------------------------------------------------------------------
class BancoCentralClient:
    """Small client for the Banco Central REST endpoint at ``BASE_URL``.

    Stores the credentials and downloads the requested series one by one,
    returning them as a single month-end DataFrame.
    """
    BASE_URL = "https://si3.bcentral.cl/SieteRestWS/SieteRestWS.ashx"

    def __init__(self, user, password):
        self.user = user
        self.password = password

    def get_data(self, series_dict, start_date):
        """Download every series in *series_dict* and align them monthly.

        Args:
            series_dict: mapping ``column name -> series code``.
            start_date: observations dated before this value are discarded.

        Returns:
            A DataFrame with one column per successfully downloaded series,
            resampled to month end (last value of each month), forward-filled,
            interpolated and with incomplete rows dropped. An empty DataFrame
            is returned when no series could be downloaded.

        A series that fails (HTTP error, API error code, malformed payload,
        network problem) is skipped and reported on stdout; it never aborts
        the remaining downloads.
        """
        dfs = []
        print("\n--- Extrayendo Datos ---")
        for name, code in series_dict.items():
            params = {"user": self.user, "pass": self.password, "function": "GetSeries", "timeseries": code}
            try:
                r = requests.get(self.BASE_URL, params=params, timeout=10)
                if r.status_code != 200:
                    print(f"   [AVISO] {name} ({code}) omitida: HTTP {r.status_code}")
                    continue
                payload = r.json()  # parse the body once and reuse it
                if payload["Codigo"] != 0:
                    print(f"   [AVISO] {name} ({code}) omitida: Codigo {payload['Codigo']}")
                    continue
                df = pd.DataFrame(payload["Series"]["Obs"])
                df["indexDateString"] = pd.to_datetime(df["indexDateString"], format="%d-%m-%Y")
                # Non-numeric observations become NaN and are filled after the concat.
                df["value"] = pd.to_numeric(df["value"], errors='coerce')
                df = df.set_index("indexDateString")[["value"]]
                df = df[df.index >= start_date].resample('ME').last()
                df.columns = [name]
            except Exception as exc:
                # Per-series best effort. Only the exception type is printed:
                # the message of a request error can embed the full URL,
                # which carries the credentials as query parameters.
                print(f"   [AVISO] {name} ({code}) omitida: {type(exc).__name__}")
                continue
            dfs.append(df)

        if not dfs:
            return pd.DataFrame()
        return pd.concat(dfs, axis=1).ffill().interpolate().dropna()

# -----------------------------------------------------------------------------
# 3. MOTOR PREDICTIVO AVANZADO (Escenarios Dinámicos)
# -----------------------------------------------------------------------------
class RealEstateForecaster:
    """XGBoost regressor of the month-to-month change of a target series.

    The model is trained on lagged changes of the target plus one-period lags
    of every other column, and can then be rolled forward under simple
    macro-economic scenarios.

    Args:
        target_col: name of the column to forecast.
        n_lags: number of lagged target changes used as features.
    """

    def __init__(self, target_col='IPV', n_lags=3):
        self.target_col = target_col
        self.n_lags = n_lags
        # Deliberately large/deep ensemble to capture non-linearities.
        self.model = xgb.XGBRegressor(n_estimators=800, max_depth=6, learning_rate=0.03, subsample=0.8, random_state=42)
        self.last_real_val = None
        self.feature_cols = []

    def _target_lag_cols(self):
        """Return the lagged-target feature names, most recent lag first."""
        return [f"{self.target_col}_diff_lag{i}" for i in range(1, self.n_lags + 1)]

    def prepare_data(self, df):
        """Build the training table from a DataFrame of levels.

        Adds the first difference of the target, its ``n_lags`` lags and a
        one-period lag of every other column, then drops rows with missing
        values. Also records the feature column list and the last observed
        target level on the instance.

        Returns:
            ``(data, target_diff)``: the enriched DataFrame and the name of
            the differenced target column.
        """
        data = df.copy()
        target_diff = f"{self.target_col}_diff"
        data[target_diff] = data[self.target_col].diff()

        # Lagged target changes
        lag_cols = self._target_lag_cols()
        for i, col in enumerate(lag_cols, start=1):
            data[col] = data[target_diff].shift(i)

        # One-period lag of each exogenous column
        exog_lag_cols = []
        for c in df.columns:
            if c == self.target_col:
                continue
            lagged = f"{c}_lag1"
            data[lagged] = data[c].shift(1)
            exog_lag_cols.append(lagged)

        data = data.dropna()
        # List the features explicitly: matching on the substring "lag" would
        # also pick up any raw input column whose name happens to contain it.
        self.feature_cols = lag_cols + exog_lag_cols
        self.last_real_val = df[self.target_col].iloc[-1]
        return data, target_diff

    def predict_scenario(self, last_row_features, months, scenario_params):
        """Simulate the target level month by month under a scenario.

        Args:
            last_row_features: Series holding at least ``self.feature_cols``;
                it is copied, never modified.
            months: number of steps to simulate.
            scenario_params: dict with optional keys ``tpm_drift`` (additive
                monthly change of ``TPM_lag1``, default 0.0), ``macro_growth``
                (monthly multiplier of ``IMACEC_lag1`` and ``CONST_ACT_lag1``,
                default 1.0) and ``tpm_floor`` (lower bound for the rate,
                default 0.5).

        Returns:
            List with the simulated level for each month, obtained by
            accumulating the predicted changes on ``self.last_real_val``.
        """
        preds_diff = []
        curr_feats = last_row_features.copy()

        tpm_drift = scenario_params.get('tpm_drift', 0.0)
        macro_growth = scenario_params.get('macro_growth', 1.0)
        tpm_floor = scenario_params.get('tpm_floor', 0.5)
        lag_cols = self._target_lag_cols()

        for _ in range(months):
            # 1. Evolve the economy before predicting this step.
            if "TPM_lag1" in curr_feats.index:
                curr_feats["TPM_lag1"] = max(tpm_floor, curr_feats["TPM_lag1"] + tpm_drift)
            for macro_col in ("IMACEC_lag1", "CONST_ACT_lag1"):
                if macro_col in curr_feats.index:
                    curr_feats[macro_col] *= macro_growth

            # 2. Predict the change under the updated features.
            in_data = curr_feats[self.feature_cols].values.reshape(1, -1)
            pred_val = self.model.predict(in_data)[0]
            preds_diff.append(pred_val)

            # 3. Shift the target lags: oldest first so nothing is overwritten
            #    before it has been copied, then feed the prediction in as lag 1.
            if lag_cols and lag_cols[0] in curr_feats.index:
                for older, newer in zip(reversed(lag_cols[1:]), reversed(lag_cols[:-1])):
                    curr_feats[older] = curr_feats[newer]
                curr_feats[lag_cols[0]] = pred_val

        # Rebuild levels from the predicted changes.
        levels = []
        val = self.last_real_val
        for d in preds_diff:
            val += d
            levels.append(val)
        return levels

# -----------------------------------------------------------------------------
# 4. EJECUCIÓN, VISUALIZACIÓN Y "RAZONAMIENTO"
# -----------------------------------------------------------------------------
try:
    # A. Configuration and historical data
    loader = SeriesConfigLoader()
    target_codes = loader.get_verified_codes()

    # --- CREDENTIALS ---
    # Taken from the environment when set, so real credentials never have to
    # be written into the notebook; the placeholders remain the default.
    USER_BC = os.environ.get("BCCH_USER", "X")
    PASS_BC = os.environ.get("BCCH_PASS", "X")

    client = BancoCentralClient(USER_BC, PASS_BC)
    df_history = client.get_data(target_codes, START_DATE)

    # Demo fallback: synthetic series when nothing could be downloaded.
    if df_history.empty:
        print("\n‚ö†Ô∏è Usando datos sint√©ticos (Modo Demo)")
        idx = pd.date_range(START_DATE, "2025-02-01", freq="ME")
        # Seeded generator so the demo run is reproducible, like the model
        # (which is built with random_state=42).
        rng = np.random.default_rng(42)
        n_obs = len(idx)
        df_history = pd.DataFrame({
            "IPV": np.cumsum(rng.normal(0.3, 0.6, n_obs)) + 100,
            "IMACEC": rng.normal(100, 5, n_obs),
            "TPM": np.clip(rng.normal(4, 1.5, n_obs), 0.5, 12),
            "CONST_ACT": rng.normal(100, 10, n_obs),
            "SERV_INM": rng.normal(100, 2, n_obs)
        }, index=idx)

    # B. Training
    forecaster = RealEstateForecaster("IPV")
    df_model, target_name = forecaster.prepare_data(df_history)
    X = df_model[forecaster.feature_cols]
    y = df_model[target_name]
    forecaster.model.fit(X, y)

    # --- Model audit: which features carry the most weight ---
    print("\n" + "="*60)
    print("AUDITOR√çA L√ìGICA DEL MODELO (¬øQu√© mueve la aguja?)")
    print("="*60)

    importance = forecaster.model.feature_importances_
    feats = forecaster.feature_cols
    df_imp = pd.DataFrame({'Feature': feats, 'Peso': importance}).sort_values('Peso', ascending=False)

    # Automatic reading of the top-ranked feature
    top_driver = df_imp.iloc[0]
    print(f"1. Driver Principal: {top_driver['Feature']} (Peso: {top_driver['Peso']:.2%})")
    if "lag" in top_driver['Feature'] and "IPV" in top_driver['Feature']:
        print("   -> INTERPRETACI√ìN: El modelo es altamente 'Inercial'. El precio de ayer dicta el de hoy.")
        print("      Esto es normal en Real Estate, pero reduce la sensibilidad a cambios bruscos de tasa.")
    elif "TPM" in top_driver['Feature']:
        print("   -> INTERPRETACI√ìN: El modelo es sensible a las Tasas. ¬°Ojo a la pol√≠tica monetaria!")

    print("\nRanking Completo:")
    print(df_imp.head(5).to_string(index=False))

    # C. Scenario projections
    HORIZON_MONTHS = 60
    horizon_years = HORIZON_MONTHS / 12
    scenarios = {
        "Base (Inercial)":     {"tpm_drift": 0.0,    "macro_growth": 1.0015},
        "Optimista (Boom)":    {"tpm_drift": -0.08,  "macro_growth": 1.004},
        "Pesimista (Crisis)":  {"tpm_drift": +0.10,  "macro_growth": 0.998}
    }

    results = {}
    # NOTE(review): this row holds the lag features that explain the last
    # observed month, so the simulated path appears to start one step behind
    # the data — confirm whether that is intended.
    last_row = df_model.iloc[-1]
    last_real_price = df_history['IPV'].iloc[-1]

    print("\n" + "="*60)
    print(f"AN√ÅLISIS DE ESCENARIOS ({horizon_years:g} A√±os / {HORIZON_MONTHS} Meses)")
    print("="*60)

    for name, params in scenarios.items():
        pred_levels = forecaster.predict_scenario(last_row, HORIZON_MONTHS, params)
        results[name] = pred_levels

        # Return over the whole horizon and its annualised equivalent
        final_price = pred_levels[-1]
        total_return = (final_price / last_real_price) - 1
        cagr = (final_price / last_real_price) ** (1 / horizon_years) - 1

        # Sanity check of the result against the scenario's intent
        check = "‚úÖ L√≥gico"
        if name == "Optimista (Boom)" and total_return < 0:
            check = "‚ùå RARO (Deber√≠a subir)"
        elif name == "Pesimista (Crisis)" and total_return > 0.20:
            check = "‚ùå RARO (Demasiado optimista para crisis)"

        # A zero drift / unit multiplier is neither a rise nor a fall; the
        # previous two-way labels reported the flat-rate base case as "Sube".
        drift = params['tpm_drift']
        growth = params['macro_growth']
        rate_label = 'Baja' if drift < 0 else ('Sube' if drift > 0 else 'Estable')
        econ_label = 'Crece' if growth > 1 else ('Cae' if growth < 1 else 'Estable')

        print(f"Escenario: {name.upper()}")
        print(f"   - Precio Final: {final_price:.2f} (Inicio: {last_real_price:.2f})")
        print(f"   - Plusval√≠a Total: {total_return:+.2%}")
        print(f"   - Crecimiento Anual (CAGR): {cagr:+.2%}")
        print(f"   - Supuestos: Tasa {rate_label} | Econ {econ_label}")
        print(f"   - Veredicto Autom√°tico: {check}")
        print("-" * 30)

    # D. Visualisation
    plt.figure(figsize=(14, 8))

    # History: at most the last 120 observations
    cut_idx = max(0, len(df_history) - 120)
    plt.plot(df_history.index[cut_idx:], df_history['IPV'].iloc[cut_idx:],
             label="Historia Real", color='black', linewidth=2, alpha=0.7)

    # Projections. MonthEnd keeps every future date on a month end, matching
    # the history index; DateOffset(months=k) keeps the day of month, so it
    # drifts off month end whenever the last date is not the 31st.
    last_date = df_history.index[-1]
    dates_future = [last_date + pd.offsets.MonthEnd(m + 1) for m in range(HORIZON_MONTHS)]
    colors = {"Base (Inercial)": "blue", "Optimista (Boom)": "green", "Pesimista (Crisis)": "red"}

    for name, data in results.items():
        # Prepend the last real point so each dashed line starts at the history.
        plot_dates = [last_date] + dates_future
        plot_vals = [last_real_price] + data
        plt.plot(plot_dates, plot_vals, label=name, color=colors[name], linestyle='--', linewidth=2)
        plt.text(plot_dates[-1], plot_vals[-1], f"{data[-1]:.0f}", color=colors[name], fontweight='bold')

    plt.title(f"Proyecci√≥n IPV con Validaci√≥n L√≥gica\nDriver Principal: {top_driver['Feature']}", fontsize=14)
    plt.ylabel("√çndice de Precios (IPV)")
    plt.legend(loc="upper left")
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

    # Importance chart. plot_importance opens its own figure unless it is
    # given an axes, which used to leave the pre-sized figure blank.
    # NOTE(review): this chart requests 'weight' importance, which may rank
    # features differently from the table printed above — confirm.
    _, ax_imp = plt.subplots(figsize=(10, 4))
    xgb.plot_importance(forecaster.model, ax=ax_imp, max_num_features=10, importance_type='weight', title='Ranking de Variables (¬øQu√© mueve el precio?)')
    plt.tight_layout()
    plt.show()

except Exception as e:
    print(f"\n‚ùå ERROR FATAL: {e}")