In [1]:
# ================================================================================
# TIMESFM h=1 USD/PEN FORECASTING - LOCAL (VS CODE)
# ================================================================================
# Modelo: TimesFM (Google) - 200M parameters
# Dataset: DATA.csv (USD/PEN daily)
# Target: PEN level at t (h=1 one-step-ahead); PEN_log_ret is derived only for metrics
# Environment: Jupyter Notebook - VS Code - Windows
# ================================================================================

# ================================================================================
# CELDA 1: IMPORTS Y CONFIGURACI√ìN
# ================================================================================

import json
import logging
import os
import warnings
from datetime import datetime
from pathlib import Path
from typing import Tuple, Dict, List, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

# TimesFM-specific
import timesfm
import torch

# Configuraci√≥n de logging
# Logging configuration: INFO level, timestamped "<time> - <level> - <message>" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Logger shared by every later cell in the notebook.
logger = logging.getLogger(__name__)

# Plot styling
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Random seed: seed NumPy and PyTorch (CPU, plus every CUDA device when available).
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(RANDOM_STATE)

# Start-up banner: record the PyTorch version and which device (if any) CUDA exposes.
logger.info("=" * 80)
logger.info("TIMESFM USD/PEN FORECASTING - LOCAL WINDOWS")
logger.info("=" * 80)
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"CUDA device: {torch.cuda.get_device_name(0)}")
logger.info("=" * 80)


 See https://github.com/google-research/timesfm/blob/master/README.md for updated APIs.


2025-11-26 15:19:51 - INFO - TIMESFM USD/PEN FORECASTING - LOCAL WINDOWS
2025-11-26 15:19:51 - INFO - PyTorch version: 2.5.1+cu121
2025-11-26 15:19:51 - INFO - CUDA available: True
2025-11-26 15:19:51 - INFO - CUDA device: NVIDIA GeForce RTX 2060


Loaded PyTorch TimesFM, likely because python version is 3.10.19 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 16:41:31) [MSC v.1929 64 bit (AMD64)].


In [2]:
# %%
# ================================================================================
# CELDA 2: CONSTANTES Y CONFIGURACI√ìN - RUTAS WINDOWS LOCAL
# ================================================================================

# ============================================================================
# PROJECT PATHS
# ============================================================================
# The project root defaults to the original author's Windows folder. Set the
# USDPEN_BASE_DIR environment variable to run the notebook from another machine
# or checkout without editing this cell.
BASE_DIR = Path(os.environ.get(
    "USDPEN_BASE_DIR",
    r"C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria",
))
DATA_PATH = BASE_DIR / "DATA.csv"
OUTPUT_DIR = BASE_DIR / "TimesFM_h1_USD_PEN"
CHECKPOINT_DIR = OUTPUT_DIR / "checkpoints"
PREDICTIONS_DUMP = BASE_DIR / "predictions_dump"
OOF_DIR = BASE_DIR / "oof_predictions"

# Create output directories (idempotent).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
PREDICTIONS_DUMP.mkdir(parents=True, exist_ok=True)
OOF_DIR.mkdir(parents=True, exist_ok=True)

# mkdir(parents=True) happily creates a fresh tree under a mistyped BASE_DIR, so
# surface a missing input file here instead of at load time.
if not DATA_PATH.exists():
    logger.warning(f"Input file not found: {DATA_PATH}")

# ============================================================================
# FORECAST SETUP: THE TARGET IS THE PEN LEVEL
# ============================================================================
TARGET_COL = 'PEN'              # levels (previously 'PEN_log_ret')
FREQ = 'D'
N_HOLDOUT = 60                  # holdout size: exactly 60 observations
H_FORECAST = 1                  # forecast horizon h=1
MIN_TRAIN = 252                 # minimum training window (about one year)
STEP_SIZE = 21                  # step for walk-forward OOF

# TimesFM configuration
TIMESFM_CONFIG = {
    'model_id': 'google/timesfm-1.0-200m-pytorch',
    'context_len': 512,         # about two years of daily observations
    'horizon_len': H_FORECAST,
    'batch_size': 32,
    'backend': 'gpu' if torch.cuda.is_available() else 'cpu'
}

# Reference baseline scores (ARX model) used for comparison.
BASELINE_ARX = {'DA': 51.67, 'MASE': 0.9398}

# Run ID: timestamp of when this cell was executed.
RUN_ID = datetime.now().strftime("%Y%m%d_%H%M%S")

logger.info("Configuration loaded - VERSI√ìN NIVELES:")
logger.info(f"  Target: {TARGET_COL} (NIVELES)")
logger.info(f"  Base dir: {BASE_DIR}")
logger.info(f"  N_HOLDOUT: {N_HOLDOUT}")
logger.info(f"  H_FORECAST: {H_FORECAST}")
logger.info(f"  TimesFM model: {TIMESFM_CONFIG['model_id']}")
logger.info(f"  Context length: {TIMESFM_CONFIG['context_len']}")
logger.info(f"  Backend: {TIMESFM_CONFIG['backend']}")



2025-11-26 15:19:51 - INFO - Configuration loaded - VERSI√ìN NIVELES:
2025-11-26 15:19:51 - INFO -   Target: PEN (NIVELES)
2025-11-26 15:19:51 - INFO -   Base dir: C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria
2025-11-26 15:19:51 - INFO -   N_HOLDOUT: 60
2025-11-26 15:19:51 - INFO -   H_FORECAST: 1
2025-11-26 15:19:51 - INFO -   TimesFM model: google/timesfm-1.0-200m-pytorch
2025-11-26 15:19:51 - INFO -   Context length: 512
2025-11-26 15:19:51 - INFO -   Backend: gpu


In [3]:
# %%
# ================================================================================
# CELDA 3: FXMetrics CLASS - ‚òÖ‚òÖ‚òÖ ADAPTADA PARA NIVELES‚ÜíRETORNOS ‚òÖ‚òÖ‚òÖ
# ================================================================================

class FXMetrics:
    """Forecast-evaluation metrics for a series that is forecast in levels.

    The ``*_from_levels`` methods first convert levels to log returns relative to the
    previous observed level and score in return space. The ``calculate_*`` methods
    score the arrays exactly as given.

    Every method accepts any array-like input (list, tuple, ndarray, pandas Series).
    Inputs are converted to float arrays and combined *positionally*, so pandas
    objects with different index labels are never re-aligned by label.
    """

    @staticmethod
    def _log_returns(levels, prev_levels) -> np.ndarray:
        """Return ``log(levels / prev_levels)`` element by element as a float array."""
        return np.log(np.asarray(levels, dtype=float) / np.asarray(prev_levels, dtype=float))

    @staticmethod
    def _hit_rate(dir_true: np.ndarray, dir_pred: np.ndarray) -> float:
        """Percentage of positions where both sign arrays agree; NaN when there are none."""
        total = len(dir_true)
        if total == 0:
            # Nothing to score: be explicit instead of relying on a 0/0 division.
            return float('nan')
        return float(np.sum(dir_true == dir_pred) / total * 100)

    @staticmethod
    def directional_accuracy_from_levels(y_true_levels: np.ndarray, 
                                          y_pred_levels: np.ndarray,
                                          y_prev_levels: np.ndarray) -> float:
        """
        Directional accuracy (DA) computed from level forecasts.

        Both the realised and the predicted move are measured against the same
        previous level, and their signs are compared. A zero move has sign 0 and
        only matches another zero move.

        Args:
            y_true_levels: Realised levels at t
            y_pred_levels: Predicted levels for t
            y_prev_levels: Observed levels at t-1

        Returns:
            DA as a percentage (0-100); NaN for empty input
        """
        true_dir = np.sign(FXMetrics._log_returns(y_true_levels, y_prev_levels))
        pred_dir = np.sign(FXMetrics._log_returns(y_pred_levels, y_prev_levels))
        return FXMetrics._hit_rate(true_dir, pred_dir)
    
    @staticmethod
    def mase_from_levels(y_true_levels: np.ndarray,
                         y_pred_levels: np.ndarray,
                         y_prev_levels: np.ndarray,
                         train_levels: np.ndarray) -> float:
        """
        MASE in log-return space, computed from level forecasts.

        The numerator is the model MAE on log returns. The scale is the mean absolute
        log return of consecutive training levels.

        Args:
            y_true_levels: Realised holdout levels
            y_pred_levels: Predicted levels
            y_prev_levels: Observed levels one step before each forecast
            train_levels: Training levels used to compute the scaling MAE

        Returns:
            Scaled error; ``np.inf`` when the training scale is not strictly positive
            (including when it cannot be computed from fewer than two training levels)
        """
        model_mae = FXMetrics.mae_from_levels(y_true_levels, y_pred_levels, y_prev_levels)

        train = np.asarray(train_levels, dtype=float)
        naive_mae = np.mean(np.abs(np.log(train[1:] / train[:-1])))

        return float(model_mae / naive_mae) if naive_mae > 0 else np.inf
    
    @staticmethod
    def mae_from_levels(y_true_levels: np.ndarray,
                        y_pred_levels: np.ndarray,
                        y_prev_levels: np.ndarray) -> float:
        """Mean absolute error between realised and predicted log returns."""
        true_returns = FXMetrics._log_returns(y_true_levels, y_prev_levels)
        pred_returns = FXMetrics._log_returns(y_pred_levels, y_prev_levels)
        return float(np.mean(np.abs(true_returns - pred_returns)))

    # Legacy helpers kept for compatibility: they score the arrays as passed in,
    # without any level -> return conversion.
    @staticmethod
    def calculate_da(y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """
        Directional accuracy from consecutive differences within each array.

        Note that the predicted direction is ``sign(y_pred[t] - y_pred[t-1])``, i.e.
        relative to the previous *prediction*, not the previous observed value.

        Raises:
            ValueError: if the two arrays have different lengths

        Returns:
            DA as a percentage (0-100); NaN when fewer than two points are given
        """
        if len(y_true) != len(y_pred):
            raise ValueError(f"Length mismatch: y_true={len(y_true)}, y_pred={len(y_pred)}")
        
        direction_true = np.sign(np.diff(np.asarray(y_true, dtype=float)))
        direction_pred = np.sign(np.diff(np.asarray(y_pred, dtype=float)))
        return FXMetrics._hit_rate(direction_true, direction_pred)
    
    @staticmethod
    def calculate_mase(y_true: np.ndarray, y_pred: np.ndarray, train_values: np.ndarray) -> float:
        """MASE on the given scale: MAE divided by the mean absolute first difference of ``train_values``."""
        mae = FXMetrics.calculate_mae(y_true, y_pred)
        naive_mae = np.mean(np.abs(np.diff(np.asarray(train_values, dtype=float))))
        return float(mae / naive_mae) if naive_mae > 0 else np.inf
    
    @staticmethod
    def calculate_mae(y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Mean absolute error between the two arrays, element by element."""
        return float(np.mean(np.abs(np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float))))

# Log that this cell ran, i.e. FXMetrics is now defined in the kernel namespace.
logger.info("‚úì FXMetrics class defined (con conversi√≥n niveles‚Üíretornos)")


2025-11-26 15:19:51 - INFO - ‚úì FXMetrics class defined (con conversi√≥n niveles‚Üíretornos)


In [4]:
# %%
# ================================================================================
# CELDA 4: TimesFMConverter CLASS
# ================================================================================

class TimesFMConverter:
    """Helpers that reshape a time-indexed DataFrame into the long format used with TimesFM."""
    
    @staticmethod
    def convert_to_timesfm_df(df: pd.DataFrame, 
                              value_col: str,
                              series_id: str = 'series',
                              freq: str = 'D') -> pd.DataFrame:
        """
        Build a long-format frame with columns ``unique_id``, ``ds`` and ``y``.

        Args:
            df: DataFrame whose index holds the timestamps
            value_col: Name of the column holding the values (levels)
            series_id: Identifier written to every row of ``unique_id``
            freq: Accepted for interface compatibility; not used by this method

        Returns:
            DataFrame with columns: unique_id, ds, y (fresh 0..n-1 index)

        Raises:
            KeyError: if ``value_col`` is not a column of ``df``
        """
        if value_col not in df.columns:
            raise KeyError(f"Column '{value_col}' not found in DataFrame")

        timesfm_df = pd.DataFrame({
            'unique_id': series_id,
            'ds': df.index,
            'y': df[value_col].values
        })
        
        logger.info("Converted to TimesFM format:")
        logger.info(f"  Rows: {len(timesfm_df)}")
        logger.info(f"  Value column: {value_col} (NIVELES)")
        logger.info(f"  Y range: [{timesfm_df['y'].min():.4f}, {timesfm_df['y'].max():.4f}]")
        
        return timesfm_df
    
    @staticmethod
    def split_timesfm_data(timesfm_df: pd.DataFrame,
                          n_holdout: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split a TimesFM-format frame into (train, holdout) by position.

        The last ``n_holdout`` rows become the holdout; everything before is train.

        Raises:
            ValueError: if ``n_holdout`` is negative or larger than the frame. Either
                would turn the split point into a misleading slice index and silently
                produce a wrong partition.
        """
        n_rows = len(timesfm_df)
        if not 0 <= n_holdout <= n_rows:
            raise ValueError(f"n_holdout must be between 0 and {n_rows}, got {n_holdout}")

        split_idx = n_rows - n_holdout
        
        train_df = timesfm_df.iloc[:split_idx].copy()
        holdout_df = timesfm_df.iloc[split_idx:].copy()
        
        logger.info("Split data:")
        logger.info(f"  Train: {len(train_df)} observations")
        logger.info(f"  Holdout: {len(holdout_df)} observations")
        
        return train_df, holdout_df

# Log that this cell ran, i.e. TimesFMConverter is now defined in the kernel namespace.
logger.info("‚úì TimesFMConverter class defined")


2025-11-26 15:19:51 - INFO - ‚úì TimesFMConverter class defined


In [5]:
# %%
# ================================================================================
# CELDA 5: TimesFMEvaluator CLASS - ‚òÖ‚òÖ‚òÖ PREDICE NIVELES ‚òÖ‚òÖ‚òÖ
# ================================================================================

class TimesFMEvaluator:
    """Rolling one-step-ahead evaluation of a TimesFM model that forecasts levels."""
    
    def __init__(self, 
                 model,
                 train_df: pd.DataFrame,
                 holdout_df: pd.DataFrame,
                 full_df: pd.DataFrame,
                 context_len: Optional[int] = None):
        """
        Args:
            model: Object exposing ``forecast_on_df`` (a TimesFM model instance)
            train_df: Train data (TimesFM format)
            holdout_df: Holdout data (TimesFM format)
            full_df: Full data (TimesFM format) = train followed by holdout
            context_len: Maximum context length passed to the model. When None, the
                notebook-level ``TIMESFM_CONFIG['context_len']`` is used.

        Raises:
            ValueError: if ``full_df`` is not exactly as long as train + holdout, since
                the rolling contexts are taken from ``full_df`` by position.
        """
        if len(full_df) != len(train_df) + len(holdout_df):
            raise ValueError(
                f"full_df has {len(full_df)} rows, expected "
                f"{len(train_df) + len(holdout_df)} (train + holdout)"
            )
        self.model = model
        self.train_df = train_df
        self.holdout_df = holdout_df
        self.full_df = full_df
        self.context_len = context_len
        
    def rolling_forecast(self) -> Dict:
        """
        Rolling h=1 forecast over the holdout, in levels, with no retraining.

        For holdout step i the context is train + holdout[:i] (observed values only),
        and the model predicts the next level. The model is only used for inference,
        so holdout values enter as inputs but never as training data.

        When the model raises, or returns an empty, malformed or non-finite forecast,
        the last observed level is used instead. Those steps are counted and returned,
        so that a run made of fallbacks cannot pass as a model forecast.

        Returns:
            Dict with:
                predictions_levels: float ndarray, one predicted level per holdout row
                n_predictions: number of predictions
                n_fallbacks: number of steps that used the last observed level
                fallback_steps: list of the 0-based holdout steps that fell back

        Raises:
            ValueError: if the training frame is empty (no context for the first step)
                or the number of predictions does not match the holdout length
        """
        logger.info("=" * 80)
        logger.info("ROLLING FORECAST (TimesFM - NIVELES)")
        logger.info("=" * 80)
        
        n_holdout = len(self.holdout_df)
        n_train = len(self.train_df)
        if n_holdout > 0 and n_train == 0:
            raise ValueError("train_df is empty: no context available for the first forecast")

        max_context = self.context_len if self.context_len is not None else TIMESFM_CONFIG['context_len']
        predictions_levels = []
        fallback_steps = []
        
        logger.info(f"Generating {n_holdout} forecasts (NIVELES)...")
        logger.info("üìå Model is PRE-TRAINED (no retraining during forecast)")
        logger.info("üìå Predicting LEVELS (PEN ‚âà 3.64), not returns")
        
        for i in range(n_holdout):
            if i % 10 == 0:
                logger.info(f"  Forecasting day {i+1}/{n_holdout}...")
            
            # Context: train + holdout up to (not including) step i, observed values only.
            context_df = self.full_df.iloc[:n_train + i].copy()
            last_observed = float(context_df['y'].iloc[-1])
            pred_level = None
            
            try:
                # Inference only. num_jobs=1 keeps dataframe preprocessing in-process:
                # there is a single series, so a process pool per step adds start-up
                # cost without anything to parallelise.
                forecast_df = self.model.forecast_on_df(
                    inputs=context_df,
                    freq="D",
                    value_name="y",
                    num_jobs=1,
                    forecast_context_len=min(max_context, len(context_df))
                )
                
                # Take the h=1 prediction (a level) if the output is usable.
                if len(forecast_df) > 0 and 'timesfm' in forecast_df.columns:
                    candidate = float(forecast_df['timesfm'].iloc[0])
                    if np.isfinite(candidate):
                        pred_level = candidate
                if pred_level is None:
                    logger.warning(f"Empty or invalid forecast at step {i}")
                    
            except Exception as e:
                # Deliberate best effort: keep the loop going and fall back below.
                logger.error(f"Error at step {i}: {str(e)}")
            
            if pred_level is None:
                # Fallback: carry the last observed level forward, and record the step.
                pred_level = last_observed
                fallback_steps.append(i)
            predictions_levels.append(pred_level)
        
        predictions_levels = np.array(predictions_levels, dtype=float)
        
        logger.info(f"‚úì Forecasts generated: {len(predictions_levels)} predictions (NIVELES)")
        if predictions_levels.size > 0:
            logger.info(f"  Predictions range: [{predictions_levels.min():.4f}, {predictions_levels.max():.4f}]")
            logger.info(f"  Predictions mean: {predictions_levels.mean():.4f}")
            logger.info(f"  Predictions std: {predictions_levels.std():.4f}")
        if fallback_steps:
            logger.warning(
                f"{len(fallback_steps)}/{n_holdout} forecasts fell back to the last observed "
                f"level (steps: {fallback_steps})"
            )
        
        # Invariant: exactly one prediction per holdout row.
        if len(predictions_levels) != n_holdout:
            raise ValueError(f"Expected {n_holdout} predictions, got {len(predictions_levels)}")
        
        return {
            'predictions_levels': predictions_levels,
            'n_predictions': len(predictions_levels),
            'n_fallbacks': len(fallback_steps),
            'fallback_steps': fallback_steps
        }

# Log that this cell ran, i.e. TimesFMEvaluator is now defined in the kernel namespace.
logger.info("‚úì TimesFMEvaluator class defined (VERSI√ìN NIVELES)")


2025-11-26 15:19:51 - INFO - ‚úì TimesFMEvaluator class defined (VERSI√ìN NIVELES)


In [6]:
# %%
# ================================================================================
# CELDA 6: CheckpointManager CLASS
# ================================================================================

class CheckpointManager:
    """Stores and retrieves named checkpoints as ``<name>.json`` files in one directory."""
    
    def __init__(self, checkpoint_dir: Path):
        """Remember the directory and create it (with parents) if it does not exist."""
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    def _checkpoint_path(self, name: str) -> Path:
        """Path of the JSON file backing checkpoint ``name``."""
        return self.checkpoint_dir / f"{name}.json"

    @staticmethod
    def _to_jsonable(value):
        """``json.dump`` fallback: convert NumPy arrays and scalars to plain Python values."""
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
        
    def save_checkpoint(self, name: str, data: Dict) -> None:
        """
        Write ``data`` to ``<name>.json``.

        NumPy arrays and NumPy scalars are converted at any nesting depth. The file
        is written to a temporary sibling and then moved into place, so an
        interrupted save never leaves a truncated checkpoint behind.

        Raises:
            TypeError: if ``data`` contains a value that cannot be represented in JSON
        """
        checkpoint_path = self._checkpoint_path(name)
        tmp_path = checkpoint_path.with_name(checkpoint_path.name + ".tmp")
        
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, default=self._to_jsonable)
            tmp_path.replace(checkpoint_path)
        finally:
            # Only present here if the dump or the move failed.
            if tmp_path.exists():
                tmp_path.unlink()
        
        logger.info(f"‚úì Checkpoint saved: {name}")
    
    def load_checkpoint(self, name: str) -> Dict:
        """
        Read checkpoint ``name`` and return its content.

        Arrays saved through ``save_checkpoint`` come back as plain lists.

        Raises:
            FileNotFoundError: if the checkpoint file does not exist
        """
        checkpoint_path = self._checkpoint_path(name)
        
        if not checkpoint_path.exists():
            raise FileNotFoundError(f"Checkpoint not found: {name}")
        
        with open(checkpoint_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        logger.info(f"‚úì Checkpoint loaded: {name}")
        return data
    
    def checkpoint_exists(self, name: str) -> bool:
        """Return True when the file for checkpoint ``name`` exists."""
        return self._checkpoint_path(name).exists()

# Shared manager used by later cells; constructing it also creates CHECKPOINT_DIR if missing.
checkpoint_manager = CheckpointManager(CHECKPOINT_DIR)
logger.info("‚úì CheckpointManager initialized")

2025-11-26 15:19:51 - INFO - ‚úì CheckpointManager initialized


In [7]:
# ================================================================================
# CELDA 6: CheckpointManager CLASS
# ================================================================================

# NOTE(review): this cell repeats the CheckpointManager cell directly above it. Being
# the later definition, it is the one in effect after "Run All"; one of the two cells
# should be deleted.
class CheckpointManager:
    """Stores and retrieves named checkpoints as ``<name>.json`` files in one directory."""
    
    def __init__(self, checkpoint_dir: Path):
        """Remember the directory and create it (with parents) if it does not exist."""
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    def _checkpoint_path(self, name: str) -> Path:
        """Path of the JSON file backing checkpoint ``name``."""
        return self.checkpoint_dir / f"{name}.json"

    @staticmethod
    def _to_jsonable(value):
        """``json.dump`` fallback: convert NumPy arrays and scalars to plain Python values."""
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            return value.item()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
        
    def save_checkpoint(self, name: str, data: Dict) -> None:
        """
        Write ``data`` to ``<name>.json``.

        NumPy arrays and NumPy scalars are converted at any nesting depth. The file
        is written to a temporary sibling and then moved into place, so an
        interrupted save never leaves a truncated checkpoint behind.

        Raises:
            TypeError: if ``data`` contains a value that cannot be represented in JSON
        """
        checkpoint_path = self._checkpoint_path(name)
        tmp_path = checkpoint_path.with_name(checkpoint_path.name + ".tmp")
        
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, default=self._to_jsonable)
            tmp_path.replace(checkpoint_path)
        finally:
            # Only present here if the dump or the move failed.
            if tmp_path.exists():
                tmp_path.unlink()
        
        logger.info(f"‚úì Checkpoint saved: {name}")
    
    def load_checkpoint(self, name: str) -> Dict:
        """
        Read checkpoint ``name`` and return its content.

        Arrays saved through ``save_checkpoint`` come back as plain lists.

        Raises:
            FileNotFoundError: if the checkpoint file does not exist
        """
        checkpoint_path = self._checkpoint_path(name)
        
        if not checkpoint_path.exists():
            raise FileNotFoundError(f"Checkpoint not found: {name}")
        
        with open(checkpoint_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        logger.info(f"‚úì Checkpoint loaded: {name}")
        return data
    
    def checkpoint_exists(self, name: str) -> bool:
        """Return True when the file for checkpoint ``name`` exists."""
        return self._checkpoint_path(name).exists()

# Rebinds checkpoint_manager to an instance of the class defined in this (repeated) cell.
checkpoint_manager = CheckpointManager(CHECKPOINT_DIR)
logger.info("‚úì CheckpointManager initialized")


2025-11-26 15:19:51 - INFO - ‚úì CheckpointManager initialized


In [8]:
# %%
# ================================================================================
# CELDA 7: FUNCI√ìN PARA GUARDAR OOF
# ================================================================================

def save_oof_predictions(predictions: np.ndarray,
                         dates: List,
                         actuals: np.ndarray,
                         model_name: str,
                         prediction_type: str,
                         metadata: Dict,
                         output_dir: Path) -> None:
    """
    Save out-of-fold (OOF) predictions and their metadata in the shared layout.

    Writes ``train_oof_<model_name>.csv`` with columns ds, y_pred, y_real, model, type,
    and ``train_oof_<model_name>_metadata.json``. Predictions and actuals are paired
    with the dates by position, so pandas Series with unrelated index labels are
    handled the same way as plain arrays.

    Args:
        predictions: Predicted values, one per date
        dates: Dates of the predictions
        actuals: Realised values, one per date
        model_name: Model name (used in the output file names and the ``model`` column)
        prediction_type: 'log_returns' or 'levels' (written to the ``type`` column)
        metadata: Extra metadata; values that are not JSON-native are stored as ``str(value)``
        output_dir: Output directory (created if missing)

    Raises:
        ValueError: if predictions, actuals and dates do not all have the same length
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Strip any pandas index so rows are matched by position, not by label.
    predictions = np.asarray(predictions)
    actuals = np.asarray(actuals)
    if not (len(predictions) == len(actuals) == len(dates)):
        raise ValueError(
            f"Length mismatch: predictions={len(predictions)}, "
            f"actuals={len(actuals)}, dates={len(dates)}"
        )
    
    df_oof = pd.DataFrame({
        'ds': dates,
        'y_pred': predictions,
        'y_real': actuals,
        'model': model_name,
        'type': prediction_type
    })
    
    # Sanity counts: exact zeros and missing predictions are both reported.
    n_zeros = (df_oof['y_pred'] == 0).sum()
    n_missing = int(df_oof['y_pred'].isna().sum())
    
    csv_path = output_dir / f'train_oof_{model_name}.csv'
    df_oof.to_csv(csv_path, index=False)
    
    logger.info(f"‚úì OOF predictions saved: {csv_path}")
    logger.info(f"  Observations: {len(df_oof)}")
    logger.info(f"  Zeros: {n_zeros}")
    logger.info(f"  Type: {prediction_type}")
    if n_missing > 0:
        logger.warning(f"  Missing predictions (NaN): {n_missing}")
    
    json_path = output_dir / f'train_oof_{model_name}_metadata.json'
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2, default=str)
    
    logger.info(f"‚úì OOF metadata saved: {json_path}")

# Log that this cell ran, i.e. save_oof_predictions is now defined in the kernel namespace.
logger.info("‚úì save_oof_predictions function defined")


2025-11-26 15:19:51 - INFO - ‚úì save_oof_predictions function defined


In [9]:
# %%
# ================================================================================
# CELDA 8: CARGAR DATOS
# ================================================================================

logger.info("=" * 80)
logger.info("LOADING DATA")
logger.info("=" * 80)

# Read the raw CSV; row/column counts are logged before the date column becomes the index.
df_raw = pd.read_csv(DATA_PATH)
logger.info(f"‚úì Data loaded: {len(df_raw)} rows, {len(df_raw.columns)} columns")

# Parse 'Dates', use it as the index, and put the rows in chronological order.
df_raw = (
    df_raw
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index('Dates')
    .sort_index()
)

logger.info(f"Date range: {df_raw.index.min()} to {df_raw.index.max()}")
logger.info(f"Columns: {list(df_raw.columns)}")



2025-11-26 15:19:51 - INFO - LOADING DATA
2025-11-26 15:19:51 - INFO - ‚úì Data loaded: 8201 rows, 12 columns
2025-11-26 15:19:51 - INFO - Date range: 1994-01-31 00:00:00 to 2025-07-07 00:00:00
2025-11-26 15:19:51 - INFO - Columns: ['PEN', 'MXN', 'CLP', 'COBRE', 'DXY', 'UST10Y', 'CPI', 'MXPE', 'RESERV', 'T_TRADE', 'Tasa_cds']


In [10]:
# %%
# ================================================================================
# CELDA 9: PREPARAR DATOS - ‚òÖ‚òÖ‚òÖ USAR NIVELES ‚òÖ‚òÖ‚òÖ
# ================================================================================

logger.info("=" * 80)
logger.info("DATA PREPARATION (NIVELES)")
logger.info("=" * 80)

# TimesFM takes no exogenous inputs here: only the target (in levels) is fed to the model.
df = df_raw.copy()

# Fill gaps in the raw series FIRST. Derived columns computed before filling would have
# their own NaNs forward-filled, i.e. a day with a missing PEN would inherit the previous
# day's log return and lag instead of values consistent with the filled level.
df = df.ffill()

# Log returns are NOT the model target; they are kept for reference and for metrics.
df['PEN_log_ret'] = np.log(df['PEN'] / df['PEN'].shift(1))

# Previous-day level, used later to convert level forecasts into returns.
df['PEN_lag_1'] = df['PEN'].shift(1)

# Drop rows that still have a missing value (at least the first row, which has no lag).
df = df.dropna()

logger.info(f"‚úì Target: PEN (NIVELES)")
logger.info(f"   PEN range: [{df['PEN'].min():.4f}, {df['PEN'].max():.4f}]")
logger.info(f"   PEN mean: {df['PEN'].mean():.4f}")
logger.info(f"Final dataset shape: {df.shape}")


2025-11-26 15:19:51 - INFO - DATA PREPARATION (NIVELES)
2025-11-26 15:19:51 - INFO - ‚úì Target: PEN (NIVELES)
2025-11-26 15:19:51 - INFO -    PEN range: [2.0520, 4.1375]
2025-11-26 15:19:51 - INFO -    PEN mean: 3.1662
2025-11-26 15:19:51 - INFO - Final dataset shape: (8200, 13)


In [11]:
# %%
# ================================================================================
# CELDA 10: TRAIN/HOLDOUT SPLIT
# ================================================================================

logger.info("=" * 80)
logger.info("TRAIN/HOLDOUT SPLIT")
logger.info("=" * 80)

# The last N_HOLDOUT rows form the holdout; everything before them is train.
split_idx = len(df) - N_HOLDOUT
train_df, holdout_df = df.iloc[:split_idx].copy(), df.iloc[split_idx:].copy()

logger.info(f"Train: {len(train_df)} observations ({len(train_df) / len(df):.1%})")
logger.info(f"  Date range: {train_df.index.min()} to {train_df.index.max()}")
logger.info(f"Holdout: {len(holdout_df)} observations ({len(holdout_df) / len(df):.1%})")
logger.info(f"  Date range: {holdout_df.index.min()} to {holdout_df.index.max()}")

# Hard requirement: the holdout must contain exactly N_HOLDOUT rows.
assert len(holdout_df) == N_HOLDOUT, f"Holdout must have exactly {N_HOLDOUT} observations"

# PEN levels of each partition as plain arrays.
train_levels, holdout_levels = (part['PEN'].values for part in (train_df, holdout_df))

logger.info(f"Train PEN levels: [{train_levels.min():.4f}, {train_levels.max():.4f}]")
logger.info(f"Holdout PEN levels: [{holdout_levels.min():.4f}, {holdout_levels.max():.4f}]")



2025-11-26 15:19:51 - INFO - TRAIN/HOLDOUT SPLIT
2025-11-26 15:19:51 - INFO - Train: 8140 observations (99.3%)
2025-11-26 15:19:51 - INFO -   Date range: 1994-02-01 00:00:00 to 2025-04-14 00:00:00
2025-11-26 15:19:51 - INFO - Holdout: 60 observations (0.7%)
2025-11-26 15:19:51 - INFO -   Date range: 2025-04-15 00:00:00 to 2025-07-07 00:00:00
2025-11-26 15:19:51 - INFO - Train PEN levels: [2.0520, 4.1375]
2025-11-26 15:19:51 - INFO - Holdout PEN levels: [3.5425, 3.7420]


In [12]:
# %%
# ================================================================================
# CELDA 11: CONVERTIR A FORMATO TIMESFM (NIVELES)
# ================================================================================

logger.info("=" * 80)
logger.info("CONVERT TO TIMESFM FORMAT (NIVELES)")
logger.info("=" * 80)

converter = TimesFMConverter()

# Full dataset in TimesFM long format; the value column is TARGET_COL (PEN levels).
full_timesfm = converter.convert_to_timesfm_df(df, TARGET_COL, series_id='USDPEN', freq='D')

# Same positional split as before, applied to the TimesFM-format frame.
train_timesfm, holdout_timesfm = converter.split_timesfm_data(full_timesfm, N_HOLDOUT)

logger.info("‚úì Data converted to TimesFM format (NIVELES)")



2025-11-26 15:19:51 - INFO - CONVERT TO TIMESFM FORMAT (NIVELES)
2025-11-26 15:19:51 - INFO - Converted to TimesFM format:
2025-11-26 15:19:51 - INFO -   Rows: 8200
2025-11-26 15:19:51 - INFO -   Value column: PEN (NIVELES)
2025-11-26 15:19:51 - INFO -   Y range: [2.0520, 4.1375]
2025-11-26 15:19:51 - INFO - Split data:
2025-11-26 15:19:51 - INFO -   Train: 8140 observations
2025-11-26 15:19:51 - INFO -   Holdout: 60 observations
2025-11-26 15:19:51 - INFO - ‚úì Data converted to TimesFM format (NIVELES)


In [13]:
# %%
# ================================================================================
# CELDA 12: INICIALIZAR TIMESFM Y EJECUTAR ZERO-SHOT
# ================================================================================

logger.info("=" * 80)
logger.info("PHASE 1: ZERO-SHOT EVALUATION (TimesFM - NIVELES)")
logger.info("=" * 80)

ZERO_SHOT_CHECKPOINT = 'zero_shot_results_niveles'

# What the cached predictions depend on. It is stored next to the predictions so that a
# checkpoint produced with other data or another model setup is recomputed, not reused.
zero_shot_fingerprint = {
    'model_id': TIMESFM_CONFIG['model_id'],
    'context_len': TIMESFM_CONFIG['context_len'],
    'n_holdout': N_HOLDOUT,
    'holdout_start': str(holdout_timesfm['ds'].iloc[0]),
    'holdout_end': str(holdout_timesfm['ds'].iloc[-1]),
}

predictions_levels = None

# Reuse an existing checkpoint only if it still matches the current setup.
if checkpoint_manager.checkpoint_exists(ZERO_SHOT_CHECKPOINT):
    logger.info("‚ö†Ô∏è Zero-shot checkpoint exists. Loading...")
    zero_shot_results = checkpoint_manager.load_checkpoint(ZERO_SHOT_CHECKPOINT)
    cached_predictions = np.array(zero_shot_results['predictions_levels'])

    # Checkpoints written before the fingerprint existed lack these keys; for those,
    # only the number of predictions can be verified.
    stale_keys = [
        key for key, expected in zero_shot_fingerprint.items()
        if key in zero_shot_results and zero_shot_results[key] != expected
    ]
    if len(cached_predictions) != N_HOLDOUT or stale_keys:
        logger.warning(
            f"Zero-shot checkpoint does not match the current setup "
            f"(cached predictions: {len(cached_predictions)}, expected: {N_HOLDOUT}, "
            f"mismatched keys: {stale_keys}). Recomputing..."
        )
    else:
        predictions_levels = cached_predictions

if predictions_levels is None:
    logger.info("Starting zero-shot evaluation from scratch...")
    
    # Initialise TimesFM (the checkpoint is fetched from the Hugging Face repo id).
    logger.info("Initializing TimesFM model...")
    logger.info("‚ö†Ô∏è This may take a few minutes on first run (downloading model)...")
    
    model = timesfm.TimesFm(
        hparams=timesfm.TimesFmHparams(
            context_len=TIMESFM_CONFIG['context_len'],
            horizon_len=TIMESFM_CONFIG['horizon_len'],
            per_core_batch_size=TIMESFM_CONFIG['batch_size'],
            backend=TIMESFM_CONFIG['backend']
        ),
        checkpoint=timesfm.TimesFmCheckpoint(
            huggingface_repo_id=TIMESFM_CONFIG['model_id']
        )
    )
    
    logger.info(f"‚úì TimesFM initialized")
    logger.info(f"  Model: {TIMESFM_CONFIG['model_id']}")
    logger.info(f"  Context length: {TIMESFM_CONFIG['context_len']}")
    logger.info(f"  Horizon: h={TIMESFM_CONFIG['horizon_len']}")
    logger.info(f"  Backend: {TIMESFM_CONFIG['backend']}")
    
    evaluator = TimesFMEvaluator(
        model=model,
        train_df=train_timesfm,
        holdout_df=holdout_timesfm,
        full_df=full_timesfm
    )
    
    # Rolling h=1 forecast over the holdout (levels).
    forecast_results = evaluator.rolling_forecast()
    predictions_levels = forecast_results['predictions_levels']
    
    # Persist the predictions together with the fingerprint of this run.
    zero_shot_results = {
        'predictions_levels': predictions_levels.tolist(),
        **zero_shot_fingerprint
    }
    checkpoint_manager.save_checkpoint(ZERO_SHOT_CHECKPOINT, zero_shot_results)
    
    logger.info("‚úì Zero-shot evaluation completed and checkpointed")



2025-11-26 15:19:51 - INFO - PHASE 1: ZERO-SHOT EVALUATION (TimesFM - NIVELES)
2025-11-26 15:19:51 - INFO - Starting zero-shot evaluation from scratch...
2025-11-26 15:19:51 - INFO - Initializing TimesFM model...
2025-11-26 15:19:51 - INFO - ‚ö†Ô∏è This may take a few minutes on first run (downloading model)...
2025-11-26 15:19:52 - INFO - HTTP Request: GET https://huggingface.co/api/models/google/timesfm-1.0-200m-pytorch/revision/main "HTTP/1.1 200 OK"


Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

2025-11-26 15:19:54 - INFO - Loading checkpoint from C:\Users\Carlos Palma\.cache\huggingface\hub\models--google--timesfm-1.0-200m-pytorch\snapshots\0581e2c56cb06feb51cfd98fc2b4005b74f7187b\torch_model.ckpt
2025-11-26 15:19:54 - INFO - Sending checkpoint to device cuda:0
2025-11-26 15:19:54 - INFO - ‚úì TimesFM initialized
2025-11-26 15:19:54 - INFO -   Model: google/timesfm-1.0-200m-pytorch
2025-11-26 15:19:54 - INFO -   Context length: 512
2025-11-26 15:19:54 - INFO -   Horizon: h=1
2025-11-26 15:19:54 - INFO -   Backend: gpu
2025-11-26 15:19:54 - INFO - ROLLING FORECAST (TimesFM - NIVELES)
2025-11-26 15:19:54 - INFO - Generating 60 forecasts (NIVELES)...
2025-11-26 15:19:54 - INFO - üìå Model is PRE-TRAINED (no retraining during forecast)
2025-11-26 15:19:54 - INFO - üìå Predicting LEVELS (PEN ‚âà 3.64), not returns
2025-11-26 15:19:54 - INFO -   Forecasting day 1/60...
2025-11-26 15:19:54 - INFO - Preprocessing dataframe.


Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:19:59 - INFO - Finished creating output dataframe.
2025-11-26 15:19:59 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:03 - INFO - Finished creating output dataframe.
2025-11-26 15:20:03 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:08 - INFO - Finished creating output dataframe.
2025-11-26 15:20:08 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:13 - INFO - Finished creating output dataframe.
2025-11-26 15:20:13 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:17 - INFO - Finished creating output dataframe.
2025-11-26 15:20:17 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:22 - INFO - Finished creating output dataframe.
2025-11-26 15:20:22 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:27 - INFO - Finished creating output dataframe.
2025-11-26 15:20:27 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:31 - INFO - Finished creating output dataframe.
2025-11-26 15:20:31 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:36 - INFO - Finished creating output dataframe.
2025-11-26 15:20:36 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:40 - INFO - Finished creating output dataframe.
2025-11-26 15:20:40 - INFO -   Forecasting day 11/60...
2025-11-26 15:20:40 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:45 - INFO - Finished creating output dataframe.
2025-11-26 15:20:45 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:50 - INFO - Finished creating output dataframe.
2025-11-26 15:20:50 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:20:55 - INFO - Finished creating output dataframe.
2025-11-26 15:20:55 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:00 - INFO - Finished creating output dataframe.
2025-11-26 15:21:00 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:06 - INFO - Finished creating output dataframe.
2025-11-26 15:21:06 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:11 - INFO - Finished creating output dataframe.
2025-11-26 15:21:11 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:17 - INFO - Finished creating output dataframe.
2025-11-26 15:21:17 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:22 - INFO - Finished creating output dataframe.
2025-11-26 15:21:22 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:27 - INFO - Finished creating output dataframe.
2025-11-26 15:21:27 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:32 - INFO - Finished creating output dataframe.
2025-11-26 15:21:32 - INFO -   Forecasting day 21/60...
2025-11-26 15:21:32 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:37 - INFO - Finished creating output dataframe.
2025-11-26 15:21:37 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:42 - INFO - Finished creating output dataframe.
2025-11-26 15:21:42 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:47 - INFO - Finished creating output dataframe.
2025-11-26 15:21:47 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:52 - INFO - Finished creating output dataframe.
2025-11-26 15:21:52 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:21:57 - INFO - Finished creating output dataframe.
2025-11-26 15:21:57 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.


2025-11-26 15:22:02 - INFO - Finished creating output dataframe.
2025-11-26 15:22:02 - INFO - Preprocessing dataframe.


Finished preprocessing dataframe.
Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:07 - INFO - Finished creating output dataframe.
2025-11-26 15:22:07 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.
Finished forecasting.


2025-11-26 15:22:13 - INFO - Finished creating output dataframe.
2025-11-26 15:22:13 - INFO - Preprocessing dataframe.


Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:18 - INFO - Finished creating output dataframe.
2025-11-26 15:22:18 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:24 - INFO - Finished creating output dataframe.
2025-11-26 15:22:24 - INFO -   Forecasting day 31/60...
2025-11-26 15:22:24 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:29 - INFO - Finished creating output dataframe.
2025-11-26 15:22:29 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:34 - INFO - Finished creating output dataframe.
2025-11-26 15:22:35 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:40 - INFO - Finished creating output dataframe.
2025-11-26 15:22:40 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:45 - INFO - Finished creating output dataframe.
2025-11-26 15:22:45 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:50 - INFO - Finished creating output dataframe.
2025-11-26 15:22:50 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:22:56 - INFO - Finished creating output dataframe.
2025-11-26 15:22:56 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:01 - INFO - Finished creating output dataframe.
2025-11-26 15:23:01 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:05 - INFO - Finished creating output dataframe.
2025-11-26 15:23:05 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:10 - INFO - Finished creating output dataframe.
2025-11-26 15:23:10 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:15 - INFO - Finished creating output dataframe.
2025-11-26 15:23:15 - INFO -   Forecasting day 41/60...
2025-11-26 15:23:15 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:20 - INFO - Finished creating output dataframe.
2025-11-26 15:23:20 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:25 - INFO - Finished creating output dataframe.
2025-11-26 15:23:25 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:30 - INFO - Finished creating output dataframe.
2025-11-26 15:23:30 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:34 - INFO - Finished creating output dataframe.
2025-11-26 15:23:34 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:39 - INFO - Finished creating output dataframe.
2025-11-26 15:23:39 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:44 - INFO - Finished creating output dataframe.
2025-11-26 15:23:44 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.


2025-11-26 15:23:50 - INFO - Finished creating output dataframe.


Finished preprocessing dataframe.
Finished forecasting.


2025-11-26 15:23:50 - INFO - Preprocessing dataframe.


Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:23:55 - INFO - Finished creating output dataframe.
2025-11-26 15:23:55 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:00 - INFO - Finished creating output dataframe.
2025-11-26 15:24:00 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:05 - INFO - Finished creating output dataframe.
2025-11-26 15:24:05 - INFO -   Forecasting day 51/60...
2025-11-26 15:24:05 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:10 - INFO - Finished creating output dataframe.
2025-11-26 15:24:10 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:15 - INFO - Finished creating output dataframe.
2025-11-26 15:24:15 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:20 - INFO - Finished creating output dataframe.
2025-11-26 15:24:20 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.
Finished forecasting.


2025-11-26 15:24:25 - INFO - Finished creating output dataframe.
2025-11-26 15:24:25 - INFO - Preprocessing dataframe.


Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:30 - INFO - Finished creating output dataframe.
2025-11-26 15:24:30 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:35 - INFO - Finished creating output dataframe.
2025-11-26 15:24:35 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:40 - INFO - Finished creating output dataframe.
2025-11-26 15:24:40 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:45 - INFO - Finished creating output dataframe.
2025-11-26 15:24:45 - INFO - Preprocessing dataframe.


Finished forecasting.
Processing dataframe with multiple processes.
Finished preprocessing dataframe.
Finished forecasting.


2025-11-26 15:24:51 - INFO - Finished creating output dataframe.
2025-11-26 15:24:51 - INFO - Preprocessing dataframe.


Processing dataframe with multiple processes.
Finished preprocessing dataframe.


2025-11-26 15:24:56 - INFO - Finished creating output dataframe.
2025-11-26 15:24:56 - INFO - ‚úì Forecasts generated: 60 predictions (NIVELES)
2025-11-26 15:24:56 - INFO -   Predictions range: [3.5428, 3.7379]
2025-11-26 15:24:56 - INFO -   Predictions mean: 3.6391
2025-11-26 15:24:56 - INFO -   Predictions std: 0.0505
2025-11-26 15:24:56 - INFO - ‚úì Checkpoint saved: zero_shot_results_niveles
2025-11-26 15:24:56 - INFO - ‚úì Zero-shot evaluation completed and checkpointed


Finished forecasting.


In [20]:
# %%
# ================================================================================
# CELDA 13: CALCULAR MÉTRICAS - ★★★ CONVERSIÓN NIVELES→RETORNOS ★★★
# ================================================================================

# Cell 13: score the holdout LEVEL forecasts in log-return space and log the
# result next to the ARX baseline.
_rule = "=" * 80
for _msg in (_rule, "CALCULATING METRICS (NIVELES ‚Üí RETORNOS)", _rule):
    logger.info(_msg)

# Lagged and realised holdout levels; the lag is the base of each log return.
y_prev_levels = holdout_df['PEN_lag_1'].values
y_true_levels = holdout_df['PEN'].values

# The three FXMetrics helpers share the same leading arguments
# (true levels, predicted levels, previous levels). Judging by their names they
# convert levels to returns internally -- FXMetrics is defined elsewhere.
_level_args = (y_true_levels, predictions_levels, y_prev_levels)
mae = FXMetrics.mae_from_levels(*_level_args)
mase = FXMetrics.mase_from_levels(*_level_args, train_levels)
da = FXMetrics.directional_accuracy_from_levels(*_level_args)

# Log returns relative to the previous level; reused by the export cell.
y_true_returns = np.log(y_true_levels / y_prev_levels)
predictions_returns = np.log(predictions_levels / y_prev_levels)

# Report: final metrics first, then the comparison against the ARX baseline.
_report = (
    "‚úì Metrics calculated (en espacio de RETORNOS)",
    "",
    _rule,
    "RESULTS - TIMESFM ZERO-SHOT (NIVELES)",
    _rule,
    "",
    "üìä M√âTRICAS FINALES (calculadas en RETORNOS):",
    f"   DA:   {da:.2f}%",
    f"   MASE: {mase:.4f}",
    f"   MAE:  {mae:.6f}",
    "",
    "üìä COMPARACI√ìN CON BASELINE:",
    f"   ARX (baseline): DA={BASELINE_ARX['DA']:.2f}%, MASE={BASELINE_ARX['MASE']:.4f}",
    f"   TimesFM:        DA={da:.2f}%, MASE={mase:.4f}",
    f"   Delta DA:       {da - BASELINE_ARX['DA']:+.2f}%",
    f"   Delta MASE:     {mase - BASELINE_ARX['MASE']:+.4f}",
)
for _msg in _report:
    logger.info(_msg)


2025-11-26 15:26:33 - INFO - CALCULATING METRICS (NIVELES ‚Üí RETORNOS)
2025-11-26 15:26:33 - INFO - ‚úì Metrics calculated (en espacio de RETORNOS)
2025-11-26 15:26:33 - INFO - 
2025-11-26 15:26:33 - INFO - RESULTS - TIMESFM ZERO-SHOT (NIVELES)
2025-11-26 15:26:33 - INFO - 
2025-11-26 15:26:33 - INFO - üìä M√âTRICAS FINALES (calculadas en RETORNOS):
2025-11-26 15:26:33 - INFO -    DA:   58.33%
2025-11-26 15:26:33 - INFO -    MASE: 1.3022
2025-11-26 15:26:33 - INFO -    MAE:  0.002530
2025-11-26 15:26:33 - INFO - 
2025-11-26 15:26:33 - INFO - üìä COMPARACI√ìN CON BASELINE:
2025-11-26 15:26:33 - INFO -    ARX (baseline): DA=51.67%, MASE=0.9398
2025-11-26 15:26:33 - INFO -    TimesFM:        DA=58.33%, MASE=1.3022
2025-11-26 15:26:33 - INFO -    Delta DA:       +6.66%
2025-11-26 15:26:33 - INFO -    Delta MASE:     +0.3624


In [21]:
# %%
# ================================================================================
# CELDA 13.5: GENERATE OOF PREDICTIONS - ★★★ VERSIÓN NIVELES ★★★
# ================================================================================

def generate_oof_predictions_timesfm(model,
                                      train_df_timesfm: pd.DataFrame,
                                      full_df_timesfm: pd.DataFrame,
                                      train_df_original: pd.DataFrame,
                                      checkpoint_dir: Path = None) -> dict:
    """
    Generate out-of-fold (OOF) predictions with walk-forward validation.

    For every validation position ``i`` the model receives
    ``train_df_timesfm.iloc[:i]`` as context and the first row of its forecast
    is taken as the predicted LEVEL for position ``i``. Predicted and realised
    levels are then converted to log returns relative to the level at
    ``i - 1``, which is the format stored for the meta-learner. The model is
    only queried here, never fitted.

    Progress is pickled every 5 folds so an interrupted run can resume; the
    temporary checkpoint is removed once the final OOF file has been saved.

    Relies on notebook globals: ``MIN_TRAIN``, ``STEP_SIZE``,
    ``TIMESFM_CONFIG``, ``OOF_DIR``, ``train_levels`` and
    ``save_oof_predictions``.

    Args:
        model: Object exposing ``forecast_on_df`` (the TimesFM instance).
        train_df_timesfm: Training data with the level in column ``'y'``.
        full_df_timesfm: Not used by this function; kept so existing calls
            keep working.
        train_df_original: Frame whose index supplies the date of each row;
            assumed to be row-aligned with ``train_df_timesfm`` -- TODO confirm.
        checkpoint_dir: Directory for the temporary checkpoint. Defaults to
            ``OOF_DIR``.

    Returns:
        dict with keys ``'predictions'`` and ``'actuals'`` (numpy arrays of log
        returns), ``'dates'``, ``'metrics'`` (``da``, ``mae``, ``mase``),
        ``'fold_metrics'`` (one dict per fold) and ``'n_fallbacks'`` (number of
        predictions that fell back to the previous level because the forecast
        failed or was empty).

    Raises:
        ValueError: If ``MIN_TRAIN``/``STEP_SIZE`` are below 1 or the training
            frame is too short to form a single fold.
    """
    # Function-scope imports keep the cell self-contained; ``pickle`` is not
    # part of the imports in the notebook's first cell.
    import pickle
    import time

    checkpoint_every = 5        # folds between checkpoints
    max_fallback_reports = 3    # individual fallback messages before going quiet

    if checkpoint_dir is None:
        checkpoint_dir = OOF_DIR

    checkpoint_dir = Path(checkpoint_dir)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # Temporary checkpoint file
    checkpoint_file = checkpoint_dir / 'oof_timesfm_niveles_checkpoint.pkl'

    print("\n" + "="*80)
    print("üîÑ GENERANDO OOF PREDICTIONS (NIVELES ‚Üí RETORNOS)")
    print("="*80)
    print("üìå TimesFM es pre-entrenado: NO reentrenamos, solo forecasting")
    print("üìå Predice NIVELES, convertimos a RETORNOS para meta-learner")

    # ------------------------------------------------------------------
    # STEP 1: look for an existing checkpoint
    # ------------------------------------------------------------------

    resume_from_checkpoint = False
    checkpoint_data = None
    required_keys = {'last_fold', 'oof_predictions', 'oof_dates',
                     'oof_actuals', 'fold_metrics'}

    if checkpoint_file.exists():
        print(f"\nüìÇ CHECKPOINT ENCONTRADO: {checkpoint_file}")
        print("   Continuando desde checkpoint...")

        try:
            # NOTE(security): pickle.load can execute arbitrary code. Only
            # resume from checkpoints written by this notebook.
            with open(checkpoint_file, 'rb') as f:
                checkpoint_data = pickle.load(f)

            # Validate the schema here so a truncated or foreign file restarts
            # cleanly instead of raising KeyError further down.
            missing_keys = required_keys - set(checkpoint_data)
            if missing_keys:
                raise KeyError(f"faltan claves en el checkpoint: {sorted(missing_keys)}")

            print(f"\n‚úÖ Checkpoint cargado:")
            print(f"   √öltimo fold completado: {checkpoint_data['last_fold']}")
            print(f"   Predicciones guardadas: {len(checkpoint_data['oof_predictions'])}")

            resume_from_checkpoint = True

        except Exception as e:
            print(f"\n‚ö†Ô∏è Error cargando checkpoint: {e}")
            print("   Empezando desde cero...")
            resume_from_checkpoint = False
            checkpoint_data = None

    # ------------------------------------------------------------------
    # STEP 2: initialise or restore state
    # ------------------------------------------------------------------

    if resume_from_checkpoint and checkpoint_data:
        oof_predictions = list(checkpoint_data['oof_predictions'])
        oof_dates = list(checkpoint_data['oof_dates'])
        oof_actuals = list(checkpoint_data['oof_actuals'])
        fold_metrics = list(checkpoint_data['fold_metrics'])
        n_fallbacks = int(checkpoint_data.get('n_fallbacks', 0))
        start_fold = checkpoint_data['last_fold'] + 1

        # Checkpoints written by the earlier version stored dates as strings;
        # convert them back so the resumed list does not mix str and Timestamp.
        if isinstance(train_df_original.index, pd.DatetimeIndex):
            oof_dates = [pd.Timestamp(d) if isinstance(d, str) else d
                         for d in oof_dates]

        print(f"\n‚ñ∂Ô∏è RESUMIENDO desde fold {start_fold}")

    else:
        oof_predictions = []  # log returns
        oof_dates = []
        oof_actuals = []      # log returns
        fold_metrics = []
        n_fallbacks = 0
        start_fold = 1

        print(f"\n‚ñ∂Ô∏è INICIANDO desde fold 1")

    # ------------------------------------------------------------------
    # STEP 3: walk-forward configuration
    # ------------------------------------------------------------------

    if MIN_TRAIN < 1 or STEP_SIZE < 1:
        raise ValueError(
            f"MIN_TRAIN y STEP_SIZE deben ser >= 1 (MIN_TRAIN={MIN_TRAIN}, STEP_SIZE={STEP_SIZE})"
        )

    n_train = len(train_df_timesfm)
    n_splits = (n_train - MIN_TRAIN) // STEP_SIZE

    print(f"\nüìä Configuraci√≥n Walk-Forward:")
    print(f"   Train TimesFM: {n_train}")
    print(f"   MIN_TRAIN: {MIN_TRAIN}")
    print(f"   STEP_SIZE: {STEP_SIZE}")
    print(f"   N_SPLITS: {n_splits}")

    # Without this guard an empty OOF file would be written and later reused
    # as a valid cache by the cell that checks whether the CSV exists.
    if n_splits < 1:
        raise ValueError(
            f"Datos insuficientes para walk-forward: n_train={n_train}, "
            f"MIN_TRAIN={MIN_TRAIN}, STEP_SIZE={STEP_SIZE}"
        )

    start_time = time.time()
    level_series = train_df_timesfm['y']

    # ------------------------------------------------------------------
    # STEP 4: walk-forward loop
    # ------------------------------------------------------------------

    # ``start_fold`` is 1-based while ``fold_idx`` is 0-based.
    for fold_idx in range(start_fold - 1, n_splits):
        fold_start_time = time.time()

        # Window bounds. ``n_splits`` is a floor division, so every fold has a
        # full window and ``val_end`` never exceeds ``n_train``.
        train_end = MIN_TRAIN + fold_idx * STEP_SIZE
        val_start = train_end
        val_end = min(val_start + STEP_SIZE, n_train)

        print(f"\n   Fold {fold_idx + 1}/{n_splits}: train[:{train_end}], val[{val_start}:{val_end}]")

        fold_preds = []
        fold_actuals = []
        fold_dates = []

        # Rolling one-step forecast inside the fold
        for i in range(val_start, val_end):
            # Context: every row strictly before i
            context_df = train_df_timesfm.iloc[:i].copy()

            # Realised values; the previous level is also the fallback forecast.
            true_level = float(level_series.iloc[i])
            prev_level = float(level_series.iloc[i - 1])
            date = train_df_original.index[i]

            fallback_reason = None
            try:
                # Forecast (LEVEL). A single process is used on purpose:
                # num_jobs=-1 builds a process pool on every call, which
                # dominates the runtime when the frame holds one series.
                forecast_df = model.forecast_on_df(
                    inputs=context_df,
                    freq="D",
                    value_name="y",
                    num_jobs=1,
                    forecast_context_len=min(TIMESFM_CONFIG['context_len'], len(context_df))
                )

                if len(forecast_df) > 0 and 'timesfm' in forecast_df.columns:
                    pred_level = float(forecast_df['timesfm'].iloc[0])
                else:
                    fallback_reason = "forecast vacio o sin columna 'timesfm'"

            except Exception as e:
                fallback_reason = repr(e)

            if fallback_reason is not None:
                # Best-effort fallback: repeat the previous level (zero return),
                # but count and report it instead of hiding it.
                pred_level = prev_level
                n_fallbacks += 1
                if n_fallbacks <= max_fallback_reports:
                    print(f"      [WARN] Forecast fallido en i={i}: {fallback_reason} "
                          f"-> se usa el nivel previo (retorno 0)")
                elif n_fallbacks == max_fallback_reports + 1:
                    print("      [WARN] Mas forecasts fallidos; ver total en el resumen")

            # Convert to log RETURNS for the meta-learner
            pred_return = np.log(pred_level / prev_level)
            true_return = np.log(true_level / prev_level)

            fold_preds.append(pred_return)
            fold_actuals.append(true_return)
            fold_dates.append(date)

        # Append to the global results
        oof_predictions.extend(fold_preds)
        oof_actuals.extend(fold_actuals)
        oof_dates.extend(fold_dates)

        # Fold metrics
        fold_da = np.mean(np.sign(fold_preds) == np.sign(fold_actuals)) * 100
        fold_mae = np.mean(np.abs(np.array(fold_preds) - np.array(fold_actuals)))

        fold_metrics.append({
            'fold': fold_idx + 1,
            'train_end': train_end,
            'val_size': len(fold_preds),
            'da': fold_da,
            'mae': fold_mae
        })

        fold_time = time.time() - fold_start_time
        print(f"      ‚úì {len(fold_preds)} preds | DA: {fold_da:.1f}% | Time: {fold_time:.1f}s")

        # Periodic checkpoint
        if (fold_idx + 1) % checkpoint_every == 0:
            checkpoint_data = {
                'last_fold': fold_idx + 1,
                'oof_predictions': list(oof_predictions),
                # Dates are stored as-is so a resumed run keeps one date type.
                'oof_dates': list(oof_dates),
                'oof_actuals': list(oof_actuals),
                'fold_metrics': list(fold_metrics),
                'n_fallbacks': n_fallbacks,
                'elapsed_seconds': time.time() - start_time
            }
            # Write to a sibling file and swap it in, so an interruption during
            # the write cannot corrupt the previous checkpoint.
            tmp_file = checkpoint_file.with_name(checkpoint_file.name + '.tmp')
            with open(tmp_file, 'wb') as f:
                pickle.dump(checkpoint_data, f)
            tmp_file.replace(checkpoint_file)
            print(f"      üíæ Checkpoint guardado (fold {fold_idx + 1})")

    # ------------------------------------------------------------------
    # STEP 5: final OOF metrics
    # ------------------------------------------------------------------

    oof_predictions = np.array(oof_predictions)
    oof_actuals = np.array(oof_actuals)

    # Directional accuracy and MAE in return space
    da_oof = np.mean(np.sign(oof_predictions) == np.sign(oof_actuals)) * 100
    mae_oof = np.mean(np.abs(oof_predictions - oof_actuals))

    # MASE: scaled by the mean absolute log return of ``train_levels``.
    # NOTE(review): ``train_levels`` is a notebook global, not an argument.
    train_returns = np.log(train_levels[1:] / train_levels[:-1])
    naive_mae = np.mean(np.abs(train_returns))
    mase_oof = mae_oof / naive_mae if naive_mae > 0 else np.inf

    elapsed_total = time.time() - start_time

    print(f"\n{'='*60}")
    print(f"üìä RESUMEN OOF FINAL")
    print(f"{'='*60}")
    print(f"   Observaciones: {len(oof_predictions)}")
    print(f"   DA OOF:        {da_oof:.2f}%")
    print(f"   MAE OOF:       {mae_oof:.6f}")
    print(f"   MASE OOF:      {mase_oof:.4f}")
    print(f"   Tiempo total:  {elapsed_total/60:.1f} minutos")
    if n_fallbacks > 0:
        print(f"   [WARN] Predicciones con fallback (nivel previo): {n_fallbacks}")

    # Save the definitive OOF file
    save_oof_predictions(
        predictions=oof_predictions,
        dates=oof_dates,
        actuals=oof_actuals,
        model_name='TimesFM',
        prediction_type='log_returns',  # exported as RETURNS
        metadata={
            'method': 'walk_forward_niveles',
            'target_training': 'PEN (niveles)',
            'output_format': 'log_returns',
            'min_train': MIN_TRAIN,
            'step_size': STEP_SIZE,
            'n_splits': len(fold_metrics),
            'n_observations': len(oof_predictions),
            'model': 'TimesFM',
            'model_id': TIMESFM_CONFIG['model_id'],
            'da_oof': float(da_oof),
            'mae_oof': float(mae_oof),
            'mase_oof': float(mase_oof),
            'elapsed_minutes': float(elapsed_total/60),
            'note': 'Model trained on LEVELS, output converted to LOG_RETURNS'
        },
        output_dir=OOF_DIR
    )

    # Remove the temporary checkpoint
    if checkpoint_file.exists():
        checkpoint_file.unlink()
        print(f"\nüóëÔ∏è Checkpoint temporal eliminado (proceso completado)")

    print(f"\n‚úÖ OOF completado exitosamente")
    print(f"   üìÅ Guardado en: {OOF_DIR}/train_oof_TimesFM.csv")

    return {
        'predictions': oof_predictions,
        'dates': oof_dates,
        'actuals': oof_actuals,
        'metrics': {
            'da': da_oof,
            'mae': mae_oof,
            'mase': mase_oof
        },
        'fold_metrics': fold_metrics,
        'n_fallbacks': n_fallbacks
    }

logger.info("‚úì generate_oof_predictions_timesfm function defined (VERSI√ìN NIVELES)")


2025-11-26 15:26:51 - INFO - ‚úì generate_oof_predictions_timesfm function defined (VERSI√ìN NIVELES)


In [22]:
# %%
# ================================================================================
# CELDA 14: GENERAR OOF
# ================================================================================

# Cell 14: obtain the OOF predictions, reusing the CSV from a previous run
# when it exists and generating it otherwise.
logger.info("=" * 80)
logger.info("GENERATING OOF PREDICTIONS")
logger.info("=" * 80)

# Check whether the OOF file already exists
oof_csv_path = OOF_DIR / 'train_oof_TimesFM.csv'

if oof_csv_path.exists():
    # NOTE(review): the cached file is trusted as-is; delete it to regenerate
    # after changing MIN_TRAIN, STEP_SIZE or the model configuration.
    logger.info(f"‚ö†Ô∏è OOF file already exists: {oof_csv_path}")
    logger.info("‚úì Usando OOF existente")
    oof_df = pd.read_csv(oof_csv_path)

    _oof_preds = oof_df['y_pred'].values
    _oof_actuals = oof_df['y_real'].values

    # Recompute the summary metrics with the same formulas the generator uses,
    # so both branches yield a dict with the same keys.
    _oof_mae = np.mean(np.abs(_oof_preds - _oof_actuals))
    _oof_naive_mae = np.mean(np.abs(np.log(train_levels[1:] / train_levels[:-1])))

    oof_results = {
        'predictions': _oof_preds,
        'dates': oof_df['ds'].tolist(),  # strings, exactly as stored in the CSV
        'actuals': _oof_actuals,
        'metrics': {
            'da': np.mean(np.sign(_oof_preds) == np.sign(_oof_actuals)) * 100,
            'mae': _oof_mae,
            'mase': _oof_mae / _oof_naive_mae if _oof_naive_mae > 0 else np.inf
        },
        # Per-fold metrics are not read back from the CSV here.
        'fold_metrics': []
    }
else:
    logger.info("üîÑ Generando OOF predictions...")
    oof_results = generate_oof_predictions_timesfm(
        model=model,
        train_df_timesfm=train_timesfm,
        full_df_timesfm=full_timesfm,
        train_df_original=train_df,
        checkpoint_dir=OOF_DIR
    )

logger.info("‚úì OOF predictions ready")


2025-11-26 15:26:58 - INFO - GENERATING OOF PREDICTIONS
2025-11-26 15:26:58 - INFO - ‚ö†Ô∏è OOF file already exists: C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria\oof_predictions\train_oof_TimesFM.csv
2025-11-26 15:26:58 - INFO - ‚úì Usando OOF existente
2025-11-26 15:26:58 - INFO - ‚úì OOF predictions ready


In [23]:
# %%
# ================================================================================
# CELDA 15: 📌 EXPORTAR PREDICCIONES PARA META-LEARNER (CRÍTICO)
# ================================================================================

# Cell 15: export the holdout predictions (as log returns) to the CSV that
# the meta-learner consumes.
logger.info("")
logger.info("=" * 80)
logger.info("üìå EXPORTING PREDICTIONS FOR META-LEARNER")
logger.info("=" * 80)

# One row per holdout date, holding RETURNS converted from the level forecasts
export_df = pd.DataFrame({
    'ds': holdout_df.index,             # holdout dates
    'y_pred': predictions_returns,       # predicted log returns (converted from levels)
    'model': 'TimesFM',                  # model name
    'type': 'log_returns'                # prediction type
})

# CRITICAL validations
assert len(export_df) == N_HOLDOUT, f"Expected {N_HOLDOUT} rows, got {len(export_df)}"
assert export_df['y_pred'].isna().sum() == 0, "NaN values found in predictions"
# isna() does not flag +/-inf, which np.log yields for a zero ratio, so
# finiteness is checked separately.
assert np.isfinite(export_df['y_pred']).all(), "Non-finite values found in predictions"
assert export_df['model'].unique()[0] == 'TimesFM', "Model name mismatch"
assert export_df['type'].unique()[0] == 'log_returns', "Type mismatch"

# Save into predictions_dump/
export_path = PREDICTIONS_DUMP / "pred_TimesFM.csv"
export_df.to_csv(export_path, index=False)

logger.info(f"‚úì Predictions exported: {export_path}")
logger.info(f"  Rows: {len(export_df)}")
logger.info(f"  Columns: {list(export_df.columns)}")
logger.info(f"  Prediction type: log_returns (converted from levels)")
logger.info(f"  Date range: {export_df['ds'].min()} to {export_df['ds'].max()}")

# Preview
logger.info("")
logger.info("Preview of exported predictions:")
print(export_df.head(10))
print("...")
print(export_df.tail(5))


2025-11-26 15:27:03 - INFO - 
2025-11-26 15:27:03 - INFO - üìå EXPORTING PREDICTIONS FOR META-LEARNER
2025-11-26 15:27:03 - INFO - ‚úì Predictions exported: C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria\predictions_dump\pred_TimesFM.csv
2025-11-26 15:27:03 - INFO -   Rows: 60
2025-11-26 15:27:03 - INFO -   Columns: ['ds', 'y_pred', 'model', 'type']
2025-11-26 15:27:03 - INFO -   Prediction type: log_returns (converted from levels)
2025-11-26 15:27:03 - INFO -   Date range: 2025-04-15 00:00:00 to 2025-07-07 00:00:00
2025-11-26 15:27:03 - INFO - 
2025-11-26 15:27:03 - INFO - Preview of exported predictions:


          ds    y_pred    model         type
0 2025-04-15 -0.001533  TimesFM  log_returns
1 2025-04-16 -0.001107  TimesFM  log_returns
2 2025-04-17  0.000593  TimesFM  log_returns
3 2025-04-18  0.000825  TimesFM  log_returns
4 2025-04-21  0.000411  TimesFM  log_returns
5 2025-04-22 -0.000475  TimesFM  log_returns
6 2025-04-23 -0.001458  TimesFM  log_returns
7 2025-04-24 -0.001190  TimesFM  log_returns
8 2025-04-25 -0.000865  TimesFM  log_returns
9 2025-04-28 -0.001470  TimesFM  log_returns
...
           ds    y_pred    model         type
55 2025-07-01  0.000098  TimesFM  log_returns
56 2025-07-02 -0.000115  TimesFM  log_returns
57 2025-07-03  0.001034  TimesFM  log_returns
58 2025-07-04  0.000417  TimesFM  log_returns
59 2025-07-07  0.000303  TimesFM  log_returns


In [24]:
# %%
# ================================================================================
# CELL 16: SAVE CONFIGURATION AND METRICS
# ================================================================================

logger.info("")
logger.info("=" * 80)
logger.info("SAVING CONFIGURATION AND METRICS")
logger.info("=" * 80)

# Taken once so that config.json and metrics.json written by the same run carry
# an identical timestamp and can be matched exactly.
run_timestamp = datetime.now().isoformat()

# Run configuration
config = {
    'model_name': 'TimesFM',
    'model_id': TIMESFM_CONFIG['model_id'],
    'variant': 'zero-shot',
    'version': 'NIVELES',
    'target': TARGET_COL,                   # 'PEN' (levels)
    'target_description': 'PEN levels, converted to log_returns for metrics',
    'n_holdout': N_HOLDOUT,
    'h_forecast': H_FORECAST,
    'context_len': TIMESFM_CONFIG['context_len'],
    'backend': TIMESFM_CONFIG['backend'],
    'train_size': len(train_timesfm),
    'holdout_size': len(holdout_timesfm),
    'random_state': RANDOM_STATE,
    'exogenous_features': 0,                # TimesFM does not use exogenous variables
    'exogenous_list': [],
    'methodology': 'rolling_forecast_no_retrain',
    'run_id': RUN_ID,
    'timestamp': run_timestamp
}

config_path = OUTPUT_DIR / "config.json"
# Explicit encoding: without it open() falls back to the locale codec
# (cp1252 on Windows), which makes the artifact platform-dependent.
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

logger.info(f"‚úì Config saved: {config_path}")

# Holdout metrics. Values are cast to built-in float before rounding because
# json cannot serialize NumPy scalars such as np.float32.
metrics = {
    'model': 'TimesFM',
    'variant': 'zero-shot',
    'version': 'NIVELES',
    'DA': round(float(da), 2),
    'MASE': round(float(mase), 4),
    'MAE': round(float(mae), 6),
    'n_predictions': len(predictions_levels),
    'baseline_comparison': {
        'ARX_DA': BASELINE_ARX['DA'],
        'ARX_MASE': BASELINE_ARX['MASE'],
        'delta_DA': round(float(da - BASELINE_ARX['DA']), 2),
        'delta_MASE': round(float(mase - BASELINE_ARX['MASE']), 4)
    },
    'run_id': RUN_ID,
    'timestamp': run_timestamp
}

metrics_path = OUTPUT_DIR / "metrics.json"
with open(metrics_path, 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

logger.info(f"‚úì Metrics saved: {metrics_path}")




2025-11-26 15:27:19 - INFO - 
2025-11-26 15:27:19 - INFO - SAVING CONFIGURATION AND METRICS
2025-11-26 15:27:19 - INFO - ‚úì Config saved: C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria\TimesFM_h1_USD_PEN\config.json
2025-11-26 15:27:19 - INFO - ‚úì Metrics saved: C:\Users\Carlos Palma\OneDrive\Documents\Cursos\UTEC Computer Science\TESIS\NUEVO PAPER\tesis_maestria\TimesFM_h1_USD_PEN\metrics.json


In [25]:
# %%
# ================================================================================
# CELL 17: FINAL SUMMARY
# ================================================================================

import unicodedata  # stdlib; used only to measure the display width of the summary rows


def _display_width(text: str) -> int:
    """Return the number of monospace columns `text` is expected to occupy.

    East Asian wide/fullwidth characters (which includes most emoji) count as
    two columns and combining marks as zero; everything else counts as one.
    Actual rendering still depends on the terminal/font.
    """
    width = 0
    for ch in text:
        if unicodedata.combining(ch):
            continue
        width += 2 if unicodedata.east_asian_width(ch) in ('W', 'F') else 1
    return width


def _render_box(title: str, rows: List[str], min_width: int = 78) -> str:
    """Render `title` and `rows` inside a box whose right border always lines up.

    The inner width is `min_width` unless some row is longer, in which case the
    box grows to fit that row plus a two-column right margin. Padding is
    computed per row instead of being typed by hand, so interpolated values of
    any length (e.g. long absolute paths) cannot push the border out of place.
    """
    longest = max(_display_width(text) for text in [title, *rows])
    inner = max(min_width, longest + 2)

    def boxed(text: str) -> str:
        return "‚ïë" + text + " " * (inner - _display_width(text)) + "‚ïë"

    top = "‚ïî" + "‚ïê" * inner + "‚ïó"
    separator = "‚ï†" + "‚ïê" * inner + "‚ï£"
    bottom = "‚ïö" + "‚ïê" * inner + "‚ïù"
    return "\n".join([top, boxed(title), separator, *(boxed(text) for text in rows), bottom])


logger.info("")
logger.info("=" * 80)
logger.info("üèÅ TIMESFM USD/PEN - COMPLETADO (VERSI√ìN NIVELES)")
logger.info("=" * 80)

# Paths are joined with pathlib so the separator matches the platform; plain
# "{dir}/file" interpolation yields mixed separators on Windows.
# Path(...) is a no-op for values that are already Path objects.
summary_rows = [
    "",
    "  üìä CONFIGURACI√ìN:",
    "     Target:          PEN (NIVELES)",
    "     Ex√≥genas:        Ninguna (TimesFM es univariado)",
    f"     Holdout:         {N_HOLDOUT} d√≠as",
    f"     Context length:  {TIMESFM_CONFIG['context_len']}",
    "",
    "  üìà M√âTRICAS HOLDOUT (calculadas en RETORNOS):",
    f"     DA:              {da:.2f}%",
    f"     MASE:            {mase:.4f}",
    f"     MAE:             {mae:.6f}",
    "",
    "  üìä COMPARACI√ìN CON BASELINE:",
    f"     ARX:             DA={BASELINE_ARX['DA']:.2f}%, MASE={BASELINE_ARX['MASE']:.4f}",
    f"     TimesFM:         DA={da:.2f}%, MASE={mase:.4f}",
    f"     Delta:           DA {da - BASELINE_ARX['DA']:+.2f}%, MASE {mase - BASELINE_ARX['MASE']:+.4f}",
    "",
    "  üìÅ ARCHIVOS GENERADOS:",
    f"     Predicciones:    {Path(PREDICTIONS_DUMP) / 'pred_TimesFM.csv'}",
    f"     OOF:             {Path(OOF_DIR) / 'train_oof_TimesFM.csv'}",
    f"     Config:          {Path(OUTPUT_DIR) / 'config.json'}",
    f"     Metrics:         {Path(OUTPUT_DIR) / 'metrics.json'}",
    "",
    "  ‚úÖ Listo para meta-learner",
    "",
]

# Blank line before and after the box, as in the original banner.
print("\n" + _render_box(
    "  TIMESFM USD/PEN FORECASTING - RESUMEN FINAL (VERSI√ìN NIVELES)",
    summary_rows,
) + "\n")

2025-11-26 15:27:23 - INFO - 
2025-11-26 15:27:23 - INFO - üèÅ TIMESFM USD/PEN - COMPLETADO (VERSI√ìN NIVELES)



‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë  TIMESFM USD/PEN FORECASTING - RESUMEN FINAL (VERSI√ìN NIVELES)               ‚ïë
‚ï†‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï£
‚ïë                                                                              ‚ïë
‚ïë  üìä CONFIGURACI√ìN:                                                           ‚ïë
‚ïë     Target:          PEN (NIVELES)                                           ‚ïë
‚ïë     Ex√≥genas:        Ninguna (TimesFM es univariado)                         ‚ïë
‚ïë     Holdout:         60 d√≠as                                                ‚ïë
‚ï