In [1]:

import sys
import numpy as np
import pandas as pd
import torch
import mlflow
from pathlib import Path
import json
from datetime import datetime
from typing import Dict, Any
import yaml
from abc import ABC, abstractmethod
import inspect

# Take the parent of the current working directory as the project root and
# append it to sys.path so the `src.*` imports that follow can be resolved.
# NOTE(review): this depends on where the kernel was started — presumably the
# notebook lives one level below the repository root; confirm.
project_root = Path().resolve().parents[0]
sys.path.append(str(project_root))

from src.models.base.base_model import ParametricModel, DeepLearningModel
from src.models.parametric.gbm import GeometricBrownianMotion
from src.models.parametric.ou_process import OUProcess
from src.models.parametric.merton_jump_diffusion import MertonJumpDiffusion
from src.models.parametric.garch11 import GARCH11
from src.models.parametric.de_jump_diffusion import DoubleExponentialJumpDiffusion
from src.models.non_parametric.block_bootstrap import BlockBootstrap
from src.models.non_parametric.time_gan import TimeGAN
from src.models.non_parametric.quant_gan import QuantGAN
from src.models.non_parametric.time_vae import TimeVAE
from src.models.non_parametric.takahashi import TakahashiDiffusion

from src.utils.display_utils import show_with_start_divider, show_with_end_divider
from src.utils.preprocessing_utils import (
    create_dataloaders,
    preprocess_data,
    LogReturnTransformation,
    sliding_window_view,
)
from src.utils.configs_utils import get_dataset_cfgs
from src.utils.evaluation_classes_utils import (
    TaxonomyEvaluator,
    DiversityEvaluator,
    FidelityEvaluator,
    RuntimeEvaluator,
    StylizedFactsEvaluator,
    VisualAssessmentEvaluator
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class UnifiedEvaluator:
    """
    Unified evaluator that sets up the MLFlow experiment, trains each model,
    generates samples and runs the evaluation suites on them.

    Results are written below ``<project_root>/results/evaluation_<timestamp>/``
    and logged to MLFlow as artifacts.
    """

    def __init__(
        self,
        experiment_name: str,
        parametric_dataset_cfgs: Dict[str, Any],
        non_parametric_dataset_cfgs: Dict[str, Any]
    ):
        """
        Initialize the evaluator with MLFlow experiment.

        Args:
            experiment_name (str): Name of the MLFlow experiment
            parametric_dataset_cfgs (Dict[str, Any]): Configuration passed to
                ``preprocess_data`` for the parametric models
            non_parametric_dataset_cfgs (Dict[str, Any]): Configuration passed to
                ``preprocess_data`` for the non-parametric models
        """
        self.parametric_dataset_cfgs = parametric_dataset_cfgs
        self.non_parametric_dataset_cfgs = non_parametric_dataset_cfgs
        self.experiment_name = experiment_name
        mlflow.set_experiment(experiment_name)

        self.results = {}
        # One timestamp per evaluator instance: it names both the results
        # directory and every MLFlow run started by this instance.
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.results_dir = project_root / "results" / f"evaluation_{self.timestamp}"
        self.results_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _to_numpy(data) -> np.ndarray:
        """Convert a torch tensor or any array-like object to a NumPy array."""
        if isinstance(data, torch.Tensor):
            # detach()/cpu() make the conversion safe for tensors that require
            # grad or live on an accelerator; a bare .numpy() raises for those.
            return data.detach().cpu().numpy()
        return np.asarray(data)

    @staticmethod
    def _subsample(data: np.ndarray, num_samples: int, seed: int) -> np.ndarray:
        """Pick at most ``num_samples`` rows of ``data`` in a seeded random order."""
        # A local Generator keeps the draw reproducible without touching the
        # global NumPy random state.
        rng = np.random.default_rng(seed)
        idx = rng.permutation(data.shape[0])[:num_samples]
        return data[idx]

    def evaluate_model(
        self,
        model,
        model_name: str,
        real_data: np.ndarray,
        train_data,
        generation_kwargs: Dict[str, Any],
        fit_kwargs: Dict[str, Any] = None,
        seed: int = 42
    ) -> Dict[str, Any]:
        """
        Train one model, generate samples from it and run all evaluators.

        Args:
            model: The generative model to evaluate (must expose ``fit`` and ``generate``)
            model_name: Name of the model for logging and for the output directory
            real_data: Real data for comparison (tensor or array-like)
            train_data: Training data handed to ``model.fit``
            generation_kwargs: Keyword arguments for ``model.generate()``;
                ``num_samples`` (default 500) and ``generation_length`` (default 1)
                are also read here
            fit_kwargs: Optional training options; only ``num_epochs`` (default 1)
                is used, and only for ``DeepLearningModel`` instances
            seed: Seed for the random subsample of ``real_data``

        Returns:
            Dictionary containing all evaluation metrics
        """
        show_with_start_divider(f"Evaluating {model_name}")
        num_samples = generation_kwargs.get('num_samples', 500)
        # Always bind num_epochs: previously it was undefined when a deep
        # learning model was evaluated without fit_kwargs.
        fit_kwargs = fit_kwargs or {}
        num_epochs = fit_kwargs.get('num_epochs', 1)

        model_dir = self.results_dir / model_name
        model_dir.mkdir(parents=True, exist_ok=True)

        with mlflow.start_run(run_name=f"{model_name}_{self.timestamp}"):
            mlflow.log_param("model_name", model_name)

            evaluation_results: Dict[str, Any] = {}

            print(f"Training {model_name}...")
            if isinstance(model, DeepLearningModel):
                model.fit(train_data, num_epochs=num_epochs)
            else:
                model.fit(train_data)

            print(f"\nGenerating {num_samples} samples...")
            runtime_evaluator = RuntimeEvaluator(
                generate_func=model.generate,
                generation_kwargs=generation_kwargs
            )
            runtime_results = runtime_evaluator.evaluate()
            mlflow.log_metric(
                f"generation_time_{num_samples}_samples",
                runtime_results[f"generation_time_{num_samples}_samples"]
            )
            evaluation_results.update(runtime_results)

            generated_data = self._to_numpy(model.generate(**generation_kwargs))
            real_data = self._to_numpy(real_data)

            if real_data.ndim == 1:
                # A single series is cut into overlapping windows so it can be
                # compared with the generated windows.
                window_size = generation_kwargs.get('generation_length', 1)
                real_data = sliding_window_view(real_data, window_size, 1)
            real_data = self._subsample(real_data, num_samples, seed)

            print(f"Generated data shape: {generated_data.shape}")
            print(f"Real data shape: {real_data.shape}")

            evaluators = [
                FidelityEvaluator(real_data, generated_data),
                # DiversityEvaluator(real_data, generated_data),
                StylizedFactsEvaluator(real_data, generated_data),
                VisualAssessmentEvaluator(real_data, generated_data, model_dir)
            ]

            for evaluator in evaluators:
                print(f"Computing {evaluator.__class__.__name__}...")
                results = evaluator.evaluate()
                if results is not None:
                    evaluation_results.update(results)

            metrics_path = model_dir / "metrics.json"
            with open(metrics_path, 'w') as f:
                # default=str keeps the dump from failing on non-JSON values.
                json.dump(evaluation_results, f, indent=2, default=str)

            mlflow.log_artifact(str(metrics_path))
            print(f"Evaluation completed for {model_name} (results saved at {metrics_path}).")

            return evaluation_results

    def run_complete_evaluation(
        self,
        num_samples: int = 500,
        seed: int = 42,
        batch_size: int = 32,
        num_epochs: int = 10
    ) -> Dict[str, Any]:
        """
        Run complete evaluation on all configured models.

        Args:
            num_samples: Number of samples to generate per model
            seed: Seed used for the dataloaders and for subsampling the real data
            batch_size: Batch size of the non-parametric dataloaders
            num_epochs: Number of training epochs for the non-parametric models

        Returns:
            Dictionary containing results for all models, keyed by model name
        """
        show_with_start_divider("Starting Complete Evaluation Pipeline")
        train_data_para, valid_data_para, test_data_para, _, _, _ = preprocess_data(self.parametric_dataset_cfgs)

        print(f"  - Parametric train data shape: {train_data_para.shape}")
        print(f"  - Parametric valid data shape: {valid_data_para.shape}")
        print(f"  - Parametric test data shape: {test_data_para.shape}")

        (
            train_data_non_para,
            valid_data_non_para,
            test_data_non_para,
            train_initial_non_para,
            valid_initial_non_para,
            test_initial_non_para
        ) = preprocess_data(self.non_parametric_dataset_cfgs)
        # NOTE(review): the recorded notebook output ends with
        # "AttributeError: 'list' object has no attribute 'dim'" right after
        # TimeVAE prints a list-valued batch. Presumably these loaders yield
        # [data, initial] pairs when the *_initial arguments are passed —
        # confirm that every model's fit() accepts that batch format.
        train_loader_non_para, _, _ = create_dataloaders(
            train_data_non_para,
            valid_data_non_para,
            test_data_non_para,
            batch_size=batch_size,
            train_seed=seed,
            valid_seed=seed,
            test_seed=seed,
            train_initial=train_initial_non_para,
            valid_initial=valid_initial_non_para,
            test_initial=test_initial_non_para,
        )

        # The unpacking also checks that the training windows are 2-D; the
        # second axis is the length every model is asked to generate.
        _, generation_length = train_data_non_para.shape
        print(f"  - Non-parametric train data shape: {train_data_non_para.shape}")
        print(f"  - Non-parametric valid data shape: {valid_data_non_para.shape}")
        print(f"  - Non-parametric test data shape: {test_data_non_para.shape}")

        parametric_models = {}
        # parametric_models["GBM"] = GeometricBrownianMotion()
        # parametric_models["OU Process"] = OUProcess()
        # parametric_models["MJD"] = MertonJumpDiffusion()
        # parametric_models["GARCH11"] = GARCH11()
        # parametric_models["DEJD"] = DoubleExponentialJumpDiffusion()
        # parametric_models["BlockBootstrap"] = BlockBootstrap(block_size=generation_length)

        non_parametric_models = {}
        # non_parametric_models["TimeGAN"] = TimeGAN(seq_len=generation_length, hidden_dim=24, num_layers=3, learning_rate=1e-5)
        # non_parametric_models["QuantGAN"] = QuantGAN()
        non_parametric_models["TimeVAE"] = TimeVAE(
            length=None,
            num_channels=1,
            latent_dim=10,
            hidden_layer_sizes=[100, 200, 400],
            trend_poly=0,
            custom_seas=None,
            use_residual_conn=True,
            reconstruction_wt=3.0,
            lr=1e-5
        )
        # non_parametric_models["Takahashi DDPM"] = TakahashiDiffusion(
        #     length=None,
        #     num_channels=1,
        #     num_steps=100,
        #     beta_start=0.0001,
        #     beta_end=0.02,
        #     wavelet='haar',
        #     lr=1e-5
        # )

        all_results = {}

        generation_kwargs_para = {'num_samples': num_samples, 'generation_length': generation_length}
        for model_name, model in parametric_models.items():
            results = self.evaluate_model(
                model=model,
                model_name=model_name,
                real_data=valid_data_para,
                train_data=train_data_para,
                generation_kwargs=generation_kwargs_para,
                seed=seed
            )
            all_results[model_name] = results

        # Evaluate non-parametric models on non-parametric dataset (use DataLoader and num_epochs)
        generation_kwargs_non_para = {'num_samples': num_samples, 'generation_length': generation_length}
        fit_kwargs_non_para = {'num_epochs': num_epochs}
        for model_name, model in non_parametric_models.items():
            results = self.evaluate_model(
                model=model,
                model_name=model_name,
                real_data=valid_data_non_para,
                train_data=train_loader_non_para,
                generation_kwargs=generation_kwargs_non_para,
                fit_kwargs=fit_kwargs_non_para,
                seed=seed
            )
            all_results[model_name] = results

        results_file = self.results_dir / "complete_evaluation.json"
        with open(results_file, 'w') as f:
            json.dump(all_results, f, indent=2, default=str)
        # Log the summary inside an explicit run. Calling log_artifact with no
        # active run makes MLFlow open one implicitly and leave it active,
        # which breaks the next start_run() in the same kernel.
        with mlflow.start_run(run_name=f"complete_evaluation_{self.timestamp}"):
            mlflow.log_artifact(str(results_file))

        show_with_end_divider("EVALUATION COMPLETE")
        print(f"Results saved to: {results_file}")
        print(f"MLFlow experiment: {self.experiment_name}")

        return all_results

def main():
    """Load both dataset configurations and launch the full evaluation pipeline."""
    # get_dataset_cfgs() is unpacked with the non-parametric configuration first.
    cfg_non_param, cfg_param = get_dataset_cfgs()

    evaluator_kwargs = {
        "experiment_name": "TimeSeries_Generation_Comprehensive_Evaluation",
        "parametric_dataset_cfgs": cfg_param,
        "non_parametric_dataset_cfgs": cfg_non_param,
    }
    pipeline = UnifiedEvaluator(**evaluator_kwargs)
    pipeline.run_complete_evaluation(num_samples=1000)

In [3]:
# Start the evaluation pipeline only when this code runs as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

  return FileStore(store_uri, store_uri)


Starting Complete Evaluation Pipeline
Preprocessing data for AAPL
  - Parametric train data shape: torch.Size([9056])
  - Parametric valid data shape: torch.Size([1132])
  - Parametric test data shape: torch.Size([1133])
Preprocessing data for AAPL
Desired time series sample length (lag with max PACF >0): 103
PACF at that lag: 0.040741497942971425
  - Non-parametric train data shape: (8975, 103)
  - Non-parametric valid data shape: (1122, 103)
  - Non-parametric test data shape: (1122, 103)
Evaluating TimeVAE
Training TimeVAE...
Inferred sequence length: 103
Batch: [tensor([[ 0.0264, -0.0220,  0.0645,  ...,  0.0389, -0.0231, -0.0008],
        [-0.0380,  0.0206, -0.0212,  ..., -0.0124,  0.0303, -0.0311],
        [-0.0083,  0.0328, -0.0245,  ..., -0.0127, -0.0074,  0.0018],
        ...,
        [ 0.0167,  0.0365,  0.0047,  ..., -0.0593, -0.0034, -0.0901],
        [ 0.0064, -0.0110,  0.0083,  ..., -0.0501,  0.0025,  0.0028],
        [-0.0193, -0.0347,  0.0000,  ..., -0.0229, -0.0116,  0.0

AttributeError: 'list' object has no attribute 'dim'