In [1]:
import logging
import sys
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import torch

# Imports from your modules
from src.analysis.pfi_analysis import PFIAnalyzer
from src.config.default_config import ExperimentConfig
from src.data.data_loader import DataLoader
from src.models.tft_model import TFTModelWrapper
from src.visualization.plot_graphs import GraphPlotter
from src.visualization.plot_metrics import MetricsPlotter

# Set up experiment configuration
# `threshold` is the cut-off handed to create_causal_graph and the
# adjacency/causal-graph plots further down.
config = ExperimentConfig()
config.DATASET_NAME = "harvard_diamond_data_0"
full_config = config.get_full_config()
threshold = 0.8

# Create output directory for this run
# The timestamp suffix gives every run its own folder under output/.
run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f'output/results_{config.DATASET_NAME}_{run_stamp}'
Path(output_dir).mkdir(parents=True, exist_ok=True)
print(f"Output directory created at: {output_dir}")

# Initialize components
# Both plotters receive the run folder created above.
data_loader = DataLoader('./data')
metrics_plotter, graph_plotter = (
    MetricsPlotter(output_dir),
    GraphPlotter(output_dir),
)

# Load Data
print("\nLoading data...")
df = data_loader.load_data(config.DATASET_NAME)
ground_truth = data_loader.load_ground_truth(config.DATASET_GROUNDTRUTH_NAME)

# Sanity dump of what was loaded; each header and its value go on separate
# lines, exactly as two consecutive print() calls would emit them.
print("\nDataset shape:", df.shape)
print("\nFirst few rows of the data:", df.head(), sep="\n")
print("\nGround truth adjacency matrix:", ground_truth, sep="\n")

# Initialize Model and Analyzer
# Pick the accelerator at runtime instead of hard-coding Apple's 'mps', so the
# same cell runs unchanged on NVIDIA GPUs and CPU-only machines. On an Apple
# Silicon machine CUDA is unavailable, so this still resolves to 'mps'.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# NOTE(review): "temp" is not one of the columns printed for this dataset
# (V1..V4); it looks like a placeholder that the analyzer overrides per
# target -- confirm against PFIAnalyzer.run_analysis.
model_wrapper = TFTModelWrapper(
    config=config.MODEL_CONFIG,
    target_variable="temp",  # Will be updated for each variable
    device=device,
)

pfi_analyzer = PFIAnalyzer(model_wrapper)

# Run PFI Analysis
# `results` feeds both the metric plots and prepare_matrices below.
print("\nRunning PFI analysis...")
results = pfi_analyzer.run_analysis(
    df=df,
    train_test_split=config.TRAIN_TEST_SPLIT,
    num_samples=config.NUM_SAMPLES,
    metrics_plotter=metrics_plotter,
)

# Generate Visualizations
print("\nPreparing visualization matrices...")
variables, interval_pfi_ratios, point_pfi_ratios = pfi_analyzer.prepare_matrices(results)

# Every plot below is produced once per score type; keep the plot label and
# its PFI-ratio matrix together so each step is a single loop.
score_views = (
    ("Interval Score", interval_pfi_ratios),
    ("Point Score", point_pfi_ratios),
)

# Plot metrics
print("\nCreating visualizations...")
metrics_plotter.plot_interval_scores(results)
metrics_plotter.plot_point_scores(results)
for score_label, pfi_ratios in score_views:
    metrics_plotter.plot_pfi_ratios(variables, pfi_ratios, score_label)

# Plot ground truth graph
graph_plotter.plot_ground_truth_graph(ground_truth)


print(f"\nEvaluating threshold: {threshold}")

# Create graphs (interval first, then point -- same order as score_views)
interval_graph, point_graph = (
    pfi_analyzer.create_causal_graph(variables, pfi_ratios, threshold)
    for _, pfi_ratios in score_views
)

# Plot graphs and matrices: both adjacency matrices first, then both graphs
for score_label, pfi_ratios in score_views:
    graph_plotter.plot_adjacency_matrix(
        variables, pfi_ratios, score_label, threshold
    )

for causal_graph, (score_label, pfi_ratios) in zip(
    (interval_graph, point_graph), score_views
):
    graph_plotter.plot_causal_graph(
        causal_graph, score_label, threshold, pfi_ratios
    )

# Evaluate against ground truth
# Both graphs are scored before anything is printed, so the two reports
# appear back to back.
interval_metrics = pfi_analyzer.evaluate_graph(interval_graph, ground_truth)
point_metrics = pfi_analyzer.evaluate_graph(point_graph, ground_truth)

# (score type, metrics) pairs in reporting order; shared by the per-metric
# printout and the summary table so the two cannot drift apart.
metrics_by_score = (
    ("Interval", interval_metrics),
    ("Point", point_metrics),
)

for score_type, metrics in metrics_by_score:
    print(f"\n{score_type} Score Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

# Create and display final results summary
# One row per score type, built from the same pairs instead of copy-pasted
# dict literals.
summary_data = [
    {
        'Threshold': threshold,
        'Score Type': score_type,
        'Precision': metrics['precision'],
        'Recall': metrics['recall'],
        'F1': metrics['f1'],
    }
    for score_type, metrics in metrics_by_score
]

summary_df = pd.DataFrame(summary_data)
print("\nFinal Results Summary:")
# A bound str.format is already a one-argument callable; no lambda needed.
print(summary_df.to_string(index=False, float_format='{:.4f}'.format))

print(f"\nExperiment completed. Results saved in: {output_dir}")

  from .autonotebook import tqdm as notebook_tqdm
  df_shifted.fillna(method='bfill', inplace=True)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 6.6 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 3.0 K  | train
6  | static_context_grn                | _GatedRes

Output directory created at: output/results_harvard_diamond_data_0_20250131_222649

Loading data...

Dataset shape: (4000, 4)

First few rows of the data:
                  V1        V2        V3        V4
timestamp                                         
2023-01-01  0.188713 -0.700368 -3.666094  0.226284
2023-01-02  0.240496  0.707094 -1.074562  1.786855
2023-01-03 -0.995252 -0.768571 -0.377442  0.364459
2023-01-04 -0.395351  1.372412 -0.253002  0.123389
2023-01-05 -0.412162 -0.719210 -0.801287  0.118931

Ground truth adjacency matrix:
    V1  V2  V3  V4
V1   1   1   1   0
V2   0   1   0   1
V3   0   0   1   1
V4   0   0   0   1

Running PFI analysis...

Analyzing target variable: V1
Epoch 1: 100%|██████████| 50/50 [00:05<00:00,  8.60it/s, train_loss=2.680]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 50/50 [00:05<00:00,  8.60it/s, train_loss=2.680]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.28it/s]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.32it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

  df_shifted.fillna(method='bfill', inplace=True)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 6.6 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 3.0 K  | train
6  | static_context_grn                | _GatedResidualNetwork            | 1.1 K  | train
7  | stat



Analyzing target variable: V2
Epoch 0:  94%|█████████▍| 47/50 [00:05<00:00,  8.51it/s, train_loss=2.820]

/Users/florisschouw/.pyenv/versions/3.10.14/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.32it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

  df_shifted.fillna(method='bfill', inplace=True)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 6.6 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 3.0 K  | train
6  | static_context_grn                | _GatedResidualNetwork            | 1.1 K  | train
7  | stat



Analyzing target variable: V3
Epoch 1: 100%|██████████| 50/50 [00:06<00:00,  8.09it/s, train_loss=1.800]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 50/50 [00:06<00:00,  8.09it/s, train_loss=1.800]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.30it/s]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.28it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.30it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.30it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.32it/s]

  df_shifted.fillna(method='bfill', inplace=True)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 6.6 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 3.0 K  | train
6  | static_context_grn                | _GatedResidualNetwork            | 1.1 K  | train
7  | stat



Analyzing target variable: V4
Epoch 1: 100%|██████████| 50/50 [00:05<00:00,  8.36it/s, train_loss=1.390]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 50/50 [00:05<00:00,  8.35it/s, train_loss=1.390]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.29it/s]


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.30it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.31it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.32it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████| 1/1 [00:03<00:00,  0.32it/s]

Preparing visualization matrices...

Creating visualizations...

Evaluating threshold: 0.8

Interval Score Metrics:
precision: 1.0000
recall: 0.2500
f1: 0.4000
true_positives: 2.0000
false_positives: 0.0000
false_negatives: 6.0000

Point Score Metrics:
precision: 1.0000
recall: 0.2500
f1: 0.4000
true_positives: 2.0000
false_positives: 0.0000
false_negatives: 6.0000

Final Results Summary:
 Threshold Score Type  Precision  Recall     F1
    0.8000   Interval     1.0000  0.2500 0.4000
    0.8000      Point     1.0000  0.2500 0.4000

Experiment completed. Results saved in: output/results_harvard_diamond_data_0_20250131_222649
