In [1]:
# ============= ARIMA ==================
import pmdarima as pm
import numpy as np
from joblib import Parallel, delayed

In [2]:

# Assume data shape: [N, D, R, T]
# Let's say we want to forecast the next value after time T
# So ARIMA is applied on data[..., :T]

def fit_predict_arima_per_series(ts, forecast_steps=1):
    try:
        # Use fixed order to avoid long auto_arima tuning
        # model = pm.ARIMA(order=(1, 0, 0), suppress_warnings=True)  # AR(1)
        model = pm.auto_arima(
            ts,
            start_p=1, max_p=3,
            start_q=0, max_q=2,
            d=None, max_d=1,
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            error_action='ignore'
        )
        model.fit(ts)
        forecast = model.predict(n_periods=forecast_steps)
        return forecast
    except Exception as e:
        # Fallback to naive prediction if ARIMA fails
        print('EH')
        return np.repeat(ts[-1], forecast_steps)

def arima_baseline(data, forecast_steps=1, n_jobs=-1):
    N, D, R, T = data.shape
    results = np.zeros((N, D, R, forecast_steps))

    def process_single(n, d, r):
        ts = data[n, d, r, :]  # Shape: [T]
        return fit_predict_arima_per_series(ts, forecast_steps)

    # Create list of jobs
    jobs = [(n, d, r) for n in range(N) for d in range(D) for r in range(R)]

    # Run in parallel
    forecasts = Parallel(n_jobs=n_jobs, backend='loky')(
        delayed(process_single)(n, d, r) for (n, d, r) in jobs
    )

    # Refill results array
    for idx, (n, d, r) in enumerate(jobs):
        results[n, d, r, :] = forecasts[idx]

    return results  # Shape: [N, D, R, forecast_steps]

In [3]:
import os
import pandas as pd
import numpy as np
import json
import torch
import sys
from scipy.spatial.distance import pdist, squareform

# Custom modules
project_root = os.path.abspath('/home/jaume/Desktop/Code/stmgcn')
sys.path.append(project_root)

from model.train_stmgcn_ode import build_model_multiplex
from synthetic_data.train_synthetic_models import ObjectiveSynthetic, batch_loop
from synthetic_data.gene_evolution import generate_graph
from utils.model_selection_sklearn import stratified_split
from utils.utils import seed_everything, str2bool, get_best_params
from experiments.ACDC_CV import plot_results, plot_combined_trajectories, get_data_in_original_scale
from experiments.ACDC_All import plot_predicted_trajectories
import torchvision.transforms as transforms
from utils.model_selection_optuna import hypertune_optuna
from model.testing_model import get_latex_table, wrap_latex_table, save_training_convergence
from synthetic_data.pendulum import PendulumDataset

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
data_folder = "/media/jaume/DATA/Data/Multiplex_Synthetic_FINAL"
study_name = "Multiplex_CoupledPendulum_DIMENSIONS_NEW_LOSS"

In [5]:
# Sahred parameters
use_global_data = False
use_position = False
use_region_id = False

use_time = False
use_edges = True
use_norm = True
normalization = "ZNorm"
norm_by_group = False # Normalize by group, i.e: healthy controls
norm_only_ed = False  # Normalize only by the ED frame
device = "cpu"
use_similarity = False
reprocess_datasets = False

space_coupling = 1
time_coupling = 1
reprocess = False

In [6]:
seed_everything()
duration = 10.0
dt = 0.1
k = 2.0
num_samples = 500
dataset_name = f'duration-{duration}_dt-{dt}_k-{k}'

# Save folder
save_folder = os.path.join(data_folder, "CoupledPendulum_Graphs", f"{dataset_name}")

In [7]:
# Load previous data
model_filename = os.path.join(save_folder, 'FinalModel', 'model.pt')
params_filename = os.path.join(save_folder, f'objective_params_{study_name}.json')

# Create the dataset
dataset = PendulumDataset(name='CoupledPendulum',
                          save_dir=save_folder, 
                          reprocess=reprocess,
                          space_coupling=space_coupling, 
                          time_coupling=time_coupling, 
                          dt=dt, 
                          duration=duration, 
                          spring_constant=k,
                          num_samples=num_samples,
                          )

# Read the params filename from the JSON
with open(params_filename, 'r') as f:
    params_dict = json.load(f)

# Load the study parameters
results_hp_folder = os.path.join(data_folder, "CoupledPendulum_Graphs", "results")
df_params_path = os.path.join(results_hp_folder, f'{study_name}_trials.csv')
df_params = pd.read_csv(df_params_path)
df_params.dropna(how='any', inplace=True)
df_params = df_params.sort_values(by='value', ascending=False)
# params_names = [key for key in df_params.columns if key.startswith('params_')]
# best_params = df_params.iloc[0].to_dict()
# best_params = df_params.iloc[0:5][params_names].mean().to_dict()
# best_params = {key.replace('params_', ''): value for key, value in best_params.items() if key.startswith('params_')}
best_params = get_best_params(df_params.iloc[0:5], use_median=True)
best_params['hidden_dim'] = 17

# Update the param_dict
params_dict.update(best_params)

In [8]:
# Create the objective
objective_optuna = ObjectiveSynthetic(study_name,
                                        dataset,
                                        normalization=normalization,
                                        norm_only_ed=False,
                                        save_dir=save_folder,
                                        direction="maximize",
                                        device = device,
                                        track_experiment=False,
                                        use_position=use_position,
                                        use_region_id=use_region_id,
                                        use_time=use_time,
                                        fn_batch_loop=batch_loop,
                                        class_dim=0,
                                        space_planes=params_dict.get('space_planes', 3),
                                        time_planes=params_dict.get('time_planes', 3),
                                        depth_nodes=1,
                                        depth_edges=1,
                                        use_edges=use_edges,
                                        only_spatial=False,
                                        use_norm=use_norm,
                                        use_mse=False,
                                        )
objective_optuna.set_default_params(params_dict)

# Indices for the train / valid split
# train_idx = graph_train_dataset.idx_train
# valid_idx = graph_train_dataset.idx_valid
# test_idx = graph_train_dataset.idx_test
# objective_optuna.set_indices(train_idx, valid_idx, test_idx=test_idx)

indices = np.arange(len(dataset))
labels = np.ones(len(dataset))  # Dummy labels
splits = stratified_split(indices, labels, test_size=0.2, valid_size=0.2)
train_idx = splits['X_train']
valid_idx = splits['X_valid']
test_idx = splits['X_test']
objective_optuna.set_indices(train_idx, valid_idx, test_idx=test_idx)

In [9]:
# Model
model = objective_optuna.build_model(params_dict)
tmp_save = os.path.join(save_folder, 'FinalModel')
res_training = objective_optuna._train(model, params_dict, tmp_save, final_model=True)  # Reload

2025-07-15 16:22:34,347 - INFO - Logging to file and console...
2025-07-15 16:22:34,349 - INFO - Saving data to /media/jaume/DATA/Data/Multiplex_Synthetic_FINAL/CoupledPendulum_Graphs/duration-10.0_dt-0.1_k-2.0/FinalModel

2025-07-15 16:22:34,365 - INFO - Loading trained model...

2025-07-15 16:22:34,438 - INFO - Trained model loaded successfully.

2025-07-15 16:22:34,505 - INFO - ==== Best epoch: 212 [212/300] ====
Training:
Loss: -1.4856	Accuracy: 0.0000
Validation:
Loss: -1.8615	Accuracy: 0.0000
Test:
Loss: -1.9108	Accuracy: 0.0000
Best Score: -2.0952

2025-07-15 16:22:34,508 - INFO - Model trained!



Loading trained model...

Trained model loaded successfully.
Model trained!
==== Best epoch: 212 [212/300] ====
Training:
Loss: -1.4856	Accuracy: 0.0000
Validation:
Loss: -1.8615	Accuracy: 0.0000
Test:
Loss: -1.9108	Accuracy: 0.0000
Best Score: -2.0952



In [10]:
steps_to_predict = int(duration/dt)
# steps_to_predict = int(duration / dt_step_size)
# print(steps_to_predict)
# time_to_predict = np.arange(0, steps_to_predict, 1)  # Predict 100 steps more
time_to_predict = torch.arange(0, steps_to_predict, 1)
pred_trajectory, pred_latent, tgt_trajectory = objective_optuna.predict_from_latent(model, objective_optuna.dataset, time_to_predict, params_dict, device=device)
# The shape of the results is [num_samples, num_features, num_nodes, num_time_steps]

# Convert to the true scale
trans_fts_predict = objective_optuna.dataset._transform['nfeatures']
fts_to_predict = np.arange(0, 2)
num_subjects, _, _, time_frames = tgt_trajectory.shape
time_frames_true = tgt_trajectory.shape[-1]
trans_fts_mean = trans_fts_predict.mean[..., fts_to_predict].unsqueeze(0).unsqueeze(-1).repeat(num_subjects, 1, 1, time_frames_true).permute(0, 2, 1, 3)
trans_fts_std = trans_fts_predict.std[..., fts_to_predict].unsqueeze(0).unsqueeze(-1).repeat(num_subjects, 1, 1, time_frames_true).permute(0, 2, 1, 3)
true_trajectory = (tgt_trajectory * trans_fts_std) + trans_fts_mean

In [11]:
# Load data
data_filename = os.path.join(save_folder, 'data.pkl')
data = torch.load(data_filename, map_location=device)
tgt_trajectories = data['tgt_trajectories'] #.numpy() # --- This is my ground truth-data to predict true_trajectory

# Normalize them
norm_tgt_trajectory = (tgt_trajectories - trans_fts_mean) / trans_fts_std

In [12]:
forecast_steps = tgt_trajectories.shape[-1]

In [13]:
# arima_preds = arima_baseline(tgt_trajectories, forecast_steps=forecast_steps)
arima_preds = arima_baseline(norm_tgt_trajectory.numpy(), forecast_steps=forecast_steps)

  return np.roots(self.polynomial_reduced_ma)**-1


In [14]:
arima_preds.shape
arima_preds = (arima_preds * trans_fts_std.numpy()) + trans_fts_mean.numpy()

In [15]:
# Initialize storage
metrics = {
    'Dimension': [],
    'MAE': [],
    'MSE': [],
    'MAPE (%)': [],
    'MSPE (%)': [],
    'IQR (Abs Error)': []
}

N, D, R, T = tgt_trajectory.shape
dimension_names = ['$\\theta$', '$\\dot{\\theta}$']
for d in range(D):  # For each dimension
    rec_d = arima_preds[:, d, :, :] #.flatten()
    tgt_d = true_trajectory[:, d, :, :].numpy() #.flatten()

    print(rec_d.shape, tgt_d.shape)

    abs_err = np.abs(rec_d - tgt_d)
    sq_err = (rec_d - tgt_d) ** 2

    # Handle small values in denominator to avoid division by zero
    tgt_d_safe = np.clip(np.abs(tgt_d), 1e-8, None)

    mae = np.mean(abs_err)
    mse = np.mean(sq_err)
    mape = np.mean(abs_err / tgt_d_safe) * 100
    mspe = np.mean(sq_err / tgt_d_safe) * 100
    iqr = np.percentile(abs_err, 75) - np.percentile(abs_err, 25)

    # Store metrics
    metrics['Dimension'].append(dimension_names[d])
    metrics['MAE'].append(mae)
    metrics['MSE'].append(mse)
    metrics['MAPE (%)'].append(mape)
    metrics['MSPE (%)'].append(mspe)
    metrics['IQR (Abs Error)'].append(iqr)

# print(abs_error.shape)
# print(data.keys())

# Create and display DataFrame
df_metrics = pd.DataFrame(metrics)
print(df_metrics)

for i, row in df_metrics.iterrows():
    print(f"extrapolation & {row['Dimension']} & "
          f"{row['MSE']:.4f} $\\pm$ {row['IQR (Abs Error)']:.4f} & "
          f"{row['MAE']:.4f} $\\pm$ {row['IQR (Abs Error)']:.4f} & "
          f"{row['MSPE (%)']:.2f}\\% & {row['MAPE (%)']:.2f}\\% \\\\")


(500, 2, 100) (500, 2, 100)
(500, 2, 100) (500, 2, 100)
        Dimension       MAE       MSE    MAPE (%)    MSPE (%)  IQR (Abs Error)
0        $\theta$  0.254543  0.113491  402.041983   76.906762         0.319864
1  $\dot{\theta}$  0.766051  0.918688  309.634717  187.792605         0.903048
extrapolation & $\theta$ & 0.1135 $\pm$ 0.3199 & 0.2545 $\pm$ 0.3199 & 76.91\% & 402.04\% \\
extrapolation & $\dot{\theta}$ & 0.9187 $\pm$ 0.9030 & 0.7661 $\pm$ 0.9030 & 187.79\% & 309.63\% \\
