In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
import torch.optim as optim
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary
from sklearn.metrics import accuracy_score

In [4]:
import pytorch_wavelets as DWT
import pywt
from pytorch_wavelets import DWT1D, IDWT1D

In [None]:
from dywpe.ablation.benchmark import PositionalEncodingBenchmark
from dywpe.ablation.complete_ablation import run_core_ablation_studies

In [25]:
# Fix the labels by subtracting 1 to make them 0-indexed
print("Fixing labels to be 0-indexed...")

def fix_dataloader_labels(dataloader):
    """Fix labels in a dataloader by subtracting 1"""
    fixed_data = []
    fixed_labels = []

    for inputs, labels in dataloader:
        fixed_data.append(inputs)
        fixed_labels.append(labels - 1)  # Subtract 1 to make 0-indexed

    # Concatenate all batches
    all_inputs = torch.cat(fixed_data, dim=0)
    all_labels = torch.cat(fixed_labels, dim=0)

    return all_inputs, all_labels

# Extract and fix all data
X_train_fixed, y_train_fixed = fix_dataloader_labels(train_loader)
X_valid_fixed, y_valid_fixed = fix_dataloader_labels(valid_loader)
X_test_fixed, y_test_fixed = fix_dataloader_labels(test_loader)

# Create new datasets with fixed labels
train_dataset_fixed = TensorDataset(X_train_fixed, y_train_fixed)
valid_dataset_fixed = TensorDataset(X_valid_fixed, y_valid_fixed)
test_dataset_fixed = TensorDataset(X_test_fixed, y_test_fixed)

# Create new data loaders
batch_size = 64
train_loader_fixed = DataLoader(train_dataset_fixed, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader_fixed = DataLoader(valid_dataset_fixed, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader_fixed = DataLoader(test_dataset_fixed, batch_size=batch_size, shuffle=False, drop_last=True)

Fixing labels to be 0-indexed...


In [6]:
import os
import zipfile
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Directory where datasets will be downloaded and extracted
DATA_DIR = 'datasets'

# Ensure the dataset directory exists
os.makedirs(DATA_DIR, exist_ok=True)

def download_dataset(dataset_name, url):
    """
    Downloads and extracts a zip file containing the dataset.
    """
    zip_path = os.path.join(DATA_DIR, f"{dataset_name}.zip")
    extract_path = os.path.join(DATA_DIR, dataset_name)

    # Download the dataset
    print(f"Downloading {dataset_name} from {url}...")
    urllib.request.urlretrieve(url, zip_path)

    # Extract the zip file
    print(f"Extracting {dataset_name}...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    # Remove the zip file after extraction
    os.remove(zip_path)
    print(f"Dataset {dataset_name} extracted to {extract_path}.")
    return extract_path

def load_arff_data(file_path):
    """
    Loads ARFF file and converts it to a pandas DataFrame.
    """
    print(f"Loading ARFF file: {file_path}")
    data, meta = arff.loadarff(file_path)
    df = pd.DataFrame(data)
    return df

def preprocess_data(train_paths, test_paths, batch_size=64):
    """
    Preprocesses the SelfRegulationSCP1 data:
    - Loads and combines multiple dimensions from ARFF files.
    - Normalizes the features for each dimension.
    - Stacks features from different dimensions.
    - Converts them into PyTorch tensors.
    - Creates DataLoaders for training, validation, and testing.
    """

    # Load all training and test dimensions
    train_dfs = [load_arff_data(path) for path in train_paths]
    test_dfs = [load_arff_data(path) for path in test_paths]

    # Separate features and labels for all dimensions
    train_features = [df.drop(columns=['cortical']) for df in train_dfs]
    test_features = [df.drop(columns=['cortical']) for df in test_dfs]

    # Create a label mapping for the two unique class labels
    label_mapping = {
        b'negativity': 0,
        b'positivity': 1
    }

    # Apply the label mapping to the training and test sets
    train_labels = train_dfs[0]['cortical'].apply(lambda x: label_mapping[x]).values
    test_labels = test_dfs[0]['cortical'].apply(lambda x: label_mapping[x]).values

    # Normalize the features using StandardScaler for each dimension
    scalers = [StandardScaler() for _ in range(6)]  # 6 dimensions
    train_features_normalized = [scalers[i].fit_transform(train_features[i]) for i in range(6)]
    test_features_normalized = [scalers[i].transform(test_features[i]) for i in range(6)]

    # Stack all dimensions along a new axis (multivariate time-series)
    X_train = np.stack(train_features_normalized, axis=-1)
    X_test_full = np.stack(test_features_normalized, axis=-1)

    # Split the test data into validation and test sets
    X_valid, X_test, y_valid, y_test = train_test_split(X_test_full, test_labels, test_size=0.50, random_state=42)
    y_train = train_labels

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.int64)

    X_valid = torch.tensor(X_valid, dtype=torch.float32)
    y_valid = torch.tensor(y_valid, dtype=torch.int64)

    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.int64)

    # Output dataset shapes
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    # Create DataLoaders
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_valid, y_valid)
    test_dataset = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    # Return both the DataLoaders and the raw tensors
    return train_loader, valid_loader, test_loader, X_train, X_valid, X_test, y_train, y_valid, y_test

# Example usage for downloading, extracting, and preprocessing the SelfRegulationSCP1 dataset
if __name__ == "__main__":
    # URL for the dataset
    dataset_name = 'SelfRegulationSCP2'
    dataset_url = 'https://timeseriesclassification.com/aeon-toolkit/SelfRegulationSCP2.zip'

    # Download and extract the dataset
    extract_path = download_dataset(dataset_name, dataset_url)

    # Paths for the ARFF files
    train_arff_paths = [
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension1_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension2_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension3_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension4_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension5_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension6_TRAIN.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension7_TRAIN.arff')
    ]

    test_arff_paths = [
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension1_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension2_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension3_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension4_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension5_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension6_TEST.arff'),
        os.path.join(extract_path, 'SelfRegulationSCP2Dimension7_TEST.arff')
    ]

    # Preprocess the data
    train_loader, valid_loader, test_loader, X_train, X_valid, X_test, y_train, y_valid, y_test = preprocess_data(train_arff_paths, test_arff_paths)

    n_classes = len(torch.unique(y_train))

    # Output the number of classes
    print(f"Number of classes: {n_classes}")


Downloading SelfRegulationSCP2 from https://timeseriesclassification.com/aeon-toolkit/SelfRegulationSCP2.zip...
Extracting SelfRegulationSCP2...
Dataset SelfRegulationSCP2 extracted to datasets/SelfRegulationSCP2.
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension1_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension2_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension3_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension4_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension5_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension6_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension7_TRAIN.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension1_TEST.arff
Loading ARFF file: datasets/SelfRegulationSCP2/SelfRegulationSCP2Dimension2_TEST.arff
Loadi

In [None]:
# Your existing code with enhancements
benchmark = PositionalEncodingBenchmark()

model_params = {
    'input_timesteps': 1152,
    'in_channels': 6,
    'patch_size': 256,
    'embedding_dim': 32,
    'num_transformer_layers': 4,
    'num_heads': 4,
    'dim_feedforward': 256,
    'dropout': 0.2,
    'num_classes': 2
}

models = benchmark.run_full_benchmark(
    model_params,
    train_loader,
    valid_loader,
    test_loader,
    encodings=['dywpe'],
    n_epochs=100,  # Maximum epochs
    early_stopping_patience=30,  # Stop if no improvement for 15 epochs
    min_delta=0.001,  # Minimum improvement threshold
    save_models=True,  # Save best models
    models_dir="./saved_models"  # Where to save
)

# The summary will now show early stopping information
benchmark.print_summary()

Starting benchmark on 1 positional encodings...
Max epochs per encoding: 100
Early stopping patience: 30
Test set evaluation: ENABLED

Benchmarking: DYWPE2
Early stopping patience: 30
Epoch   1: Train Acc: 0.4740, Val Acc: 0.4219 ⭐ (NEW BEST), Time: 0.94s
Epoch   2: Train Acc: 0.5312, Val Acc: 0.6094 ⭐ (NEW BEST), Time: 0.47s
Epoch   5: Train Acc: 0.4896, Val Acc: 0.6094, Time: 0.26s
Epoch   8: Train Acc: 0.5052, Val Acc: 0.6406 ⭐ (NEW BEST), Time: 0.21s
Epoch  10: Train Acc: 0.5260, Val Acc: 0.4219, Time: 0.17s
Epoch  15: Train Acc: 0.5677, Val Acc: 0.4688, Time: 0.17s
Epoch  20: Train Acc: 0.6354, Val Acc: 0.5156, Time: 0.17s
Epoch  25: Train Acc: 0.6510, Val Acc: 0.5312, Time: 0.19s
Epoch  30: Train Acc: 0.6354, Val Acc: 0.5469, Time: 0.17s
Epoch  35: Train Acc: 0.6406, Val Acc: 0.5156, Time: 0.17s

Early stopping triggered after 38 epochs
Best validation accuracy: 0.6406 at epoch 8
Loaded best model from epoch 8
Final Test Acc: 0.6719
Training Summary:
- Best Val Acc: 0.6406 (epoch