In [None]:
import os
import sys

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, matthews_corrcoef
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [None]:
# Mount Google Drive at /content/drive so the Drive paths used below resolve.
# force_remount=True is passed so the mount is redone even if one already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
# Get the current working directory (where your notebook is)
current_directory = os.getcwd()

# Full path to data_utils.py on the mounted Drive
data_utils_path = '/content/drive/MyDrive/Colab Notebooks/Bachelor Thesis/Thesis Files/data_utils.py'  # Replace with the actual path

# Make the folder that contains data_utils.py importable.
# The membership check keeps re-runs of this cell from appending duplicates.
module_dir = os.path.dirname(data_utils_path)
if module_dir not in sys.path:
    sys.path.append(module_dir)

# Import the custom modules (analysis_utils is presumably in the same folder -- TODO confirm)
import data_utils as du
import analysis_utils as au

# NOTE: sys.path is deliberately NOT reset to [] here. Emptying it removes the
# standard-library and site-packages locations, so any later import of a module
# that is not already cached in sys.modules would fail.

In [None]:
# Locations of the feature array and the matching numeric-label array on Drive.
data_path = "/content/drive/MyDrive/Colab Notebooks/Bachelor Thesis/Data/Including Features/5_class_mid_noise_30s_features_vec.npy"
label_path = "/content/drive/MyDrive/Colab Notebooks/Bachelor Thesis/Data/Including Features/5_class_mid_noise_30s_numeric_label_vec.npy"

# Load both arrays through the project helper (implementation lives in data_utils).
data_vec, label_vec = du.load_data(data_path, label_path)

# Print the dataset summary shown below this cell.
du.data_info(data_vec)

DATASET INFO:
Shape: (10, 20, 5, 7500, 25)

No of Samples: 10

No of Frequencies: 20
Freqency values: [4, 5, 6, 7, 8, 9, 11, 13, 16, 18, 22, 26, 31, 36, 43, 51, 60, 71, 84, 100]

No of noise ratios: 5
Signal to Noise ratios (in db) [-10, -8, -6, -4, -2, 0, 2, 4, 6, 8]

No of Datapoints: 7500

No of Features per Datapoint: 25 (signal, hilbert amp, 20 wavelets for each freq)


### Data Processing



In [None]:
# Keep only the first entry along axis 0 of both arrays to shrink the dataset
# for quick experiments; the remaining axes are left untouched.
data_vec_trim = data_vec[:1, :, :]
label_vec_trim = label_vec[:1, :, :]

# Sanity-check the trimmed shapes.
print(data_vec_trim.shape)
print(label_vec_trim.shape)

(1, 20, 5, 7500, 25)
(1, 20, 5, 7500)


### Reduce the sample size massively until the pipeline works properly.

In [None]:
# Reshape the trimmed arrays with the project helper. Per the printed shapes the
# result is (sequences, timesteps, features) for the data and
# (sequences, timesteps, 1) for the labels.
data_vec_shaped, label_vec_shaped = du.preprocess_data(data_vector=data_vec_trim, label_vector=label_vec_trim)

print(data_vec_shaped.shape)
print(label_vec_shaped.shape)

(100, 7500, 25)
(100, 7500, 1)


## Batch & Sequence Length Comparison

### Helper Functions

In [None]:


def kfold_data_split(X, y, n_splits=5):
    """
    Build the cross-validation splitter used by the experiments.

    The data itself is not inspected here; `X` and `y` are accepted only so
    call sites can pass them along. Splitting happens later, when the caller
    invokes `.split(...)` on the returned object.

    Args:
        X: Input features (unused).
        y: Labels (unused).
        n_splits: Number of folds.

    Returns:
        A shuffling KFold object with a fixed random_state of 42.
    """
    splitter = KFold(shuffle=True, random_state=42, n_splits=n_splits)
    return splitter


In [None]:

def create_model(input_shape, binary=False, return_sequences=True):
    """
    Creates an LSTM model for binary or multiclass classification.

    The input shape is declared with an explicit `Input` layer rather than the
    `input_shape` keyword on the LSTM layer, which Keras flags with a
    UserWarning on every model construction.

    Args:
        input_shape: Tuple, shape of the input (seq_len, features).
        binary: Whether the model is binary classification. Binary models end
            in a single sigmoid unit, multiclass models in a 5-way softmax.
        return_sequences: Forwarded to the LSTM layer. True makes the model
            emit one prediction per timestep; False emits one per sequence.

    Returns:
        A compiled Keras model (Adam optimizer, accuracy metric).
    """
    output_units = 1 if binary else 5
    output_activation = "sigmoid" if binary else "softmax"

    model = Sequential([
        Input(shape=input_shape),
        LSTM(32, return_sequences=return_sequences, name="lstm_layer"),
        Dense(output_units, activation=output_activation, name="dense_output")
    ])

    # Sparse loss: multiclass labels are integer class ids, not one-hot vectors.
    loss = "binary_crossentropy" if binary else "sparse_categorical_crossentropy"
    model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
    return model


# Utility: Train and evaluate model
def train_and_evaluate(model, X_train, y_train, X_val, y_val, batch_size, epochs=10):
    """
    Fit `model` silently and report the accuracies of the final epoch.

    Args:
        model: Compiled Keras model.
        X_train, y_train: Training data.
        X_val, y_val: Validation data, passed to `fit` as `validation_data`.
        batch_size: Batch size for training.
        epochs: Number of training epochs.

    Returns:
        Dictionary with keys "train_acc" and "val_acc" holding the last
        recorded training and validation accuracy.
    """
    fit_log = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, y_val),
        verbose=0,
    ).history

    # Only the last epoch's values are reported.
    return {
        "train_acc": fit_log["accuracy"][-1],
        "val_acc": fit_log["val_accuracy"][-1],
    }

# Utility: Sliding window generator
def sliding_window(data, window_size, stride):
    """
    Cut every sequence in `data` into overlapping fixed-length windows.

    Windows are emitted sequence by sequence, in order of increasing start
    index; only windows that fit completely inside a sequence are produced.

    Args:
        data: Input data of shape (samples, seq_len, features).
        window_size: Size of each sliding window.
        stride: Step size between windows.

    Returns:
        Sliding window dataset of shape (new_samples, window_size, features).
        If no window fits, the result is an empty array.
    """
    return np.array([
        sequence[start:start + window_size]
        for sequence in data
        for start in range(0, len(sequence) - window_size + 1, stride)
    ])

# Main Experiment Function
def phase_1_experiments(X, y, binary=False, seq_lengths=[1500, 2500, 7500], batch_sizes=[8, 16, 32], use_sliding=True):
    """
    Phase 1 experiments: Tests seq_len, batch_size, and sliding windows.

    Labels may be per-sequence (shape (samples,) or (samples, k)) or
    per-timestep (second axis equal to X's sequence axis). Per-timestep labels
    are truncated and windowed exactly like X so they stay aligned, and the
    model is then built with return_sequences=True. Per-sequence labels are
    repeated once per window and the model is built with
    return_sequences=False.

    Args:
        X, y: Input data (samples, seq_len, features) and labels.
        binary: Whether to perform binary classification.
        seq_lengths: List of sequence lengths to test. Sequences are truncated
            to this length; shorter sequences are not padded.
        batch_sizes: List of batch sizes to test.
        use_sliding: Whether to cut the truncated sequences into sliding
            windows (window size 200, stride 10).

    Returns:
        Results dictionary summarizing all experiments, keyed by
        "seq_len=..._batch_size=..._sliding=...".

    Raises:
        ValueError: If use_sliding is True and a truncated sequence is shorter
            than the window size, so that no window can be produced.
    """
    window_size, stride = 200, 10
    results = {}

    X = np.asarray(X)
    y = np.asarray(y)
    # Labels carry a time axis when their second dimension matches X's.
    per_timestep = y.ndim >= 2 and y.shape[1] == X.shape[1]

    # Train-test split
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    for seq_len in seq_lengths:
        # Truncate inputs (and per-timestep labels) to the desired length
        X_train_cur = X_train[:, :seq_len, :]
        X_val_cur = X_val[:, :seq_len, :]
        if per_timestep:
            y_train_cur, y_val_cur = y_train[:, :seq_len], y_val[:, :seq_len]
        else:
            y_train_cur, y_val_cur = y_train, y_val

        if use_sliding:
            if X_train_cur.shape[1] < window_size:
                raise ValueError(
                    f"seq_len={seq_len} is shorter than the sliding window size ({window_size})."
                )
            n_train, n_val = len(y_train_cur), len(y_val_cur)
            X_train_cur = sliding_window(X_train_cur, window_size=window_size, stride=stride)
            X_val_cur = sliding_window(X_val_cur, window_size=window_size, stride=stride)
            if per_timestep:
                # Window the labels with the same parameters so they stay aligned with X.
                y_train_cur = sliding_window(y_train_cur, window_size=window_size, stride=stride)
                y_val_cur = sliding_window(y_val_cur, window_size=window_size, stride=stride)
            else:
                # Windows come out grouped by source sequence, so repeating each
                # label windows-per-sequence times keeps the pairing.
                y_train_cur = np.repeat(y_train_cur, len(X_train_cur) // n_train, axis=0)
                y_val_cur = np.repeat(y_val_cur, len(X_val_cur) // n_val, axis=0)

        for batch_size in batch_sizes:
            # Take the input shape from the data actually fed to the model, so
            # it is correct for both the windowed and the non-windowed case.
            model = create_model(
                input_shape=X_train_cur.shape[1:],
                binary=binary,
                return_sequences=per_timestep,
            )
            metrics = train_and_evaluate(model, X_train_cur, y_train_cur, X_val_cur, y_val_cur, batch_size)

            # Save results
            key = f"seq_len={seq_len}_batch_size={batch_size}_sliding={use_sliding}"
            results[key] = metrics

    return results


In [None]:

def split_sequences_keep_data(X, y, new_seq_len):
    """
    Chop each sequence (and its per-timestep labels) into consecutive,
    non-overlapping pieces of length `new_seq_len`.

    If the original length is not a multiple of `new_seq_len`, a warning is
    printed and the trailing timesteps that do not fill a whole piece are
    dropped.

    Args:
        X (numpy array): Input data of shape (samples, seq_len, features).
        y (numpy array): Labels of shape (samples, seq_len) or (samples, seq_len, ...).
        new_seq_len (int): Desired sequence length.

    Returns:
        X_new: New input data with shape (new_samples, new_seq_len, features).
        y_new: New labels with shape (new_samples, new_seq_len, ...).
    """
    _, original_seq_len, num_features = X.shape
    pieces_per_sequence, leftover = divmod(original_seq_len, new_seq_len)
    if leftover != 0:
        print(f"Warning: Original seq_len ({original_seq_len}) is not a multiple of new_seq_len ({new_seq_len}).")

    # Timesteps per sequence that survive the split
    usable_len = pieces_per_sequence * new_seq_len

    X_new = X[:, :usable_len, :].reshape((-1, new_seq_len, num_features))
    # Any label axes after the time axis are carried through unchanged.
    label_tail = y.shape[2:]
    y_new = y[:, :usable_len, ...].reshape((-1, new_seq_len, *label_tail))

    return X_new, y_new


In [None]:
def train_kfold(X, y, model_fn, binary, batch_size, epochs, n_splits=5):
    """
    Performs K-Fold cross-validation on the dataset.

    Predictions and labels are flattened to 1-D before scoring, so models that
    emit one prediction per timestep (labels of shape (samples, seq_len, 1))
    can be scored with the scikit-learn metrics. This matches what
    `evaluate_seq_len_batch_size` does.

    Args:
        X: Input data, indexable by integer index arrays along axis 0.
        y: Labels aligned with X along axis 0; their layout must match the
            model's output (per-timestep or per-sequence).
        model_fn: Function to create the model; called as
            model_fn(input_shape=X.shape[1:], binary=binary).
        binary: Whether the model is binary classification.
        batch_size: Batch size for training.
        epochs: Number of training epochs.
        n_splits: Number of folds.

    Returns:
        Dictionary with one list entry per fold for "accuracy", "precision",
        "recall", "f1" and "confusion_matrices".
    """
    kfold = kfold_data_split(X, y, n_splits)
    results = {"accuracy": [], "precision": [], "recall": [], "f1": [], "confusion_matrices": []}
    average = "binary" if binary else "macro"

    for fold, (train_idx, val_idx) in enumerate(kfold.split(X, y)):
        print(f"Fold {fold + 1}/{n_splits}")

        # Split data
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Create and train model
        model = model_fn(input_shape=X.shape[1:], binary=binary)
        model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)

        # Turn raw model outputs into hard class predictions
        raw_pred = model.predict(X_val)
        y_pred = (raw_pred > 0.5).astype(int) if binary else raw_pred.argmax(axis=-1)

        # The sklearn metrics expect 1-D label vectors.
        y_pred = np.ravel(y_pred)
        y_true = np.ravel(y_val)

        results["accuracy"].append(accuracy_score(y_true, y_pred))
        results["precision"].append(precision_score(y_true, y_pred, average=average))
        results["recall"].append(recall_score(y_true, y_pred, average=average))
        results["f1"].append(f1_score(y_true, y_pred, average=average))
        results["confusion_matrices"].append(confusion_matrix(y_true, y_pred))

    return results


In [None]:
def compare_configurations(X, y, binary, configs, epochs=10):
    """
    Compare performance for different configurations of seq_len, batch_size, etc.

    X is truncated to each configuration's `seq_len` (no padding is applied).
    Labels whose second axis has the same length as X's sequence axis are
    treated as per-timestep labels and truncated the same way, so inputs and
    labels stay aligned. Other labels are passed through unchanged.

    Args:
        X: Input data of shape (samples, seq_len, features).
        y: Labels.
        binary: Whether it's binary classification.
        configs: List of configurations to test; each is a dict with
            "seq_len" and "batch_size" keys.
        epochs: Number of epochs to train.

    Returns:
        Dictionary summarizing all results, keyed by
        "seq_len=..._batch_size=...".
    """
    all_results = {}
    # Per-timestep labels have a time axis that matches X's.
    per_timestep = np.ndim(y) >= 2 and np.shape(y)[1] == np.shape(X)[1]

    for config in configs:
        seq_len, batch_size = config["seq_len"], config["batch_size"]
        print(f"Testing Config: seq_len={seq_len}, batch_size={batch_size}")

        # Truncate sequences (and per-timestep labels) to the configured length
        X_truncated = X[:, :seq_len, :]
        y_truncated = y[:, :seq_len] if per_timestep else y

        # Perform K-Fold training and evaluation
        results = train_kfold(
            X_truncated, y_truncated, create_model, binary, batch_size=batch_size, epochs=epochs
        )
        all_results[f"seq_len={seq_len}_batch_size={batch_size}"] = results

    return all_results


In [None]:
# Updated seq_len and batch_size comparison function
def _score_validation_split(model, X_val, y_val, binary):
    """Predict on one validation split and return its metrics as a dict of single values."""
    raw_pred = model.predict(X_val)
    y_pred = (raw_pred > 0.5).astype(int) if binary else raw_pred.argmax(axis=-1)

    # The sklearn metrics expect 1-D vectors, so every timestep counts as one sample.
    y_pred = y_pred.flatten()
    y_true = y_val.flatten()

    average = "binary" if binary else "macro"
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average=average),
        "recall": recall_score(y_true, y_pred, average=average),
        "f1": f1_score(y_true, y_pred, average=average),
        "mcc": matthews_corrcoef(y_true, y_pred),
        "confusion_matrices": confusion_matrix(y_true, y_pred),
    }


def evaluate_seq_len_batch_size(feature_sets, y, model_fn, binary, configs, epochs=10, n_splits=4, use_kfold=False, keep_data=True):
    """
    Evaluates sequence length and batch size for each feature set, with optional k-fold cross-validation.

    Args:
        feature_sets: Dictionary of feature sets to test (e.g., signal, hilbert, wavelets).
        y: Labels corresponding to the feature sets.
        model_fn: Function to create the model; called as
            model_fn(input_shape=(seq_len, n_features), binary=binary).
        binary: Whether the task is binary classification.
        configs: Iterable of configurations, each a dict with 'seq_len' and 'batch_size' keys.
        epochs: Number of epochs for training.
        n_splits: Number of folds for cross-validation (only used if use_kfold is True).
        use_kfold: Boolean, whether to use k-fold cross-validation (True) or simple train/test split (False).
        keep_data: If True, every sequence is split into consecutive segments of
            length seq_len, so shorter seq_len values yield more samples. If
            False, each sequence is truncated to its first seq_len timesteps
            (this path indexes y with three axes).

    Returns:
        Dictionary of results for each feature set, seq_len, and batch size.
        Every metric is stored as a list: one entry per fold with k-fold, a
        single entry otherwise.
    """
    metric_names = ("accuracy", "precision", "recall", "f1", "mcc", "confusion_matrices")
    results = {}

    for feature_name, X in feature_sets.items():
        print(f"Evaluating feature set: {feature_name} ({X.shape[-1]} features)")
        results[feature_name] = {}

        for config_dict in configs:
            seq_len = config_dict['seq_len']
            batch_size = config_dict['batch_size']

            if keep_data:
                # Split sequences into segments of the current seq_len
                X_trimmed, y_trimmed = split_sequences_keep_data(X, y, seq_len)
            else:
                X_trimmed, y_trimmed = X[:, :seq_len, :], y[:, :seq_len, :]
            print(X_trimmed.shape)
            print(y_trimmed.shape)

            print(f"Seq_len={seq_len}, Batch_size={batch_size}")

            config_key = f"seq_len={seq_len}_batch_size={batch_size}"
            input_shape = (seq_len, X.shape[-1])

            if use_kfold:
                kfold = kfold_data_split(X_trimmed, y_trimmed, n_splits)
                # Call the method; printing the bare attribute only shows the bound-method repr.
                print(kfold.get_n_splits())
                fold_results = {name: [] for name in metric_names}

                for fold, (train_idx, val_idx) in enumerate(kfold.split(np.arange(len(X_trimmed)))):
                    print(f"Fold {fold + 1}/{n_splits}")
                    X_train, X_val = X_trimmed[train_idx], X_trimmed[val_idx]
                    y_train, y_val = y_trimmed[train_idx], y_trimmed[val_idx]

                    print(X_train.shape)
                    print(y_train.shape)
                    print(X_val.shape)
                    print(y_val.shape)

                    # Create and train a fresh model for every fold
                    model = model_fn(input_shape=input_shape, binary=binary)
                    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val), verbose=0)

                    # Evaluate
                    scores = _score_validation_split(model, X_val, y_val, binary)
                    for name in metric_names:
                        fold_results[name].append(scores[name])

                # Save results for the current seq_len and batch_size
                results[feature_name][config_key] = fold_results
            else:
                # Simple train/test split
                X_train, X_val, y_train, y_val = train_test_split(X_trimmed, y_trimmed, test_size=0.2, random_state=42)

                # Create and train model
                model = model_fn(input_shape=input_shape, binary=binary)
                model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val), verbose=0)

                # Evaluate; wrap each value in a list to match the k-fold result layout
                scores = _score_validation_split(model, X_val, y_val, binary)
                results[feature_name][config_key] = {name: [scores[name]] for name in metric_names}
    return results


### Configurations

In [None]:
# Data Config Files
# NOTE: this hand-written list is replaced by the grid that a later cell builds
# into the same `configs` name before any experiment is run.

configs = [
    {"seq_len": 250, "batch_size": 8},
    {"seq_len": 250, "batch_size": 64},
    {"seq_len": 500, "batch_size": 8},
    {"seq_len": 500, "batch_size": 32},
    {"seq_len": 1500, "batch_size": 8},
    {"seq_len": 1500, "batch_size": 16},
    {"seq_len": 1500, "batch_size": 32},
    {"seq_len": 7500, "batch_size": 16},
]

# Feature sets dictionary: nested slices along the last (feature) axis.
# NOTE(review): the dataset summary lists one signal and one Hilbert-amplitude
# channel -- confirm that taking the first 5 columns for "hilbert" is intended.
feature_sets = {
    "signal": data_vec_shaped[:,:,:1],      # 1 feature
    "hilbert": data_vec_shaped[:,:,:5],    # 5 features
    "wavelets": data_vec_shaped            # 25 features
}

# Binary labels: work on a copy and collapse every label >= 1 to 1.
y_binary = label_vec_shaped.copy()
y_binary[y_binary >= 1] = 1
# Multiclass labels: the original label array itself (no copy is made).
y_multiclass = label_vec_shaped

print(y_binary.shape)
print(y_multiclass.shape)

# # Binary Example
# binary_results = compare_configurations(X_binary, y_binary, binary=True, configs=configs)

# # Multiclass Example
# multiclass_results = compare_configurations(X_multiclass, y_multiclass, binary=False, configs=configs)


(100, 7500, 1)
(100, 7500, 1)


In [None]:
# # Assume X (samples, seq_len, features) and y (labels) are already preprocessed
# binary_results = phase_1_experiments(X, y_binary, binary=True)
# multiclass_results = phase_1_experiments(X, y_multiclass, binary=False)

# # Print or save results
# import pprint
# pprint.pprint(binary_results)
# pprint.pprint(multiclass_results)


In [None]:
# Configuration
seq_lens = [250, 500, 1000, 7500]  # Example sequence lengths
batch_sizes = [8, 16, 32]         # Example batch sizes

# Build the full grid: one dict per (seq_len, batch_size) combination,
# ordered by seq_len first. This rebinds `configs`.
configs = []
for seq_len in seq_lens:
    for batch_size in batch_sizes:
        configs.append({"seq_len": seq_len, "batch_size": batch_size})
print(configs)

[{'seq_len': 250, 'batch_size': 8}, {'seq_len': 250, 'batch_size': 16}, {'seq_len': 250, 'batch_size': 32}, {'seq_len': 500, 'batch_size': 8}, {'seq_len': 500, 'batch_size': 16}, {'seq_len': 500, 'batch_size': 32}, {'seq_len': 1000, 'batch_size': 8}, {'seq_len': 1000, 'batch_size': 16}, {'seq_len': 1000, 'batch_size': 32}, {'seq_len': 7500, 'batch_size': 8}, {'seq_len': 7500, 'batch_size': 16}, {'seq_len': 7500, 'batch_size': 32}]


### Training Models (keep Data)

Same amount of data -> smaller seq_len has more samples

In [None]:
# NOTE(review): Process is not used anywhere in the visible part of the notebook.
from multiprocessing import Process

# Binary classification
# keep_data is left at its default (True), so sequences are split into
# segments rather than truncated. n_splits is passed but has no effect
# because use_kfold=False selects the single train/test split.
binary_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_binary,  # Binary labels
    model_fn=create_model,  # Function to create LSTM models
    binary=True,
    configs=configs,
    epochs=10,
    n_splits=4,
    use_kfold=False
)


Evaluating feature set: signal (1 features)
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
Evaluating feature set: hilbert (5 features)
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
Evaluating feature set: wavelets (25 features)
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step


In [None]:
# Example: Summarizing binary results
# Each metric is stored as a list, so np.mean gives the average over its entries
# (a single entry when no k-fold was used).
for feature, results in binary_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        print(f"{config}: Accuracy={np.mean(metrics['accuracy']):.2f}, "
              f"F1={np.mean(metrics['f1']):.2f}, "
              f"MCC={np.mean(metrics['mcc']):.2f}")


Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.85, F1=0.83, MCC=0.69
seq_len=250_batch_size=16: Accuracy=0.84, F1=0.83, MCC=0.68
seq_len=250_batch_size=32: Accuracy=0.84, F1=0.83, MCC=0.68
seq_len=500_batch_size=8: Accuracy=0.86, F1=0.85, MCC=0.72
seq_len=500_batch_size=16: Accuracy=0.85, F1=0.84, MCC=0.71
seq_len=500_batch_size=32: Accuracy=0.85, F1=0.83, MCC=0.69
seq_len=1000_batch_size=8: Accuracy=0.83, F1=0.82, MCC=0.67
seq_len=1000_batch_size=16: Accuracy=0.83, F1=0.81, MCC=0.65
seq_len=1000_batch_size=32: Accuracy=0.82, F1=0.80, MCC=0.63
seq_len=7500_batch_size=8: Accuracy=0.77, F1=0.75, MCC=0.53
seq_len=7500_batch_size=16: Accuracy=0.69, F1=0.63, MCC=0.38
seq_len=7500_batch_size=32: Accuracy=0.64, F1=0.53, MCC=0.27
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.87, F1=0.86, MCC=0.74
seq_len=250_batch_size=16: Accuracy=0.86, F1=0.85, MCC=0.73
seq_len=250_batch_size=32: Accuracy=0.86, F1=0.85, MCC=0.72
seq_len=500_batch_size=8: Accuracy=0.87, F1=0.87, MCC=0.74

In [None]:
# Multiclass classification
# Same grid and feature sets as the binary run; keep_data stays at its default
# (True) and n_splits has no effect because use_kfold=False.
multiclass_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_multiclass,  # Multiclass labels
    model_fn=create_model,
    binary=False,
    configs=configs,
    epochs=10,
    n_splits=4,
    use_kfold=False
)


Evaluating feature set: signal (1 features)
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
Evaluating feature set: hilbert (5 features)
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
Evaluating feature set: wavelets (25 features)
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Example: Summarizing multiclass results
# Each metric is stored as a list, so np.mean gives the average over its entries.
for feature, results in multiclass_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        print(f"{config}: Accuracy={np.mean(metrics['accuracy']):.2f}, "
              f"F1={np.mean(metrics['f1']):.2f}, "
              f"MCC={np.mean(metrics['mcc']):.2f}")

Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.78, F1=0.68, MCC=0.65
seq_len=250_batch_size=16: Accuracy=0.77, F1=0.67, MCC=0.63
seq_len=250_batch_size=32: Accuracy=0.77, F1=0.66, MCC=0.63
seq_len=500_batch_size=8: Accuracy=0.76, F1=0.62, MCC=0.62
seq_len=500_batch_size=16: Accuracy=0.76, F1=0.60, MCC=0.62
seq_len=500_batch_size=32: Accuracy=0.75, F1=0.56, MCC=0.60
seq_len=1000_batch_size=8: Accuracy=0.74, F1=0.60, MCC=0.58
seq_len=1000_batch_size=16: Accuracy=0.72, F1=0.51, MCC=0.54
seq_len=1000_batch_size=32: Accuracy=0.67, F1=0.40, MCC=0.45
seq_len=7500_batch_size=8: Accuracy=0.54, F1=0.18, MCC=0.12
seq_len=7500_batch_size=16: Accuracy=0.54, F1=0.15, MCC=0.06
seq_len=7500_batch_size=32: Accuracy=0.53, F1=0.14, MCC=0.01
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.84, F1=0.80, MCC=0.75
seq_len=250_batch_size=16: Accuracy=0.84, F1=0.80, MCC=0.75
seq_len=250_batch_size=32: Accuracy=0.83, F1=0.79, MCC=0.73
seq_len=500_batch_size=8: Accuracy=0.84, F1=0.79, MCC=0.76

The results show a clear trend.

For both the binary and the multiclass setup, a sequence length of **500 timesteps and a batch_size of 16** seems to be best. I don't want to go shorter, because 500 timesteps is already only 2 seconds and I want to ensure that each sample contains at least one complete burst from start to end.

Note that these are the results for only 10 epochs. Longer sequences may simply need more weight updates, which the shorter sequences already received because they come with a larger sample size.

I could also give the shorter sequences the same sample size as the longer ones, to see whether the sample size was the main reason why the shorter sequences performed better.

In [None]:
# prompt: Help me save the binary_results and multi_class results

import pickle

# NOTE(review): basePath is defined but never used below -- both files are
# written to the current working directory, not to this Drive folder.
# Confirm whether the pickles were meant to be stored under basePath.
basePath = "/content/drive/MyDrive/Colab Notebooks/Bachelor Thesis/Data/Model Performances/LSTM Model Set-Up Performances"

# Save binary_results
with open('binary_results_batch_len_comparison.pkl', 'wb') as f:
    pickle.dump(binary_results, f)

# Save multiclass_results
with open('multiclass_results_batch_len_comparison.pkl', 'wb') as f:
    pickle.dump(multiclass_results, f)

# Download the files
from google.colab import files
files.download('binary_results_batch_len_comparison.pkl')
files.download('multiclass_results_batch_len_comparison.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Training Models (remove Data)

In [None]:
# Binary classification sweep over all feature sets and (seq_len, batch_size) configs,
# this time with keep_data=False.
# NOTE(review): in the output printed below every configuration reports 100 windows
# regardless of seq_len, so keep_data=False presumably truncates the windowed data to
# a common sample count — confirm in evaluate_seq_len_batch_size.
# NOTE(review): n_splits is passed although use_kfold=False; presumably it is ignored
# in that case — confirm.
# This rebinds `binary_results`; the values of the previous run were pickled in the
# save cell above.
binary_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_binary,  # Binary labels
    model_fn=create_model,  # Function to create LSTM models
    binary=True,
    configs=configs,
    epochs=10,
    n_splits=4,
    use_kfold=False,
    keep_data=False
)

Evaluating feature set: signal (1 features)
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step
Evaluating feature set: hilbert (5 features)
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
Evaluating feature set: wavelets (25 features)
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step


In [None]:
# Multiclass classification sweep over all feature sets and (seq_len, batch_size)
# configs with keep_data=False (same settings as the binary run above, but with the
# multiclass labels and binary=False).
# NOTE(review): the output printed below reports 100 windows for every seq_len, so
# keep_data=False presumably equalises the sample count — confirm in
# evaluate_seq_len_batch_size.
# This rebinds `multiclass_results`.
multiclass_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_multiclass,  # Multiclass labels
    model_fn=create_model,  # Function to create LSTM models
    binary=False,
    configs=configs,
    epochs=10,
    n_splits=4,
    use_kfold=False,
    keep_data=False
)

Evaluating feature set: signal (1 features)
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=16


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
(100, 250, 1)
(100, 250, 1)
Seq_len=250, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=16


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
(100, 500, 1)
(100, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
(100, 1000, 1)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


Evaluating feature set: hilbert (5 features)
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
(100, 250, 5)
(100, 250, 1)
Seq_len=250, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
(100, 500, 5)
(100, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
(100, 1000, 5)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


Evaluating feature set: wavelets (25 features)
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=16


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
(100, 250, 25)
(100, 250, 1)
Seq_len=250, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
(100, 500, 25)
(100, 500, 1)
Seq_len=500, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
(100, 1000, 25)
(100, 1000, 1)
Seq_len=1000, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Summarize the binary results: mean accuracy, F1 and MCC per configuration,
# grouped by feature set.
for feature, results in binary_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        # Average each metric list stored for this configuration.
        mean_accuracy, mean_f1, mean_mcc = (
            np.mean(metrics[metric_name]) for metric_name in ("accuracy", "f1", "mcc")
        )
        print(f"{config}: Accuracy={mean_accuracy:.2f}, F1={mean_f1:.2f}, MCC={mean_mcc:.2f}")

Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.79, F1=0.81, MCC=0.57
seq_len=250_batch_size=16: Accuracy=0.74, F1=0.76, MCC=0.47
seq_len=250_batch_size=32: Accuracy=0.67, F1=0.74, MCC=0.32
seq_len=500_batch_size=8: Accuracy=0.75, F1=0.77, MCC=0.52
seq_len=500_batch_size=16: Accuracy=0.71, F1=0.67, MCC=0.42
seq_len=500_batch_size=32: Accuracy=0.69, F1=0.62, MCC=0.40
seq_len=1000_batch_size=8: Accuracy=0.77, F1=0.76, MCC=0.53
seq_len=1000_batch_size=16: Accuracy=0.71, F1=0.69, MCC=0.41
seq_len=1000_batch_size=32: Accuracy=0.66, F1=0.59, MCC=0.33
seq_len=7500_batch_size=8: Accuracy=0.77, F1=0.75, MCC=0.53
seq_len=7500_batch_size=16: Accuracy=0.71, F1=0.66, MCC=0.41
seq_len=7500_batch_size=32: Accuracy=0.66, F1=0.53, MCC=0.33
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.77, F1=0.81, MCC=0.54
seq_len=250_batch_size=16: Accuracy=0.74, F1=0.76, MCC=0.48
seq_len=250_batch_size=32: Accuracy=0.74, F1=0.78, MCC=0.47
seq_len=500_batch_size=8: Accuracy=0.77, F1=0.76, MCC=0.55

In [None]:
# Summarize the multiclass results: mean accuracy, F1 and MCC per configuration,
# grouped by feature set.
for feature, results in multiclass_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        # Average each metric list stored for this configuration.
        mean_accuracy, mean_f1, mean_mcc = (
            np.mean(metrics[metric_name]) for metric_name in ("accuracy", "f1", "mcc")
        )
        print(f"{config}: Accuracy={mean_accuracy:.2f}, F1={mean_f1:.2f}, MCC={mean_mcc:.2f}")

Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.49, F1=0.23, MCC=0.20
seq_len=250_batch_size=16: Accuracy=0.46, F1=0.17, MCC=0.12
seq_len=250_batch_size=32: Accuracy=0.44, F1=0.13, MCC=0.04
seq_len=500_batch_size=8: Accuracy=0.55, F1=0.25, MCC=0.21
seq_len=500_batch_size=16: Accuracy=0.53, F1=0.20, MCC=0.16
seq_len=500_batch_size=32: Accuracy=0.51, F1=0.14, MCC=0.04
seq_len=1000_batch_size=8: Accuracy=0.53, F1=0.19, MCC=0.13
seq_len=1000_batch_size=16: Accuracy=0.52, F1=0.14, MCC=0.04
seq_len=1000_batch_size=32: Accuracy=0.52, F1=0.14, MCC=0.02
seq_len=7500_batch_size=8: Accuracy=0.56, F1=0.23, MCC=0.18
seq_len=7500_batch_size=16: Accuracy=0.53, F1=0.14, MCC=0.04
seq_len=7500_batch_size=32: Accuracy=0.53, F1=0.15, MCC=0.05
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.70, F1=0.66, MCC=0.58
seq_len=250_batch_size=16: Accuracy=0.48, F1=0.21, MCC=0.21
seq_len=250_batch_size=32: Accuracy=0.46, F1=0.16, MCC=0.12
seq_len=500_batch_size=8: Accuracy=0.73, F1=0.65, MCC=0.57

When cutting down the amount of training data, the results are much closer together, and it is clear that small batch sizes outperform larger ones.

This again has to do with the number of weight updates performed.

Lastly, I want to rerun the first comparison, but with more training epochs.

### Training Models (25 Epochs)

In [None]:
# Binary classification sweep, repeated with 25 epochs instead of 10 and with
# keep_data=True (the output printed below reports 3000 / 1500 / 700 / 100 windows
# for seq_len 250 / 500 / 1000 / 7500).
# NOTE(review): this rebinds `binary_results`; the keep_data=False results computed
# above are not pickled in any cell visible before this point — save them first if
# they are still needed.
binary_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_binary,  # Binary labels
    model_fn=create_model,  # Function to create LSTM models
    binary=True,
    configs=configs,
    epochs=25,
    n_splits=4,
    use_kfold=False,
    keep_data=True
)

Evaluating feature set: signal (1 features)
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
Evaluating feature set: hilbert (5 features)
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
Evaluating feature set: wavelets (25 features)
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step


In [None]:
# Multiclass classification sweep, repeated with 25 epochs instead of 10 and with
# keep_data=True (the output printed below reports 3000 / 1500 / 700 / 100 windows
# for seq_len 250 / 500 / 1000 / 7500).
# NOTE(review): this rebinds `multiclass_results`; the keep_data=False results
# computed above are not pickled in any cell visible before this point.
multiclass_results = evaluate_seq_len_batch_size(
    feature_sets=feature_sets,
    y=y_multiclass,  # Multiclass labels
    model_fn=create_model,  # Function to create LSTM models
    binary=False,
    configs=configs,
    epochs=25,
    n_splits=4,
    use_kfold=False,
    keep_data=True
)

Evaluating feature set: signal (1 features)
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(3000, 250, 1)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(1500, 500, 1)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
(700, 1000, 1)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
(100, 7500, 1)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
Evaluating feature set: hilbert (5 features)
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 5)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(1500, 500, 5)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
(700, 1000, 5)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
(100, 7500, 5)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluating feature set: wavelets (25 features)
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=8


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=16


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
(3000, 250, 25)
(3000, 250, 1)
Seq_len=250, Batch_size=32


  super().__init__(**kwargs)


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=8


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=16


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
(1500, 500, 25)
(1500, 500, 1)
Seq_len=500, Batch_size=32


  super().__init__(**kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=8


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=16


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
(700, 1000, 25)
(700, 1000, 1)
Seq_len=1000, Batch_size=32


  super().__init__(**kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=8


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=16


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
(100, 7500, 25)
(100, 7500, 1)
Seq_len=7500, Batch_size=32


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step


In [None]:
# Summarize the 25-epoch binary results: mean accuracy, F1 and MCC per
# configuration, grouped by feature set.
for feature, results in binary_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        # Average each metric list stored for this configuration.
        mean_accuracy, mean_f1, mean_mcc = (
            np.mean(metrics[metric_name]) for metric_name in ("accuracy", "f1", "mcc")
        )
        print(f"{config}: Accuracy={mean_accuracy:.2f}, F1={mean_f1:.2f}, MCC={mean_mcc:.2f}")

Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.85, F1=0.84, MCC=0.70
seq_len=250_batch_size=16: Accuracy=0.85, F1=0.83, MCC=0.70
seq_len=250_batch_size=32: Accuracy=0.85, F1=0.83, MCC=0.69
seq_len=500_batch_size=8: Accuracy=0.87, F1=0.86, MCC=0.74
seq_len=500_batch_size=16: Accuracy=0.87, F1=0.86, MCC=0.73
seq_len=500_batch_size=32: Accuracy=0.86, F1=0.85, MCC=0.72
seq_len=1000_batch_size=8: Accuracy=0.84, F1=0.83, MCC=0.68
seq_len=1000_batch_size=16: Accuracy=0.84, F1=0.83, MCC=0.68
seq_len=1000_batch_size=32: Accuracy=0.84, F1=0.82, MCC=0.67
seq_len=7500_batch_size=8: Accuracy=0.81, F1=0.80, MCC=0.63
seq_len=7500_batch_size=16: Accuracy=0.79, F1=0.76, MCC=0.57
seq_len=7500_batch_size=32: Accuracy=0.74, F1=0.71, MCC=0.48
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.87, F1=0.86, MCC=0.73
seq_len=250_batch_size=16: Accuracy=0.87, F1=0.86, MCC=0.74
seq_len=250_batch_size=32: Accuracy=0.87, F1=0.86, MCC=0.74
seq_len=500_batch_size=8: Accuracy=0.89, F1=0.88, MCC=0.77

In [None]:
# Summarize the 25-epoch multiclass results: mean accuracy, F1 and MCC per
# configuration, grouped by feature set.
for feature, results in multiclass_results.items():
    print(f"Feature Set: {feature}")
    for config, metrics in results.items():
        # Average each metric list stored for this configuration.
        mean_accuracy, mean_f1, mean_mcc = (
            np.mean(metrics[metric_name]) for metric_name in ("accuracy", "f1", "mcc")
        )
        print(f"{config}: Accuracy={mean_accuracy:.2f}, F1={mean_f1:.2f}, MCC={mean_mcc:.2f}")

Feature Set: signal
seq_len=250_batch_size=8: Accuracy=0.80, F1=0.72, MCC=0.69
seq_len=250_batch_size=16: Accuracy=0.78, F1=0.70, MCC=0.66
seq_len=250_batch_size=32: Accuracy=0.77, F1=0.68, MCC=0.64
seq_len=500_batch_size=8: Accuracy=0.73, F1=0.46, MCC=0.56
seq_len=500_batch_size=16: Accuracy=0.76, F1=0.63, MCC=0.62
seq_len=500_batch_size=32: Accuracy=0.79, F1=0.70, MCC=0.67
seq_len=1000_batch_size=8: Accuracy=0.75, F1=0.63, MCC=0.60
seq_len=1000_batch_size=16: Accuracy=0.70, F1=0.51, MCC=0.51
seq_len=1000_batch_size=32: Accuracy=0.73, F1=0.57, MCC=0.56
seq_len=7500_batch_size=8: Accuracy=0.66, F1=0.43, MCC=0.44
seq_len=7500_batch_size=16: Accuracy=0.59, F1=0.31, MCC=0.29
seq_len=7500_batch_size=32: Accuracy=0.54, F1=0.19, MCC=0.13
Feature Set: hilbert
seq_len=250_batch_size=8: Accuracy=0.85, F1=0.82, MCC=0.77
seq_len=250_batch_size=16: Accuracy=0.85, F1=0.81, MCC=0.77
seq_len=250_batch_size=32: Accuracy=0.84, F1=0.80, MCC=0.75
seq_len=500_batch_size=8: Accuracy=0.86, F1=0.82, MCC=0.78

In [None]:
# Save the 25-epoch results under their own file names (so the pickles of the
# 10-epoch run are not overwritten) and download exactly the files written here.
from google.colab import files

# Define each file name once so the save and the download cannot drift apart.
binary_file_25 = 'binary_results_batch_len_comparison-25epoch.pkl'
multiclass_file_25 = 'multiclass_results_batch_len_comparison-25epoch.pkl'

# Save binary_results
with open(binary_file_25, 'wb') as f:
    pickle.dump(binary_results, f)

# Save multiclass_results
with open(multiclass_file_25, 'wb') as f:
    pickle.dump(multiclass_results, f)

# Download the files that were just written. Previously the names without the
# "-25epoch" suffix were passed here, which fetched the older 10-epoch pickles.
files.download(binary_file_25)
files.download(multiclass_file_25)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Synthetic stand-in data drawn from the seeded global NumPy RNG.
# NOTE(review): data_v has 100 * 7500 * 50 * 25 = 937,500,000 elements
# (about 7.5 GB as float64) — confirm this size is intended before running.
# NOTE(review): label_v holds random floats, not integer class ids.
np.random.seed(42)
data_v = np.random.rand(100, 7500, 50, 25)  # Shape: (100, 7500, 50, 25) — 4-D; presumably (N_samples, seq_len, window, N_features), TODO confirm
label_v = np.random.rand(100, 7500, 1)  # Shape: (N_samples, seq_len, 1)

In [None]:
# Merge all leading axes of data_v so every row is one block of shape
# (data_v.shape[-2], data_v.shape[-1]).
X = data_v.reshape(-1, data_v.shape[-2], data_v.shape[-1])
# Take every 50th entry along axis 1 and flatten to a single column.
# NOTE(review): this reads `label_vec` (the labels loaded from disk), not the
# synthetic `label_v` created alongside `data_v` — confirm which is intended,
# and check that the resulting number of rows matches X.
y = label_vec[:, ::50].reshape(-1, 1)

### Conclusion

Best Model Set-Up:
- Seq_len=500
- batch_size=16

# Sliding Windows

In [None]:
# Sanity check: report the shapes of the reshaped data and label arrays
for arr in (data_vec_shaped, label_vec_shaped):
    print(arr.shape)

(100, 7500, 25)
(100, 7500, 1)


In [None]:
# Hold out 20% of the samples for validation (fixed seed for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(
    data_vec_shaped,
    label_vec_shaped,
    test_size=0.2,
    random_state=42,
)

# Report the shape of every split, features first and labels second
for split in (X_train, X_val, y_train, y_val):
    print(split.shape)

(80, 7500, 25)
(20, 7500, 25)
(80, 7500, 1)
(20, 7500, 1)


In [None]:
def create_window_model(input_shape, binary=False, num_classes=5, lstm_units=32):
    """
    Creates an LSTM model for binary or multiclass classification.

    The model consumes one window and emits a single prediction for it
    (the LSTM returns only its final state).

    Args:
        input_shape: Tuple, shape of the input (seq_len, features).
        binary: Whether the model is binary classification.
        num_classes: Number of output classes for the multiclass case.
            Ignored when `binary` is True. Defaults to 5.
        lstm_units: Number of units in the LSTM layer. Defaults to 32.

    Returns:
        A compiled Keras model.

    Raises:
        ValueError: If `lstm_units` is not positive, or if `num_classes` is
            smaller than 2 in the multiclass case.
    """
    # Local import: `Input` is not part of the notebook's top-level imports.
    from tensorflow.keras.layers import Input

    if lstm_units < 1:
        raise ValueError(f"lstm_units must be >= 1, got {lstm_units}")
    if not binary and num_classes < 2:
        raise ValueError(f"num_classes must be >= 2 for multiclass, got {num_classes}")

    # Binary: one sigmoid unit. Multiclass: one softmax unit per class.
    if binary:
        n_outputs, activation, loss = 1, "sigmoid", "binary_crossentropy"
    else:
        # Sparse loss: labels are expected as integer class ids, not one-hot.
        n_outputs, activation, loss = num_classes, "softmax", "sparse_categorical_crossentropy"

    model = Sequential([
        # An explicit Input layer replaces the deprecated `input_shape=`
        # argument on the first layer.
        Input(shape=input_shape),
        LSTM(lstm_units, return_sequences=False, name="lstm_layer"),
        Dense(n_outputs, activation=activation, name="dense_output")
    ])

    model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
    return model

In [None]:
import numpy as np
import tensorflow as tf

class SlidingWindowGenerator(tf.keras.utils.Sequence):
    """
    Keras data generator that serves fixed-length windows cut from sequences.

    Every (sample, start position) pair is one training example: the window
    `data[sample, start:start + window_size]` with the label taken at the
    window's last timestep. Batch `idx` always maps to a well-defined slice of
    the current example order, so `shuffle=False` yields the same batches on
    every pass (needed for reproducible validation), while `shuffle=True`
    re-permutes the examples after each epoch.
    """

    def __init__(self, data, labels, window_size, batch_size, shuffle=True, **kwargs):
        """
        Sliding Window Generator for Training

        :param data: ndarray, shape (samples, timesteps, features)
        :param labels: ndarray, shape (samples, timesteps) or (samples, timesteps, num_classes)
        :param window_size: int, size of each sliding window
        :param batch_size: int, number of samples per batch
        :param shuffle: bool, whether to shuffle data at the start of each epoch
        :param kwargs: forwarded to the Keras base class constructor
        :raises ValueError: if window_size / batch_size are out of range or
            data and labels disagree on (samples, timesteps)
        """
        # The Keras base class expects its constructor to be called.
        super().__init__(**kwargs)

        if not 1 <= window_size <= data.shape[1]:
            raise ValueError(
                f"window_size must be in [1, {data.shape[1]}], got {window_size}"
            )
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if tuple(np.shape(labels)[:2]) != tuple(data.shape[:2]):
            raise ValueError(
                "data and labels must share their (samples, timesteps) dimensions"
            )

        self.data = data
        self.labels = labels
        self.window_size = window_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Valid window start positions; the +1 includes the window that ends
        # on the final timestep.
        self.indices = np.arange(data.shape[1] - window_size + 1)
        # Flat ids over all (sample, start) pairs: id = sample * n_starts + start.
        self._order = np.arange(data.shape[0] * len(self.indices))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch"""
        # Floor division: a trailing partial batch is dropped so that every
        # batch has exactly `batch_size` windows.
        return (len(self.indices) * self.data.shape[0]) // self.batch_size

    def __getitem__(self, idx):
        """
        Generate one batch of data

        :param idx: int, batch index in [0, len(self))
        :return: tuple (batch_x, batch_y); batch_x has shape
            (batch_size, window_size, features), batch_y holds the label at
            the last timestep of each window
        :raises IndexError: if idx is outside [0, len(self))
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range for {len(self)} batches")

        first = idx * self.batch_size
        flat_ids = self._order[first:first + self.batch_size]
        # Decode each flat id back into its (sample, window start) pair.
        sample_ids, starts = np.divmod(flat_ids, len(self.indices))

        batch_x = np.stack([
            self.data[s, t:t + self.window_size, :]
            for s, t in zip(sample_ids, starts)
        ])
        # Predict last timestep of each window
        batch_y = self.labels[sample_ids, starts + self.window_size - 1]

        return batch_x, batch_y

    def on_epoch_end(self):
        """Shuffle data at the end of each epoch"""
        if self.shuffle:
            np.random.shuffle(self._order)


In [None]:
import time
# Experiment settings
window_sizes = [50, 200]
batch_size = 256
n_epochs = 10
n_features = X_train.shape[-1]

# Generators for the short (50) window: shuffled training, unshuffled validation
train_gen_50 = SlidingWindowGenerator(
    X_train, y_train, window_size=window_sizes[0], batch_size=batch_size
)
val_gen_50 = SlidingWindowGenerator(
    X_val, y_val, window_size=window_sizes[0], batch_size=batch_size, shuffle=False
)

# Generators for the long (200) window
train_gen_200 = SlidingWindowGenerator(
    X_train, y_train, window_size=window_sizes[1], batch_size=batch_size
)
val_gen_200 = SlidingWindowGenerator(
    X_val, y_val, window_size=window_sizes[1], batch_size=batch_size, shuffle=False
)

# One multiclass model per window size
model_50 = create_window_model((window_sizes[0], n_features), binary=False)
model_200 = create_window_model((window_sizes[1], n_features), binary=False)

# Fit the short-window model and report the wall-clock training time (seconds)
start_time = time.time()
history_50 = model_50.fit(train_gen_50, validation_data=val_gen_50, epochs=n_epochs)
training_time_50 = time.time() - start_time
print(training_time_50)

# Fit the long-window model and report the wall-clock training time (seconds)
start_time = time.time()
history_200 = model_200.fit(train_gen_200, validation_data=val_gen_200, epochs=n_epochs)
training_time_200 = time.time() - start_time
print(training_time_200)


Epoch 1/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 16ms/step - accuracy: 0.8131 - loss: 0.5030 - val_accuracy: 0.8282 - val_loss: 0.4620
Epoch 2/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 16ms/step - accuracy: 0.8872 - loss: 0.3012 - val_accuracy: 0.8412 - val_loss: 0.4276
Epoch 3/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 17ms/step - accuracy: 0.8951 - loss: 0.2787 - val_accuracy: 0.8413 - val_loss: 0.4305
Epoch 4/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 16ms/step - accuracy: 0.8999 - loss: 0.2642 - val_accuracy: 0.8376 - val_loss: 0.4283
Epoch 5/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 15ms/step - accuracy: 0.9046 - loss: 0.2523 - val_accuracy: 0.8399 - val_loss: 0.4343
Epoch 6/10
[1m2328/2328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 16ms/step - accuracy: 0.9071 - loss: 0.2456 - val_accuracy: 0.8380 - val_loss: 0.4820
Epoc

In [None]:
basePathModelComp = "/content/drive/MyDrive/Colab Notebooks/Bachelor Thesis/Data/LSTM Analysis Data/"

# Build each destination path once and reuse it for saving and downloading.
# Previously the models were saved under basePathModelComp but the download
# pointed at a bare 'model_50.h5' in the working directory.
model_50_path = f'{basePathModelComp}model_50.h5'
model_200_path = f'{basePathModelComp}model_200.h5'

# Save the trained models
model_50.save(model_50_path)
model_200.save(model_200_path)

# Download the models
from google.colab import files
files.download(model_50_path)
# files.download(model_200_path)




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, confusion_matrix, mean_absolute_error, recall_score, f1_score
import time

def evaluate_model(model, generator, binary):
    """
    Evaluates the model using a sliding window generator.

    Every batch of the generator is predicted once; true and predicted labels
    are collected as flat 1-D arrays and scored with scikit-learn metrics.
    The metrics are printed and also returned.

    Args:
        model: The trained Keras model.
        generator: The SlidingWindowGenerator instance for the validation set.
        binary: Whether the model is for binary classification.

    Returns:
        A dictionary containing the evaluation metrics, with keys
        "accuracy", "mcc", "mae", "recall", "f1", "confusion_matrix" and
        "inference_time" (seconds spent in the batch loop, including batch
        generation).

    Raises:
        ValueError: If the generator yields no batches, or if the collected
            labels and predictions do not contain the same number of entries.
    """
    n_batches = len(generator)
    if n_batches == 0:
        raise ValueError("generator yields no batches; nothing to evaluate")

    start_time = time.time()
    true_parts = []
    pred_parts = []
    for i in range(n_batches):
        X_batch, y_batch = generator[i]  # Get a batch of data
        # verbose=0: otherwise Keras prints one progress bar per batch.
        scores = model.predict(X_batch, verbose=0)
        if binary:
            batch_pred = (scores > 0.5).astype(int)
        else:
            batch_pred = scores.argmax(axis=-1)
        # Flatten so labels shaped (batch, 1) and predictions shaped (batch,)
        # end up in the same 1-D layout expected by the metric functions.
        true_parts.append(np.asarray(y_batch).reshape(-1))
        pred_parts.append(np.asarray(batch_pred).reshape(-1))
    inference_time = time.time() - start_time

    y_true = np.concatenate(true_parts)
    y_pred = np.concatenate(pred_parts)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"expected one label per prediction, got {y_true.shape[0]} labels "
            f"and {y_pred.shape[0]} predictions"
        )

    average = "binary" if binary else "macro"
    accuracy = accuracy_score(y_true, y_pred)
    # MAE is computed on the class ids themselves.
    mae = mean_absolute_error(y_true, y_pred)
    recall = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)
    cm = confusion_matrix(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)

    print(f"Accuracy: {accuracy}")
    print(f"MAE: {mae}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(f"Confusion Matrix:\n{cm}")
    print(f"MCC: {mcc}")
    print(f"Inference Time: {inference_time:.4f} seconds")

    return {
        "accuracy": accuracy,
        "mcc": mcc,
        "mae": mae,
        "recall": recall,
        "f1": f1,
        "confusion_matrix": cm,
        "inference_time": inference_time
    }

In [None]:
# Evaluate the window-200 model on the validation split (requires X_val / y_val)
window_size = 200  # must match the window length the model was built for
batch_size = 256   # number of windows per evaluation batch
val_generator = SlidingWindowGenerator(
    data=X_val,
    labels=y_val,
    window_size=window_size,
    batch_size=batch_size,
    shuffle=False,
)

# binary=False: multiclass model; pass True for a binary classification task
results = evaluate_model(model=model_200, generator=val_generator, binary=False)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9m

In [None]:
# Ad-hoc metric collection on the whole validation set (no sliding windows).
# NOTE(review): as saved, this cell fails with
#   NameError: name 'binary' is not defined
# because `binary` is never assigned at notebook scope. `model` is also not
# assigned in the surrounding cells (only model_50 / model_200 are) — confirm
# which model is meant, define both names, or delete the cell.
# NOTE(review): this rebinds `results`, discarding the dictionary returned by
# evaluate_model in the previous cell.
results = {"accuracy": [], "precision": [], "recall": [], "f1": [], "confusion_matrices": [], "mcc": []}

# Multiclass: arg-max over the last axis; binary: threshold the score at 0.5
y_pred = model.predict(X_val).argmax(axis=-1) if not binary else (model.predict(X_val) > 0.5).astype(int)
results["accuracy"].append(accuracy_score(y_val, y_pred))
results["precision"].append(precision_score(y_val, y_pred, average="binary" if binary else "macro"))
results["recall"].append(recall_score(y_val, y_pred, average="binary" if binary else "macro"))
results["f1"].append(f1_score(y_val, y_pred, average="binary" if binary else "macro"))
results["confusion_matrices"].append(confusion_matrix(y_val, y_pred))
results["mcc"].append(matthews_corrcoef(y_val, y_pred))

# Print the mean of every collected metric list.
# NOTE(review): for "confusion_matrices" np.mean averages all matrix entries
# into one number, which is unlikely to be a meaningful summary.
for config, metrics in results.items():
    print(f"{config} = {np.mean(metrics):.2f}, ")

NameError: name 'binary' is not defined

In [None]:
import matplotlib.pyplot as plt

def plot_training(history_50, history_200):
    """
    Plots training and validation curves of the two window-size experiments.

    Draws a figure with two panels side by side: loss on the left, accuracy
    on the right. Each panel contains four curves (train / validation for
    window 50 and window 200).

    Args:
        history_50: Keras History returned by fitting the window-50 model.
        history_200: Keras History returned by fitting the window-200 model.

    Returns:
        None. The figure is displayed with plt.show().
    """
    runs = (("Window 50", history_50), ("Window 200", history_200))
    # (key in History.history, human-readable name) for each panel
    panels = (("loss", "Loss"), ("accuracy", "Accuracy"))

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (metric, metric_name) in zip(axes, panels):
        for run_label, history in runs:
            ax.plot(history.history[metric], label=f"{run_label} - Train")
            ax.plot(history.history[f"val_{metric}"], label=f"{run_label} - Val")
        ax.set_title(f"{metric_name} over Epochs")
        # Axis labels so each panel is readable on its own
        ax.set_xlabel("Epoch")
        ax.set_ylabel(metric_name)
        ax.legend()

    fig.tight_layout()
    plt.show()

# Show the loss / accuracy curves of both window sizes side by side
plot_training(history_50=history_50, history_200=history_200)
