In [5]:
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from scipy.stats import mode
from aeon.datasets import load_basic_motions
from aeon.transformations.collection.convolution_based import Rocket
from sklearn.preprocessing import LabelEncoder

# Load dataset
X_train, y_train = load_basic_motions(split="train", return_type="numpy3d")
X_test, y_test = load_basic_motions(split="test", return_type="numpy3d")

# Encode string labels as integers
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

# Set random seeds
np.random.seed(0)
random_seeds = np.random.randint(0, 10000, size=5)

# For collecting predictions
all_preds = []
individual_accuracies = []

for seed in random_seeds:
    print(f"--- Seed: {seed} ---")

    # ROCKET transform
    rocket = Rocket(random_state=seed)
    X_train_transformed = rocket.fit_transform(X_train)
    X_test_transformed = rocket.transform(X_test)

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_transformed)
    X_test_scaled = scaler.transform(X_test_transformed)

    # Train XGBoost
    clf = xgb.XGBClassifier(
        n_estimators=100,
        use_label_encoder=False,
        eval_metric='mlogloss',
        random_state=seed,
        verbosity=0
    )
    clf.fit(X_train_scaled, y_train_enc)

    # Predict
    y_pred = clf.predict(X_test_scaled)
    all_preds.append(y_pred)
    acc = accuracy_score(y_test_enc, y_pred)
    individual_accuracies.append(acc)

    print(f"Accuracy: {acc:.4f}")

# Majority vote across all predictions
all_preds = np.array(all_preds)  # shape (n_models, n_samples)
ensemble_preds, _ = mode(all_preds, axis=0, keepdims=False)

# Final ensemble accuracy
ensemble_acc = accuracy_score(y_test, ensemble_preds)

print("\n--- Summary ---")
print(f"Individual Model Accuracies: {[f'{a:.4f}' for a in individual_accuracies]}")
print(f"Mean Accuracy: {np.mean(individual_accuracies):.4f}")
print(f"Ensemble Accuracy (Majority Vote): {ensemble_acc:.4f}")


--- Seed: 2732 ---
Accuracy: 0.7750
--- Seed: 9845 ---
Accuracy: 0.6750
--- Seed: 3264 ---
Accuracy: 0.6500
--- Seed: 4859 ---
Accuracy: 0.8250
--- Seed: 9225 ---
Accuracy: 0.8750

--- Summary ---
Individual Model Accuracies: ['0.7750', '0.6750', '0.6500', '0.8250', '0.8750']
Mean Accuracy: 0.7600
Ensemble Accuracy (Majority Vote): 0.0000


In [4]:
import numpy as np
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from aeon.datasets import load_basic_motions
from aeon.transformations.collection.convolution_based import Rocket

# Load dataset
X_train, y_train = load_basic_motions(split="train", return_type="numpy3d")
X_test, y_test = load_basic_motions(split="test", return_type="numpy3d")

# Encode labels as integers
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)
n_classes = len(le.classes_)

# Random seeds
np.random.seed(0)
random_seeds = np.random.randint(0, 10000, size=5)

# Store model probabilities and individual accuracies
all_probas = []
individual_accuracies = []

for seed in random_seeds:
    print(f"--- Seed: {seed} ---")

    # ROCKET transform
    rocket = Rocket(random_state=seed)
    X_train_transformed = rocket.fit_transform(X_train)
    X_test_transformed = rocket.transform(X_test)

    # Standardize
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_transformed)
    X_test_scaled = scaler.transform(X_test_transformed)

    # Train LightGBM
    clf = lgb.LGBMClassifier(n_estimators=100, random_state=seed)
    clf.fit(X_train_scaled, y_train_enc)

    # Store predicted probabilities
    y_proba = clf.predict_proba(X_test_scaled)  # shape: (n_samples, n_classes)
    all_probas.append(y_proba)

    # Also store accuracy of hard prediction
    y_pred = clf.predict(X_test_scaled)
    acc = accuracy_score(y_test_enc, y_pred)
    individual_accuracies.append(acc)

    print(f"Accuracy: {acc:.4f}")

# Soft voting: average class probabilities
avg_proba = np.mean(all_probas, axis=0)  # shape: (n_samples, n_classes)
ensemble_preds = np.argmax(avg_proba, axis=1)  # predicted class index

# Final ensemble accuracy
ensemble_acc = accuracy_score(y_test_enc, ensemble_preds)
ensemble_labels = le.inverse_transform(ensemble_preds)

print("\n--- Soft Voting Summary ---")
print(f"Individual Accuracies: {[f'{a:.4f}' for a in individual_accuracies]}")
print(f"Mean Accuracy: {np.mean(individual_accuracies):.4f}")
print(f"Soft Voting Accuracy: {ensemble_acc:.4f}")


--- Seed: 2732 ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 44403
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 3575
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Accuracy: 1.0000
--- Seed: 9845 ---




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002844 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 42996
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 3445
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Accuracy: 1.0000
--- Seed: 3264 ---




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002648 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 43895
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 3517
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Accuracy: 1.0000
--- Seed: 4859 ---




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003347 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 41761
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 3372
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294




Accuracy: 1.0000
--- Seed: 9225 ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002846 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 42515
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 3408
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Accuracy: 1.0000

--- Soft Voting Summary ---
Individual Accuracies: ['1.0000', '1.0000', '1.0000', '1.0000', '1.0000']
Mean Accuracy: 1.0000
Soft Voting Accuracy: 1.0000


