In [1]:
%load_ext autoreload
%autoreload 2
from mtsccleav import *

In [2]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import RidgeClassifierCV

In [3]:
from aeon.classification.convolution_based import (
    MiniRocketClassifier, MultiRocketClassifier, HydraClassifier,
)

In [4]:
from aeon.utils.conversion import convert_collection
from aeon.utils.validation import get_type
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from aeon.transformations.collection.convolution_based import Rocket
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


In [5]:
from aeon.classification.convolution_based import (
    Arsenal,
    HydraClassifier,
    MiniRocketClassifier,
    MultiRocketClassifier,
    MultiRocketHydraClassifier,
    RocketClassifier,
)

In [6]:
from sklearn.metrics import (
    accuracy_score, f1_score, recall_score,
    confusion_matrix, matthews_corrcoef
)
import numpy as np

In [7]:
from sklearn.model_selection import StratifiedKFold

In [8]:
ts_panel = load_as_pd_multiindex("../data/01_single_multi_eq.csv")
X = construct_X(ts_panel, include_five_p_cleav=False, include_prob=True, include_ss=True)


In [9]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,three_p_cleav_1,three_p_cleav_compl_1,three_p_cleav_2,three_p_cleav_compl_2,three_p_cleav_prob,three_p_cleav_ss
instances,time_points,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0,0,1,-1,0.843341,-1
0,1,1,0,0,0,0.000000,0
0,2,0,0,1,-1,0.806891,-1
0,3,1,-1,0,0,0.807178,-1
0,4,-1,0,0,1,0.793192,-1
...,...,...,...,...,...,...,...
1653,9,0,0,1,-1,0.994895,-1
1653,10,1,0,0,0,0.000000,0
1653,11,0,0,1,-1,0.995325,-1
1653,12,0,0,1,-1,0.999313,-1


In [10]:
type(X)

pandas.core.frame.DataFrame

In [11]:
X.isnull().values.any()

np.False_

In [12]:
X.fillna(-100, inplace=True)

In [13]:
X = pad_multiindex_ts(X,-100)

In [14]:
X_convert=convert_collection(X, "np-list")

In [15]:
get_type(X), get_type(X_convert)

('pd-multiindex', 'np-list')

In [18]:
# np.array(X_convert).shape

In [17]:
# X_convert[1] 

In [19]:
y = np.concatenate((np.full((1, 827), 'cleav'), np.full((1, 827), 'non_cleav')), axis=None)
labelencoder = LabelEncoder()
y_transform = labelencoder.fit_transform(y)

In [20]:
def multiclass_specificity(cm):
    FP = cm.sum(axis=0) - np.diag(cm)
    TN = cm.sum() - (FP + cm.sum(axis=1) - np.diag(cm) + np.diag(cm))
    return np.mean(TN / (TN + FP))

In [44]:
def evaluate_classifier(classifier, X, y, n_splits=5, mode="binary"):
    print(f"\n Evaluating {classifier.__class__.__name__}")

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    results = {k: [] for k in ["accuracy", "specificity", "sensitivity", "f1", "mcc"]}

    for fold, (train_idx, test_idx) in enumerate(cv.split(X, y), 1):
        # print(f"Fold {fold}")

        X_train = [X[i] for i in train_idx]
        X_test = [X[i] for i in test_idx]
        y_train = y[train_idx]
        y_test = y[test_idx]
    


        clf = classifier
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        mcc = matthews_corrcoef(y_test, y_pred)
        if mode == "binary":
            tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
            accuracy = (tp + tn) / (tp + tn + fp + fn)
            specificity = tn / (tn+fp)
            sensitivity = tp / (tp+fn)
            f1 = 2 * tp / (2 * tp + fp + fn)
            
        elif mode == "multi":
            accuracy = accuracy_score(y_test, y_pred)
            cm = confusion_matrix(y_test, y_pred)
            specificity = multiclass_specificity(cm)
            sensitivity = recall_score(y_test, y_pred, average="macro")
            f1 = f1_score(y_test, y_pred, average="macro")
        else:
            raise ValueError("mode must be either binary or multi.")
    

        results["accuracy"].append(accuracy)
        results["specificity"].append(specificity)
        results["sensitivity"].append(sensitivity)
        results["f1"].append(f1)
        results["mcc"].append(mcc)

    print("\nAverage 5-Fold Results:")
    for metric, scores in results.items():
        print(f"{metric.capitalize():<12}: {np.mean(scores):.4f} ± {np.std(scores):.4f}")

In [22]:
evaluate_classifier(RocketClassifier(random_state=18), X_convert, y_transform)
# evaluate_classifier(MiniRocketClassifier(random_state=18), X_convert, y_transform)
# evaluate_classifier(MultiRocketClassifier(random_state=18), X_convert, y_transform)
# evaluate_classifier(HydraClassifier(random_state=18), X_convert, y_transform)
# evaluate_classifier(MultiRocketHydraClassifier(random_state=18), X_convert, y_transform)


 Evaluating RocketClassifier

Average 5-Fold Results:
Accuracy    : 0.8628 ± 0.0204
Specificity : 0.8501 ± 0.0128
Sensitivity : 0.8754 ± 0.0346
F1          : 0.8641 ± 0.0222
Mcc         : 0.7261 ± 0.0409


In [39]:
# 01_single_multi_eq


# Multi

In [23]:
path = "../data/01_single_multi_eq.csv"
include_prob=True
include_ss=True

In [24]:
ts_panel = load_as_pd_multiindex(path)
X_5p = construct_X(ts_panel, include_five_p_cleav=True, include_prob=include_prob, include_ss=include_ss)
X_5p.columns = X_5p.columns.str.replace("five_p_", "", regex=False)
X_3p = construct_X(ts_panel, include_five_p_cleav=False, include_prob=include_prob, include_ss=include_ss)
X_3p .columns = X_3p.columns.str.replace("three_p_", "", regex=False)

In [25]:
X_5p

Unnamed: 0_level_0,Unnamed: 1_level_0,cleav_1,cleav_compl_1,cleav_2,cleav_compl_2,cleav_prob,cleav_ss
instances,time_points,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,-1,1,0,0,0.999468,1
0,1,1,-1,0,0,0.998997,1
0,2,-1,1,0,0,0.998971,1
0,3,1,-1,0,0,0.999318,1
0,4,0,0,1,-1,0.999742,1
...,...,...,...,...,...,...,...
1653,9,0,0,-1,1,0.992391,1
1653,10,0,0,-1,0,0.000000,0
1653,11,0,0,1,0,0.000000,0
1653,12,0,0,-1,1,0.762377,1


In [26]:
X_3p

Unnamed: 0_level_0,Unnamed: 1_level_0,cleav_1,cleav_compl_1,cleav_2,cleav_compl_2,cleav_prob,cleav_ss
instances,time_points,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0,0,1,-1,0.843341,-1
0,1,1,0,0,0,0.000000,0
0,2,0,0,1,-1,0.806891,-1
0,3,1,-1,0,0,0.807178,-1
0,4,-1,0,0,1,0.793192,-1
...,...,...,...,...,...,...,...
1653,9,0,0,1,-1,0.994895,-1
1653,10,1,0,0,0,0.000000,0
1653,11,0,0,1,-1,0.995325,-1
1653,12,0,0,1,-1,0.999313,-1


In [27]:
X_3p.fillna(-100, inplace=True)
X_3p = pad_multiindex_ts(X_3p,-100)
X_5p.fillna(-100, inplace=True)
X_5p = pad_multiindex_ts(X_5p,-100)


In [28]:
X_5p_convert=convert_collection(X_5p, "np-list")
X_3p_convert=convert_collection(X_3p, "np-list")

In [35]:
X_convert = np.concatenate((X_5p_convert, X_5p_convert), axis=0)
# print(X_convert)

In [37]:
# np.array(X_convert).shape

In [38]:
y = np.concatenate((np.full((1, 827), 'five_p_cleav'), np.full((1, 827), 'non_cleav'), np.full((1, 827), 'three_p_cleav'), np.full((1, 827), 'non_cleav')), axis=None)
labelencoder = LabelEncoder()
y_transform = labelencoder.fit_transform(y)

In [39]:
type(y_transform)

numpy.ndarray

In [None]:
num_unique = len(np.unique(y_transform))
print(num_unique)
print(np.unique(y_transform))
print(len(np.unique(y_transform)))

3
[0 1 2]
3


In [45]:
evaluate_classifier(RocketClassifier(random_state=18), X_convert, y_transform, mode = "multi")


 Evaluating RocketClassifier

Average 5-Fold Results:
Accuracy    : 0.5290 ± 0.0066
Specificity : 0.7862 ± 0.0040
Sensitivity : 0.3772 ± 0.0090
F1          : 0.3738 ± 0.0102
Mcc         : 0.2428 ± 0.0115


In [40]:
import datetime
print(f"This Notebook last end-to-end runs on: {datetime.datetime.now()}\n")

This Notebook last end-to-end runs on: 2025-07-03 23:50:46.994830

