In [1]:
import nb_utils
import numpy as np
from gtda.diagrams import NumberOfPoints, Scaler
from gtda.homology import VietorisRipsPersistence
from gtda.metaestimators import CollectionTransformer
from gtda.pipeline import Pipeline
from gtda.time_series import TakensEmbedding
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from src import acf_utils, sim


def get_accuracy(model, X_train, X_valid, y_train, y_valid):
    """Summarise a fitted binary classifier on the train and validation splits.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` and ``predict_proba``
    X_train, X_valid : feature matrices for the two splits
    y_train, y_valid : true labels for the two splits

    Returns
    -------
    tuple of str
        Four formatted lines, in order: accuracy on train, ROC AUC on train,
        accuracy on valid, ROC AUC on valid (each rounded to 3 decimals).
    """
    report = []
    for split, X, y in (("train", X_train, y_train), ("valid", X_valid, y_valid)):
        # scikit-learn metrics take (y_true, y_pred / y_score), in that order
        acc = accuracy_score(y, model.predict(X))
        # column 1 of predict_proba is used as the score of the positive class
        auc = roc_auc_score(y, model.predict_proba(X)[:, 1])
        report.append(f"Accuracy on {split}: {acc:.3f}")
        report.append(f"ROC AUC on {split}: {auc:.3f}")
    return tuple(report)

### $\rho_k$: sublevel set filtration

In [2]:
def sim_xy(idx, n_lags, features):
    """Simulate one labelled sample from the sublevel-set filtration of an ACF.

    The label is ``idx % 2`` and is forwarded to ``nb_utils.gen_ar2_coeffs``
    as the ``oscillatory`` flag; ``idx`` doubles as the random seed.
    ``features`` is any transformer whose ``fit_transform`` maps the
    filtration output to a feature array.

    Returns ``(feature, label)``, where ``feature`` is the squeezed output of
    ``features.fit_transform``.
    """
    label = idx % 2
    ar_coeffs = nb_utils.gen_ar2_coeffs(
        oscillatory=label, phi1="positive", random_seed=idx
    )
    # theoretical autocorrelation function up to n_lags
    acf = acf_utils.ar_to_acf(ar_coeffs, n_lags=n_lags)
    # sublevel-set filtration -> persistent homology
    filtration = nb_utils.sublevel_set_filtration(acf)
    feature = features.fit_transform(filtration).squeeze()
    return feature, label


# simulation settings: number of ACF lags and number of simulated samples
n_lags = 50
n_repeats = 5000

# feature extractor applied to each filtration, and the classifier
features = NumberOfPoints()
model = LogisticRegression()

# one (feature, label) pair per seed
samples = [sim_xy(idx, n_lags, features) for idx in range(n_repeats)]
Xs = np.array([feature for feature, _ in samples]).reshape(-1, 1)
ys = np.array([label for _, label in samples])

# fit classifier on an 80/20 train/validation split
# NOTE(review): the later cells hold out 10% -- confirm the 20% here is intentional
X_train, X_valid, y_train, y_valid = train_test_split(
    Xs,
    ys,
    test_size=0.2,
    random_state=0,
)
model.fit(X_train, y_train)
get_accuracy(model, X_train, X_valid, y_train, y_valid)

('Accuracy on train: 0.823',
 'ROC AUC on train: 0.889',
 'Accuracy on valid: 0.841',
 'ROC AUC on valid: 0.896')

### $\rho_k$: Takens' embedding

In [3]:
def sim_xy(idx, n_lags):
    """Simulate one labelled theoretical ACF.

    The label is ``idx % 2`` and is forwarded to ``nb_utils.gen_ar2_coeffs``
    as the ``oscillatory`` flag; ``idx`` doubles as the random seed.  No
    embedding or persistence step happens here -- the raw ACF is returned.

    Returns ``(acf, label)``.
    """
    label = idx % 2
    ar_coeffs = nb_utils.gen_ar2_coeffs(
        oscillatory=label, phi1="positive", random_seed=idx
    )
    # theoretical autocorrelation function up to n_lags
    acf = acf_utils.ar_to_acf(ar_coeffs, n_lags=n_lags)
    return acf, label


# -1 is passed through to the steps that accept n_jobs
n_jobs = -1

# Takens embedding -> PCA per point cloud -> Vietoris-Rips persistence (H0, H1)
# -> diagram rescaling -> number of points per diagram
pipeline_steps = [
    ("emb", TakensEmbedding(time_delay=1, dimension=2)),
    ("pca", CollectionTransformer(PCA(n_components=2), n_jobs=n_jobs)),
    ("vr", VietorisRipsPersistence(homology_dimensions=(0, 1), n_jobs=n_jobs)),
    ("scaler", Scaler(metric="bottleneck")),
    ("features", NumberOfPoints()),
]
pipeline = Pipeline(pipeline_steps)

# one (acf, label) pair per seed
samples = [sim_xy(idx, n_lags) for idx in range(n_repeats)]
Xs_acf = np.array([acf for acf, _ in samples])
ys = np.array([label for _, label in samples])

# NOTE(review): the pipeline (including Scaler) is fit on all samples before the
# train/valid split below -- confirm this is acceptable for the comparison
Xs = pipeline.fit_transform(Xs_acf)

# fit classifier on a 90/10 train/validation split
X_train, X_valid, y_train, y_valid = train_test_split(
    Xs,
    ys,
    test_size=0.1,
    random_state=0,
)
model.fit(X_train, y_train)
get_accuracy(model, X_train, X_valid, y_train, y_valid)

('Accuracy on train: 0.881',
 'ROC AUC on train: 0.935',
 'Accuracy on valid: 0.860',
 'ROC AUC on valid: 0.916')

### $x_t$: Takens' embedding

In [4]:
def sim_xy(idx, n_timepoints):
    """Simulate one labelled time series.

    The label is ``idx % 2`` and is forwarded to ``nb_utils.gen_ar2_coeffs``
    as the ``oscillatory`` flag; ``idx`` is used as the random seed for both
    the coefficient draw and the simulation.  No embedding or persistence
    step happens here -- the simulated series is returned.

    Returns ``(x, label)``, where ``x`` is the squeezed output of
    ``sim.sim_ar``.
    """
    y = idx % 2
    # "positive" (previously misspelled "postive") matches the ACF-based cells
    phis = nb_utils.gen_ar2_coeffs(oscillatory=y, phi1="positive", random_seed=idx)
    x = sim.sim_ar(phis, n_timepoints, random_seed=idx).squeeze()
    return x, y


# length of each simulated time series
n_timepoints = 250

# one (series, label) pair per seed
samples = [sim_xy(idx, n_timepoints) for idx in range(n_repeats)]
Xs_ts = np.array([series for series, _ in samples])
ys = np.array([label for _, label in samples])

# reuse the Takens-embedding pipeline defined in the previous cell, re-fit on
# the raw time series
Xs = pipeline.fit_transform(Xs_ts)

# fit classifier on a 90/10 train/validation split
X_train, X_valid, y_train, y_valid = train_test_split(
    Xs,
    ys,
    test_size=0.1,
    random_state=0,
)
model.fit(X_train, y_train)
get_accuracy(model, X_train, X_valid, y_train, y_valid)

('Accuracy on train: 0.628',
 'ROC AUC on train: 0.679',
 'Accuracy on valid: 0.630',
 'ROC AUC on valid: 0.694')