In [None]:
import nb_utils
import numpy as np
from gtda.diagrams import PersistenceEntropy, Scaler
from gtda.homology import VietorisRipsPersistence
from gtda.metaestimators import CollectionTransformer
from gtda.pipeline import Pipeline
from gtda.time_series import TakensEmbedding
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from src import acf_utils


def get_accuracy(model, X_train, X_valid, y_train, y_valid):
    """Summarise a fitted classifier on the train and validation splits.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict`` and ``predict_proba``.
    X_train, X_valid
        Features of the training and validation splits.
    y_train, y_valid
        True labels matching ``X_train`` and ``X_valid``.

    Returns
    -------
    tuple of str
        Four formatted lines, in this order: accuracy on train, ROC AUC on
        train, accuracy on valid, ROC AUC on valid (3 decimal places each).
    """
    report = []
    for split, X, y_true in (("train", X_train, y_train), ("valid", X_valid, y_valid)):
        # sklearn metrics expect the ground truth first: (y_true, y_pred / y_score)
        accuracy = accuracy_score(y_true, model.predict(X))
        # second column of predict_proba is passed as the score for ROC AUC
        auc = roc_auc_score(y_true, model.predict_proba(X)[:, 1])
        report.append(f"Accuracy on {split}: {accuracy:.3f}")
        report.append(f"ROC AUC on {split}: {auc:.3f}")
    return tuple(report)

### sublevel set filtration

In [None]:
def sim_xy(idx, n_lags, pe):
    """Simulate one (feature, label) pair for the sublevel-set experiment.

    ``idx`` doubles as the random seed for the coefficient generator and, via
    its parity, as the class label (also passed as the ``oscillatory`` flag).
    The theoretical ACF up to ``n_lags`` is run through a sublevel-set
    filtration and then through ``pe.fit_transform``.

    Returns the squeezed output of ``pe`` together with the label.
    """
    label = idx % 2
    # NOTE(review): "postive" looks like a typo for "positive" -- confirm which
    # spelling nb_utils.gen_ar2_coeffs expects before changing it.
    ar_coeffs = nb_utils.gen_ar2_coeffs(oscillatory=label, phi1="postive", random_seed=idx)
    acf = acf_utils.ar_to_acf(ar_coeffs, n_lags=n_lags)  # theoretical acf
    diagram = nb_utils.sublevel_set_filtration(acf)  # sublevel-set filtration -> PH
    return pe.fit_transform(diagram).squeeze(), label


# simulation settings
n_lags = 50
n_repeats = 5000
pe = PersistenceEntropy(normalize=True)

# classification settings
model = LogisticRegression()

# simulate one (feature, label) pair per index, then stack them into arrays
sims = [sim_xy(idx, n_lags, pe) for idx in range(n_repeats)]
Xs = np.array([feature for feature, _ in sims]).reshape(-1, 1)  # single-column feature matrix
ys = np.array([label for _, label in sims])

# hold out 10% for validation (fixed seed), fit the classifier and report
X_train, X_valid, y_train, y_valid = train_test_split(Xs, ys, test_size=0.1, random_state=0)
model.fit(X_train, y_train)
get_accuracy(model, X_train, X_valid, y_train, y_valid)

### Takens embedding

In [None]:
def sim_xy(idx, n_lags):
    """Simulate one (acf, label) pair for the Takens-embedding experiment.

    ``idx`` doubles as the random seed for the coefficient generator and, via
    its parity, as the class label (also passed as the ``oscillatory`` flag).

    Returns the theoretical ACF up to ``n_lags`` and the label. No embedding
    or persistence entropy is computed inside this function.
    """
    label = idx % 2
    # NOTE(review): "postive" looks like a typo for "positive" -- confirm which
    # spelling nb_utils.gen_ar2_coeffs expects before changing it.
    ar_coeffs = nb_utils.gen_ar2_coeffs(oscillatory=label, phi1="postive", random_seed=idx)
    acf = acf_utils.ar_to_acf(ar_coeffs, n_lags=n_lags)  # theoretical acf
    return acf, label


# NOTE(review): this cell reuses n_lags, n_repeats and model from the
# sublevel-set cell; run that cell first.
n_jobs = -1

# pipeline stages, applied in this order
emb = TakensEmbedding(time_delay=1, dimension=2)
pca = CollectionTransformer(PCA(n_components=2), n_jobs=n_jobs)
vr = VietorisRipsPersistence(homology_dimensions=(0, 1), n_jobs=n_jobs)
scaler = Scaler(metric="bottleneck")
pe = PersistenceEntropy(normalize=True)

pipeline = Pipeline([("emb", emb), ("pca", pca), ("vr", vr), ("scaler", scaler), ("pe", pe)])

# simulate one (acf, label) pair per index, then stack them into arrays
sims = [sim_xy(idx, n_lags) for idx in range(n_repeats)]
Xs_acf = np.array([acf for acf, _ in sims])
ys = np.array([label for _, label in sims])

# NOTE(review): the pipeline is fitted on every sample before the train/valid
# split below; if any stage (e.g. Scaler) learns data-dependent state, the
# validation rows influence it -- confirm this is acceptable.
Xs = pipeline.fit_transform(Xs_acf)

# hold out 10% for validation (fixed seed), refit the classifier and report
X_train, X_valid, y_train, y_valid = train_test_split(Xs, ys, test_size=0.1, random_state=0)
model.fit(X_train, y_train)
get_accuracy(model, X_train, X_valid, y_train, y_valid)