# Topological Divergence Results

Load, visualise, and summarise the topological divergences computed for the test systems.

In [2]:
import os
import pickle
from trajectories import generate_trajectories
from plots_and_correlates import plot_lce_estimate_and_correlation
from lca_supervised_learning import score_classification
from lca_supervised_learning import score_regression
from lca_supervised_learning import score_regression_pos
from lca_supervised_learning import score_regression_KNN
from lca_supervised_learning import score_regression_pos_KNN


In [3]:
# System names accepted by load_results_file and iterated when scoring.
SYS_NAMES = [
    "henon",
    "ikeda",
    "logistic",
    "tinkerbell",
]
# Reduced set for quick runs:
# SYS_NAMES = ["logistic", "henon"]

# Divergence-type identifiers; each appears in the results file name.
DIV_TYPES = [
    "dmt",
    "mt",
    "hvg",
    "ph",
]
# Reduced set for quick runs:
# DIV_TYPES = ["dmt", "mt"]

# Kinds of results file that load_results_file accepts.
RES_TYPES = [
    "correlations",
    "divergences",
]

In [4]:
# Experiment configuration. The three values are forwarded to
# generate_trajectories below and are also passed to load_results_file, where
# they form the results-file name prefix (SEED_..__LENGTH_..__SAMPLES_..).
# NOTE: load_results_file defaults to length=1000, which differs from LENGTH
# here, so LENGTH must always be passed explicitly.
SEED = 42
SAMPLES = 500
LENGTH = 500
# The result is indexed below as experimental_data[<system>][<field>] with the
# fields "trajectories", "lces" and "sys_params".
experimental_data = generate_trajectories(
    RANDOM_SEED=SEED, TS_LENGTH=LENGTH, CONTROL_PARAM_SAMPLES=SAMPLES
)

Experiment config -- SEED:42, LENGTH:500, SAMPLES:500


In [5]:
# Pull out the logistic system's fields; "sys_params" is exposed here under
# the name *_control_params.
logistic_trajectories = experimental_data["logistic"]["trajectories"]
logistic_lces = experimental_data["logistic"]["lces"]
logistic_control_params = experimental_data["logistic"]["sys_params"]

In [6]:
# Pull out the henon system's fields; "sys_params" is exposed here under
# the name *_control_params.
henon_trajectories = experimental_data["henon"]["trajectories"]
henon_lces = experimental_data["henon"]["lces"]
henon_control_params = experimental_data["henon"]["sys_params"]

In [7]:
# Pull out the ikeda system's fields; "sys_params" is exposed here under
# the name *_control_params.
ikeda_trajectories = experimental_data["ikeda"]["trajectories"]
ikeda_lces = experimental_data["ikeda"]["lces"]
ikeda_control_params = experimental_data["ikeda"]["sys_params"]

In [8]:
# Pull out the tinkerbell system's fields; "sys_params" is exposed here under
# the name *_control_params.
tinkerbell_trajectories = experimental_data["tinkerbell"]["trajectories"]
tinkerbell_lces = experimental_data["tinkerbell"]["lces"]
tinkerbell_control_params = experimental_data["tinkerbell"]["sys_params"]

In [9]:
# Per-system bundle consumed by the scoring function: trajectories, actual
# LCEs, control parameter values, and the name of the control parameter.
# Built from one row per system so that every entry has the same four keys.
system_data = {
    sys_name: {
        "trajectories": trajectories,
        "lces": lces,
        "control_params": control_params,
        "param_name": param_name,
    }
    for sys_name, trajectories, lces, control_params, param_name in (
        ("logistic", logistic_trajectories, logistic_lces, logistic_control_params, "r"),
        ("henon", henon_trajectories, henon_lces, henon_control_params, "a"),
        ("ikeda", ikeda_trajectories, ikeda_lces, ikeda_control_params, "a"),
        (
            "tinkerbell",
            tinkerbell_trajectories,
            tinkerbell_lces,
            tinkerbell_control_params,
            "a",
        ),
    )
}

In [10]:
def find_first_matching_file(path, prefix):
    """Return the path of the first entry in ``path`` whose name starts with ``prefix``.

    Entries are scanned in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the file picked when several names
    share the prefix could differ between runs or machines.

    Args:
        path: Directory to scan.
        prefix: Text the entry name must start with.

    Returns:
        ``os.path.join(path, name)`` for the first matching name, or ``None``
        when no entry matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (for example when ``path``
            does not exist).
    """
    for filename in sorted(os.listdir(path)):
        if filename.startswith(prefix):
            return os.path.join(path, filename)
    # No match: signal it explicitly rather than by falling off the end.
    return None


In [11]:
def load_results_file(sys_name, div_name, res_type="correlations", seed=42, length=1000, samples=500):
    """Load a pickled results file for one system / divergence-type pair.

    The file is looked up in ``./outputs/data/divergence_results`` by the name
    prefix ``SEED_{seed}__LENGTH_{length}__SAMPLES_{samples}__{sys_name}_{div_name}_{res_type}__``.
    The first matching file is loaded.

    Args:
        sys_name: System name; must be in ``SYS_NAMES``.
        div_name: Divergence-type identifier; must be in ``DIV_TYPES``.
        res_type: Kind of results file; must be in ``RES_TYPES``.
        seed: Seed value embedded in the file name.
        length: Length value embedded in the file name.
        samples: Sample-count value embedded in the file name.

    Returns:
        Whatever object was pickled in the matching file.

    Raises:
        AssertionError: If ``sys_name``, ``div_name`` or ``res_type`` is not
            recognised (only when assertions are enabled).
        FileNotFoundError: If no file in the results directory starts with
            the expected prefix.
    """

    assert sys_name in SYS_NAMES, "unrecognised system name"
    assert div_name in DIV_TYPES, "unrecognised divergence name"
    assert res_type in RES_TYPES, "unrecognised results type"

    results_dir = "./outputs/data/divergence_results"
    filename_prefix = f"SEED_{seed}__LENGTH_{length}__SAMPLES_{samples}__{sys_name}_{div_name}_{res_type}__"
    filename = find_first_matching_file(results_dir, filename_prefix)

    # Without this check a missing file surfaces as an opaque TypeError from
    # open(None); report which prefix and directory were searched instead.
    if filename is None:
        raise FileNotFoundError(
            f"no results file starting with {filename_prefix!r} in {results_dir!r}"
        )

    # NOTE: pickle.load can execute arbitrary code; only load results files
    # produced by this project.
    with open(filename, 'rb') as results_file:
        data = pickle.load(results_file)

    return data

## Visualise divergences and get Spearman and supervised learning results

In [12]:
# Divergence names selected for reporting. The filter that would consult this
# list in the scoring loop is currently commented out.
DIVS_TO_REPORT = ["cophenetic", "cophenetic_reverse"]
# Currently disabled entries:
#     "bottleneck",
#     "wasserstein",
#     "cophenetic_linf",
#     "cophenetic_reverse_linf",

# Optional interleaving variants (disabled):
# MASKS = [0.1, 0.2, 0.3, 0.4, 0.5]
# DIVS_TO_REPORT.extend([f"interleaving_{mask}_None" for mask in MASKS])
# DIVS_TO_REPORT.extend([f"interleaving_edge_{mask}_None" for mask in MASKS])
# DIVS_TO_REPORT.extend([f"interleaving_length_{mask}_None" for mask in MASKS])


def generate_correlations_and_scores_from_results_files(
    system_data, SYS_NAMES=SYS_NAMES, DIV_TYPES=DIV_TYPES
):
    """Plot, correlate and score every stored divergence against the actual LCEs.

    For each system in ``SYS_NAMES`` and each divergence type in ``DIV_TYPES``
    the "divergences" results file is loaded (using the module-level ``SEED``,
    ``LENGTH`` and ``SAMPLES``). Every entry of its ``"data"`` mapping is
    passed to ``plot_lce_estimate_and_correlation``, and the value returned is
    extended with five supervised-learning scores.

    Args:
        system_data: Mapping from system name to a dict with the keys
            ``"param_name"``, ``"lces"`` and ``"control_params"``.
        SYS_NAMES: System names to process.
        DIV_TYPES: Divergence-type identifiers to process.

    Returns:
        Dict keyed by ``(f"{name} divergence", sys_name, LENGTH)`` holding the
        plot function's result merged with the score entries.
    """
    # (result key, scorer) pairs, applied in this order. The two "*_poly" keys
    # are filled by the KNN scorers; the key names are kept unchanged so that
    # results stay comparable with earlier outputs.
    scorers = (
        ("classification_f1", score_classification),
        ("regression_neg_mean_absolute", score_regression),
        ("pos_regression_neg_mean_absolute", score_regression_pos),
        ("regression_neg_mean_absolute_poly", score_regression_KNN),
        ("pos_regression_neg_mean_absolute_poly", score_regression_pos_KNN),
    )

    results = {}
    for system in SYS_NAMES:
        info = system_data[system]
        param_name = info["param_name"]
        lces_actual = info["lces"]
        control_params = info["control_params"]

        for div_type in DIV_TYPES:
            loaded = load_results_file(
                system,
                div_type,
                res_type="divergences",
                seed=SEED,
                length=LENGTH,
                samples=SAMPLES,
            )

            # Every entry is processed; filtering by DIVS_TO_REPORT is
            # currently disabled.
            for raw_name, divergences in loaded["data"].items():
                label = f"{raw_name} divergence"

                # save_plot=True is passed, so the plot helper presumably
                # writes a figure as a side effect -- confirm in its source.
                summary = plot_lce_estimate_and_correlation(
                    label,
                    system,
                    param_name,
                    divergences,
                    lces_actual,
                    control_params,
                    LENGTH,
                    plot_actual=True,
                    save_plot=True,
                    sharey=False,
                    logy=False,
                    twoy=True,
                    show_plot=False,
                )

                # Each scorer receives the divergences as a single-column array.
                summary |= {
                    score_key: scorer(divergences.reshape(-1, 1), lces_actual)
                    for score_key, scorer in scorers
                }
                results[label, system, LENGTH] = summary

    return results


In [13]:
# Run the full pipeline for every configured system and divergence type.
# The result is keyed by (divergence label, system name, LENGTH).
all_results_divergences = generate_correlations_and_scores_from_results_files(
    system_data, SYS_NAMES=SYS_NAMES, DIV_TYPES=DIV_TYPES
)

In [14]:
# with open(f"outputs/data/LINF_results_{LENGTH}.pkl", "wb") as file:
#     pickle.dump(all_results_divergences, file)


# Show only the entries whose divergence label (first key element) mentions
# 'cophenetic'.
for result in all_results_divergences:
    if 'cophenetic' in result[0]:
        print(result, all_results_divergences[result])

('cophenetic divergence', 'henon', 500) {'spearmanr': SpearmanrResult(correlation=0.7364815379261517, pvalue=1.3964935941194425e-86), 'pos_spearmanr': SpearmanrResult(correlation=0.7467992639647886, pvalue=3.699273942702137e-43), 'pearsonr': (0.749995101739337, 1.9174688020558558e-91), 'pos_pearsonr': (0.7332609456457777, 6.572077779240664e-41), 'classification_f1': 0.7701461158411967, 'regression_neg_mean_absolute': -0.08808694157150157, 'pos_regression_neg_mean_absolute': -0.0650528426857522, 'regression_neg_mean_absolute_poly': -0.08548396353638935, 'pos_regression_neg_mean_absolute_poly': -0.05996216467716961}
('cophenetic_reverse divergence', 'henon', 500) {'spearmanr': SpearmanrResult(correlation=0.7987872271489085, pvalue=5.628398730614848e-112), 'pos_spearmanr': SpearmanrResult(correlation=0.8319056469435119, pvalue=1.5924061305414393e-61), 'pearsonr': (0.7754331494635098, 1.6781552884875256e-101), 'pos_pearsonr': (0.7699282826728137, 2.3658523585789734e-47), 'classification_f1