In [27]:
# packages

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb
import pandas as pd
import os
from itertools import combinations
import h5py

import sys
sys.path.append("../src")

from analysis import *
from inference import *

In [28]:
import re

def natural_sort(l):
    """Return the strings of `l` as a new list in natural (human) order.

    Each string is split into alternating text and digit runs; digit runs are
    compared as integers and text runs case-insensitively, so "seed2" sorts
    before "seed10".
    """
    def _natural_key(item):
        # The capturing group makes re.split keep the digit runs in its result.
        pieces = re.split('([0-9]+)', item)
        return [int(piece) if piece.isdigit() else piece.lower() for piece in pieces]

    return sorted(l, key=_natural_key)

In [51]:
# Root folder of the experiment whose generated datasets are analysed below.
datapath = "../experiment_outputs/test_perturb_env_noise0.1"
# Read-only handle on the data-generation log stored in that folder. It is left
# open on purpose: the settings are read from `log.attrs` whenever needed.
log = h5py.File(f"{datapath}/data_generation_log.h5", "r")

# Show the generation settings stored as attributes of the log file.
print(f"n_species = {log.attrs['n_species']}")
print(f"avg_samp_dt = {log.attrs['avg_samp_dt']}")
print(f"env_noise = {log.attrs['env_noise']}")
print(f"meas_noise_list = {log.attrs['meas_noise_list']}")
print(f"n_params_seeds = {log.attrs['n_params_seeds']}")

n_species = [3 5]
avg_samp_dt = [3.  1.5]
env_noise = 0.1
meas_noise_list = [0.1]
n_params_seeds = 10


In [52]:
# Configuration selecting which generated datasets are analysed.
# The hard-coded values pick one of the options stored in the generation log.
n_sp = 3  # number of species; appears in the "{n_sp}_sp" folder and file names
avg_samp_dt = 3.  # average sampling interval; appears in the "t_samp{...}" folder name
env_noise = log.attrs["env_noise"]  # read from the generation log
meas_noise = 0.1  # measurement noise; appears in the "meas_noise{...}" folder name
n_params_seeds = log.attrs['n_params_seeds']  # read from the generation log

In [53]:
# Build the list of dataset / metadata files, one pair per parameter seed,
# for the configured species count, measurement noise and sampling interval.
datafiles = []
metadatafiles = []

# os.listdir returns entries in arbitrary order, so the seeds are put in
# natural order to make datafiles[-1] / metadatafiles[-1] refer to the same
# seed on every run. Entries that are not "param_seed" folders (e.g. hidden
# files) are skipped instead of raising an IndexError.
params_seeds = [
    name.split("param_seed")[1]
    for name in natural_sort(os.listdir(f"{datapath}/{n_sp}_sp"))
    if "param_seed" in name
]
for p in params_seeds:
    # Dataset and metadata of one run live side by side in the same folder.
    run_dir = f"{datapath}/{n_sp}_sp/param_seed{p}/meas_noise{meas_noise}/t_samp{avg_samp_dt}"
    datafiles.append(f"{run_dir}/dataset{n_sp}_sp{p}_env_noise{env_noise}.csv")
    metadatafiles.append(f"{run_dir}/metadata{n_sp}_sp{p}_env_noise{env_noise}.txt")

In [55]:
# Load the dataset and the raw metadata lines of the last entry in the file lists.
df = pd.read_csv(datafiles[-1], index_col=0)
# Read through a context manager so the file handle is closed immediately
# instead of being left open until garbage collection.
with open(metadatafiles[-1], "r") as metafile:
    metatext = metafile.read().split("\n")

In [57]:
# Turn the raw metadata lines into a dict of settings.
# NOTE(review): get_meta is not defined in this notebook; presumably it comes
# from one of the star-imports of ../src — confirm which module provides it.
metadict = get_meta(metatext)

# Summarise the settings recorded in the metadata of the loaded dataset.
print(f"Numbers of sampling points: {metadict['n_tpoints']}")
print(f"Average sampling intervals: {metadict['avg_dt'].round(3)}")
print(f"Number of initial conditions: {metadict['n_init_cond']}")
print(f"Number of repetitions: {metadict['repetitions']}")
print(f"Environmental noise: {metadict['env_noise']}")
print(f"Amounts of measurement noise: {metadict['meas_noise']}")

Numbers of sampling points: [11 21]
Average sampling intervals: [3.  1.5]
Number of initial conditions: 5
Number of repetitions: 1
Environmental noise: 0.1
Amounts of measurement noise: [0.1]


In [58]:
def calculate_es_score(true_aij, inferred_aij) -> float:
    """Score how well the signs of the inferred coefficients match the truth.

    Grant's edited version of the ecological direction (EDₙ) score
    (n := number of species in ecosystem), called "ES" in this notebook.

    Only entries whose inferred coefficient is non-zero are scored:
      * +1 when the inferred and true coefficients have the same sign,
      * -1 when they have opposite signs,
      *  0 when the true coefficient is zero.
    The total is rescaled linearly from [-N, N] to [0, 1], where N is the
    number of non-zero true coefficients.

    Parameters
    ===============
    true_aij: ndarray(axis0=species_names, axis1=species_names), the ecosystem coefficient matrix used to generate data
    inferred_aij: ndarray(axis0=species_names, axis1=species_names), the inferred ecosystem coefficient matrix
    Returns
    ===============
    ES_score: float in [0, 1]; NaN when the true matrix has no non-zero
        coefficient, because the scaling range is then empty.
    """

    # Neither input is modified below, so no defensive copy is needed.
    truth = pd.DataFrame(true_aij)
    inferred = pd.DataFrame(inferred_aij)

    # Consider only the coefficients that were inferred as non-zero; every
    # other position becomes NaN after masking and matches neither count.
    mask = inferred != 0

    # Sum of signs: -2 or +2 means agreement, 0 means opposite signs and
    # -1 or +1 means the true coefficient is zero.
    sign_sum = np.sign(inferred)[mask] + np.sign(truth)[mask]
    corr_sign = int((sign_sum.abs() == 2).sum().sum())
    opposite_sign = int((sign_sum == 0).sum().sum())

    unscaled_score = corr_sign - opposite_sign

    # Scale by the theoretical extrema: every non-zero true coefficient
    # inferred with the wrong sign (min) or with the right sign (max).
    truth_nz_counts = int((truth != 0).sum().sum())
    if truth_nz_counts == 0:
        # Explicit NaN instead of a 0/0 division and its RuntimeWarning.
        return float("nan")
    theoretical_min = -truth_nz_counts
    theoretical_max = truth_nz_counts

    ES_score = (unscaled_score - theoretical_min) / (theoretical_max - theoretical_min)

    return ES_score

In [68]:
# Infer and score
#
# For every sampling interval, measurement noise and parameter seed: fit the
# model on subsets of the initial conditions, score every fit against the true
# parameters of that same dataset, and write one "inference..." CSV next to
# the dataset CSV.

param_columns = [f"r{i}" for i in range(1, n_sp+1)] + \
                [f"A{i},{j}" for i in range(1, n_sp+1) for j in range(1, n_sp+1)]

cols = ["n_init_cond"] + list(df.columns[1:4]) + param_columns + ["MSPD", "CSR", "ES"]

# Global pandas option; setting it once is enough, so it is hoisted out of the loops.
pd.options.mode.chained_assignment = None

# os.listdir order is arbitrary: sort the seeds so runs are processed in a
# stable order, and skip entries that are not "param_seed" folders.
params_seeds = [
    name.split("param_seed")[1]
    for name in natural_sort(os.listdir(f"{datapath}/{n_sp}_sp"))
    if "param_seed" in name
]

for avg_samp_dt in log.attrs["avg_samp_dt"]:
    for meas_noise in log.attrs["meas_noise_list"]:
        datafiles = []
        for seed in params_seeds:
            run_dir = f"{datapath}/{n_sp}_sp/param_seed{seed}/meas_noise{meas_noise}/t_samp{avg_samp_dt}"
            datafiles.append(f"{run_dir}/dataset{n_sp}_sp{seed}_env_noise{env_noise}.csv")

        for seed, datafile in zip(params_seeds, datafiles):
            run_dir = os.path.dirname(datafile)

            df = pd.read_csv(datafile, index_col=0)

            # Ground truth must come from the metadata stored next to THIS
            # dataset. Reusing one metadata dict for all runs would score
            # every other parameter seed against the wrong true parameters.
            # NOTE(review): assumes a metadata file sits beside each dataset,
            # following the naming used when the file lists were built — confirm.
            with open(f"{run_dir}/metadata{n_sp}_sp{seed}_env_noise{env_noise}.txt", "r") as metafile:
                file_meta = get_meta(metafile.read().split("\n"))

            p = file_meta["parameters"]
            r = p[:n_sp]  # not used in this cell; kept in case later cells read it
            A = p[n_sp:].reshape((n_sp,n_sp))

            init_conds = df.init_cond_idx.unique()

            # Collect rows in a list and build the frame once, instead of
            # growing a DataFrame row by row.
            rows = []

            # NOTE(review): tqdm is not imported explicitly in this notebook;
            # it presumably arrives through the star-imports — confirm.
            for i in tqdm(range(len(init_conds))):
                combs = list(combinations(init_conds, i+1))
                # Unseeded shuffle: only changes WHICH subsets are kept when
                # there are more than 100 of a given size.
                np.random.shuffle(combs)
                for comb in combs[:100]:
                    df_comb = df[df.init_cond_idx.isin(comb)]
                    r_est, A_est = fit_ridge_cv(df_comb)
                    p_est = np.concatenate((r_est, A_est.flatten()))
                    MSPD = ((p-p_est)**2).mean()
                    CSR = (np.sign(A_est)==np.sign(A)).mean()
                    ES = calculate_es_score(A, A_est)
                    rows.append([i+1, comb, avg_samp_dt, meas_noise] + list(p_est) + [MSPD, CSR, ES])

            infer_out = pd.DataFrame(rows, columns=cols)

            # Same location and file name as before ("dataset" -> "inference"),
            # built from the run folder rather than by splitting the full path
            # on the substring "dataset".
            infer_out.to_csv(f"{run_dir}/inference{n_sp}_sp{seed}_env_noise{env_noise}.csv")

100%|██████████| 5/5 [00:01<00:00,  3.83it/s]
100%|██████████| 5/5 [00:01<00:00,  3.65it/s]
100%|██████████| 5/5 [00:01<00:00,  3.40it/s]
100%|██████████| 5/5 [00:01<00:00,  4.92it/s]
100%|██████████| 5/5 [00:01<00:00,  4.68it/s]
100%|██████████| 5/5 [00:00<00:00,  6.05it/s]
100%|██████████| 5/5 [00:00<00:00,  6.03it/s]
100%|██████████| 5/5 [00:00<00:00,  6.19it/s]
100%|██████████| 5/5 [00:00<00:00,  5.89it/s]
100%|██████████| 5/5 [00:00<00:00,  5.94it/s]
100%|██████████| 5/5 [00:00<00:00,  5.75it/s]
100%|██████████| 5/5 [00:00<00:00,  6.13it/s]
100%|██████████| 5/5 [00:00<00:00,  5.82it/s]
100%|██████████| 5/5 [00:00<00:00,  5.60it/s]
100%|██████████| 5/5 [00:00<00:00,  5.73it/s]
100%|██████████| 5/5 [00:00<00:00,  6.02it/s]
100%|██████████| 5/5 [00:00<00:00,  5.50it/s]
100%|██████████| 5/5 [00:00<00:00,  5.55it/s]
100%|██████████| 5/5 [00:00<00:00,  5.63it/s]
100%|██████████| 5/5 [00:00<00:00,  5.40it/s]
