In [2]:
import matplotlib.pyplot as plt
import numpy as np
import json

from egci_bioacoustic_shifts import plot_EGCI


In [3]:
with open("e1_results.json", "r") as f:
    e1_results = json.load(f)


In [19]:
from egci_bioacoustic_shifts.utils import measure_distrbution_metrics
import pandas as pd

regions = ["HSN", "PER", "UHH", "SNE", "POW", "NES"]
num_samples = 2000
num_trials = 1000


for region in regions:
    focal_data = e1_results[region]["data"]["focal"]
    soundscape_data = e1_results[region]["data"]["soundscape"]

    focal_data = np.array(focal_data[:2])
    soundscape_data = np.array(soundscape_data[:2])

    focal_data = focal_data[:min(focal_data.shape[-1], soundscape_data.shape[-1])]
    soundscape_data = soundscape_data[:min(focal_data.shape[-1], soundscape_data.shape[-1])]

    test_df = pd.DataFrame(
        {
            "h": np.concat((focal_data[0], soundscape_data[0])),
            "c": np.concat((focal_data[1], soundscape_data[1])),
            "label": ["focal"] * focal_data.shape[-1] + ["soundscape"] * soundscape_data.shape[-1]
        }
    )

    sample_stats = measure_distrbution_metrics(
        test_df[test_df["label"] == "focal"][["h", "c"]],
        test_df[test_df["label"] == "soundscape"][["h", "c"]],
        emd=False
    )

    KL_d_XC_S = []
    KL_d_S_XC = []

    for i in range(num_trials):
        test_df["label_shuffled"] = test_df["label"].sample(frac=1, replace=True).reset_index(drop=True)
        out = measure_distrbution_metrics(
            test_df[test_df["label_shuffled"] == "focal"][["h", "c"]],
            test_df[test_df["label_shuffled"] == "soundscape"][["h", "c"]],
            emd=False
        )
        KL_d_XC_S.append(out["Kullback-Leibler divergence Xeno-canto to Soundscapes"])
        KL_d_S_XC.append(out["Kullback-Leibler divergence Soundscapes to Xeno-canto"])

    print(
        region,
        (np.array(KL_d_XC_S) > sample_stats["Kullback-Leibler divergence Xeno-canto to Soundscapes"]).mean(),
        (np.array(KL_d_S_XC) > sample_stats["Kullback-Leibler divergence Soundscapes to Xeno-canto"]).mean(),
    )

HSN 0.052 0.0
PER 0.0 0.996
UHH 0.544 0.0
SNE 0.147 0.66
POW 0.0 0.962
NES 0.037 0.998


In [14]:
test_df.shape

(4000, 3)