In [1]:
import numpy as np
from scipy.integrate import odeint

In [2]:
def dUdt(U, t, a, b):
    """Right-hand side of a stack of independent predator-prey systems.

    The state vector ``U`` holds ``len(a)`` values of ``x`` followed by the
    matching values of ``y``. For every system the derivatives are

        dx/dt = a*x - x*y
        dy/dt = b*x*y - y

    ``t`` is accepted because ``odeint`` passes it, but the equations do not
    depend on it.

    Returns the derivatives in the same stacked layout as ``U``.
    """
    n_systems = len(a)
    prey, predator = U[:n_systems], U[n_systems:]

    prey_rate = a*prey - prey*predator
    predator_rate = b*prey*predator - predator

    return np.concatenate((prey_rate, predator_rate))

In [3]:
# Simulation settings. The number of parameter draws and the seed are named
# once here so that the run is reproducible after "Restart & Run All".
N_SYSTEMS = 1000
SEED = 0
rng = np.random.default_rng(SEED)  # local generator: leaves the global RNG state alone

# One (a, b) pair per independent system, drawn uniformly from [-10, 10).
a = rng.uniform(-10, 10, N_SYSTEMS)
b = rng.uniform(-10, 10, N_SYSTEMS)

# Every system starts from x = y = 0.5; the state is stacked as [x..., y...],
# which is the layout dUdt expects.
x0, y0 = np.full(N_SYSTEMS, 0.5), np.full(N_SYSTEMS, 0.5)
S0 = np.concatenate([x0, y0])

# 100 evenly spaced time points on [0, 10].
tspan = np.linspace(0, 10, 100)
sol = odeint(dUdt, S0, tspan, args=(a, b))

In [9]:
# Undo the [x..., y...] stacking of the state: the first len(a) columns of
# `sol` are x (prey), the remaining columns are y (predator).
x_sol, y_sol = np.split(sol, [len(a)], axis=1)

In [121]:
import numpy as np 
from sklearn import metrics
from scipy.stats import rankdata
from scipy.special import kl_div
from scipy.spatial.distance import pdist, squareform, cdist

def wasserstein_distance(simulated_sample: np.ndarray, observed_sample: np.ndarray) -> float:
    """1-Wasserstein distance between equally sized empirical samples.

    For two one-dimensional samples with the same number of points, the
    1-Wasserstein distance equals the mean absolute difference between their
    order statistics. Each column (axis 0) is therefore sorted before the
    difference is taken; without the sort the result is only a paired mean
    absolute error that depends on the order of the observations.

    Parameters
    ----------
    simulated_sample, observed_sample : array_like
        Samples along axis 0. For 2-D input every column is an independent
        one-dimensional sample.

    Returns
    -------
    float
        The distance, averaged over all columns when the input is 2-D.
    """
    # atleast_1d keeps scalar inputs working, since np.sort needs an axis 0.
    simulated_sorted = np.sort(np.atleast_1d(simulated_sample), axis=0)
    observed_sorted = np.sort(np.atleast_1d(observed_sample), axis=0)
    distance = np.mean(np.abs(simulated_sorted - observed_sorted))
    return distance

def maximum_mean_discrepancy(simulated_sample: np.ndarray, observed_sample: np.ndarray, gamma = 1.0) -> np.ndarray:
    """Column-wise squared maximum mean discrepancy with a Gaussian kernel.

    Column ``i`` of each input is treated as a one-dimensional sample and the
    biased estimate

        mean(k(X, X)) + mean(k(Y, Y)) - 2 * mean(k(X, Y)),
        k(u, v) = exp(-(u - v)**2 / (2 * gamma**2))

    is computed for it. The sample size is taken from the inputs rather than
    being fixed, and the two inputs may have different numbers of rows.

    Parameters
    ----------
    simulated_sample : np.ndarray, shape (n_simulated, n_columns)
    observed_sample : np.ndarray, shape (n_observed, n_columns)
    gamma : float
        Bandwidth of the Gaussian kernel.

    Returns
    -------
    np.ndarray, shape (n_columns,)
        One discrepancy value per column of ``simulated_sample``.
    """
    simulated = np.asarray(simulated_sample, dtype=float)
    observed = np.asarray(observed_sample, dtype=float)
    n = simulated.shape[1]
    kernel_scale = 2 * gamma ** 2

    mmd_values = np.empty(n)
    # One column at a time: only three small kernel matrices are alive at
    # once, instead of full (n_columns, rows, rows) stacks.
    for i in range(n):
        sim_col = simulated[:, i, np.newaxis]
        obs_col = observed[:, i, np.newaxis]

        # Gaussian kernel on the pairwise Euclidean distances
        KXX = np.exp(-cdist(sim_col, sim_col, metric='euclidean') ** 2 / kernel_scale)
        KYY = np.exp(-cdist(obs_col, obs_col, metric='euclidean') ** 2 / kernel_scale)
        KXY = np.exp(-cdist(sim_col, obs_col, metric='euclidean') ** 2 / kernel_scale)

        mmd_values[i] = np.mean(KXX) + np.mean(KYY) - 2 * np.mean(KXY)

    return mmd_values

def cramer_von_mises(simulated_sample: np.ndarray, observed_sample: np.ndarray) -> np.ndarray:
    """Column-wise two-sample Cramer-von Mises statistic.

    Column ``i`` of each input is treated as a one-dimensional sample. With
    ``N`` simulated and ``M`` observed points, ``r`` and ``s`` the sorted ranks
    of each sample within the pooled data (ties get average ranks), the
    statistic is

        U = N * sum_i (r_i - i)**2 + M * sum_j (s_j - j)**2
        T = U / (N*M*(N+M)) - (4*M*N - 1) / (6*(M+N))

    For ``N == M`` this reduces to ``U / (2*N**3) - (4*N**2 - 1) / (12*N)``.
    Samples of different sizes are supported rather than reported through an
    error string.

    Parameters
    ----------
    simulated_sample : np.ndarray, shape (N, n_columns)
    observed_sample : np.ndarray, shape (M, n_columns)

    Returns
    -------
    np.ndarray, shape (n_columns,)
        One statistic per column.
    """
    simulated = np.asarray(simulated_sample, dtype=float)
    observed = np.asarray(observed_sample, dtype=float)
    n_sim, n_obs = len(simulated), len(observed)

    # Rank every value within the pooled sample of its own column, then put
    # each sample's ranks in ascending order so they line up with 1..N / 1..M.
    combined_rank = rankdata(np.concatenate((simulated, observed)), axis=0)
    simulated_rank = np.sort(combined_rank[:n_sim], axis=0)
    observed_rank = np.sort(combined_rank[n_sim:], axis=0)

    # Column vectors 1..N and 1..M broadcast across all columns.
    sim_idx = np.arange(1, n_sim + 1)[:, np.newaxis]
    obs_idx = np.arange(1, n_obs + 1)[:, np.newaxis]
    simulated_sum = np.sum((simulated_rank - sim_idx)**2, axis=0)
    observed_sum = np.sum((observed_rank - obs_idx)**2, axis=0)

    rank_sum = n_sim * simulated_sum + n_obs * observed_sum
    pair_count = n_sim * n_obs
    total = n_sim + n_obs
    distance = rank_sum / (pair_count * total) - (4 * pair_count - 1) / (6 * total)
    return distance

def energy_dist(simulated_sample: np.ndarray, observed_sample: np.ndarray) -> np.ndarray:
    """Column-wise energy distance statistic between two samples.

    Column ``i`` of each input is treated as a one-dimensional sample and

        2 * mean|X - Y| - mean|X - X'| - mean|Y - Y'|

    is computed for it, each mean being taken over all pairs (the within-sample
    means include the zero self-distances). The sample size is taken from the
    inputs rather than being fixed, and the two inputs may have different
    numbers of rows.

    Parameters
    ----------
    simulated_sample : np.ndarray, shape (n_simulated, n_columns)
    observed_sample : np.ndarray, shape (n_observed, n_columns)

    Returns
    -------
    np.ndarray, shape (n_columns,)
        One energy distance per column of ``simulated_sample``.
    """
    simulated = np.asarray(simulated_sample, dtype=float)
    observed = np.asarray(observed_sample, dtype=float)
    n = simulated.shape[1]

    energy_distances = np.empty(n)
    # One column at a time: only three small distance matrices are alive at
    # once, instead of full (n_columns, rows, rows) stacks.
    for i in range(n):
        sim_col = simulated[:, i, np.newaxis]
        obs_col = observed[:, i, np.newaxis]

        mean_dist_XY = np.mean(cdist(sim_col, obs_col, metric='euclidean'))  # between samples
        mean_dist_XX = np.mean(cdist(sim_col, sim_col, metric='euclidean'))  # within simulated
        mean_dist_YY = np.mean(cdist(obs_col, obs_col, metric='euclidean'))  # within observed

        energy_distances[i] = 2 * mean_dist_XY - mean_dist_XX - mean_dist_YY

    return energy_distances

def kullback_leibler_divergence(simulated_sample: np.ndarray, observed_sample: np.ndarray) -> np.ndarray:
    """Sum of the point-wise ``scipy.special.kl_div`` terms along axis 0.

    ``kl_div(x, y)`` is ``x*log(x/y) - x + y`` element-wise. The terms are
    summed over the first axis, so 2-D input yields one value per column and
    1-D input yields a scalar.

    The reduction uses ``np.sum`` instead of the builtin ``sum`` so that it
    runs inside NumPy rather than iterating over the rows in Python.
    """
    pointwise = kl_div(simulated_sample, observed_sample)
    return np.sum(pointwise, axis=0)

In [7]:
# Reference ("observed") trajectory: a single system with a = b = 1, started
# from the same state (0.5, 0.5) and integrated on the same time grid.
observed = odeint(dUdt, (0.5, 0.5), tspan, args=([1], [1]))
x_observed = observed[:, 0]
y_observed = observed[:, 1]

# Repeat the reference once per simulated parameter set so it can be compared
# column by column with the simulated solution. The repeat count comes from
# len(a) instead of a literal so it cannot drift from the simulation size.
x_obs_mod = np.tile(x_observed, (len(a), 1)).T
y_obs_mod = np.tile(y_observed, (len(a), 1)).T

In [122]:
# Cramer-von Mises statistic of each simulated x (prey) column against the
# tiled reference column; one value per parameter set.
temp = cramer_von_mises(simulated_sample=x_sol, observed_sample=x_obs_mod)
temp

array([15.776975, 16.248175, 16.476675, 16.374375, 16.476675, 16.248175,
       10.525375, 16.476675, 15.526025, 16.348575, 15.200725, 13.400375,
       16.476675,  5.976425, 16.476675, 16.476675,  9.751325, 10.852525,
       16.476675, 14.900225, 15.261225, 16.476675, 16.476675, 16.476675,
       15.639525, 16.476675, 16.476675, 13.356125, 16.476675, 12.940625,
       14.086925, 15.567725, 12.811375, 16.476675, 14.174575, 15.789425,
       16.374375, 16.410175, 15.874725, 13.129325, 16.374375, 16.476675,
       16.476675, 16.476675, 16.476675, 13.435025, 14.885925, 15.886425,
       16.476675,  8.994425, 16.476675, 16.230675, 16.476675, 15.509325,
       16.438075, 16.476675, 16.476675, 10.479575, 15.554225, 16.476675,
       16.476675, 12.494625, 16.476675, 16.476675,  9.598225, 16.476675,
       16.476675, 16.476675,  0.386075, 14.900225, 16.392075, 16.476675,
       16.476675, 12.399925, 15.894075, 15.892425, 16.009975, 16.476675,
       15.863425, 16.476675, 16.476675, 16.476675, 

In [115]:
from scipy.stats import rankdata
# Small worked example of the rank computation used by cramer_von_mises:
# three observations per sample, four columns.
tempx = np.array([[0.5, 0.5, 0.5, 0.5], [1, 0.7, 0.55, 0.56], [2, 0.9, 0.6, 0.61]])
tempy = np.array([[0.5, 0.5, 0.5, 0.5], [2, 2, 2, 2], [8, 8, 8, 8]])

# combine simulated and observed
tempcomb = np.concatenate((tempx, tempy))

# Rank every value within the pooled sample of its column, then sort each
# sample's ranks so they line up with the indices 1..n subtracted below.
combrank = rankdata(tempcomb, axis=0)
simrank = np.sort(combrank[:len(tempx)], axis=0)
obsrank = np.sort(combrank[len(tempx):], axis=0)

# Calculate distance
obs_idx = np.tile(np.arange(1, len(tempy)+1), (len(tempy[0]), 1)).T
sim_idx = np.tile(np.arange(1, len(tempx)+1), (len(tempy[0]), 1)).T
obs_sum = np.sum((obsrank - obs_idx)**2, axis=0)
sim_sum = np.sum((simrank - sim_idx)**2, axis=0)
rank_sum = len(tempx)*(obs_sum + sim_sum)
# The whole of 2*n**3 is the denominator. The previous form
# `rank_sum / 2*(n**4)` divided by 2 and then multiplied by n**4.
distance = rank_sum / (2*len(tempx)**3) - (4*len(tempx)**2 - 1)/(12*len(tempx))
simrank

array([[1.5, 1.5, 1.5, 1.5],
       [3. , 3. , 3. , 3. ],
       [4.5, 4. , 4. , 4. ]])

In [75]:
# Show the per-column sum of squared (rank - index) offsets for the observed sample.
# NOTE(review): the recorded output has three entries while tempx/tempy have
# four columns, so it appears to come from an earlier run - re-execute to refresh.
obs_sum

array([16.25, 19.  , 24.25])