In [None]:
# Load IPython's autoreload extension; a later cell invokes `%autoreload` by hand.
%load_ext autoreload

In [None]:
import numpy as np
import statsmodels.api as sm
from statsmodels.genmod.families.links import Link, Log as LogLink
import scipy as sp
import scipy.stats
import matplotlib.pyplot as plt
import matplotlib as mpl
import strainzip as sz
import seaborn as sns

from strainzip import depth_model
import strainzip as sz

import pandas as pd

In [None]:
# Simulate per-path depths across samples and the noisy per-edge depths they
# produce, then report the log-likelihood of the data at the true parameters.

# --- Simulation settings -----------------------------------------------------
model = sz.depth_model
seed = 0
alpha = 1e-0  # Small offset for handling 0s in depths
n, m = 3, 4  # In-edges / out-edges
s_samples = 4
sigma = 1e-1  # Scale of the multiplicative noise
depth_multiplier = 1  # Scaling factor for depths
num_excess_paths = 1  # How many extra paths to include beyond correct ones.

# Seed the legacy global NumPy RNG. The order of the random draws below
# determines the simulated data, so do not reorder them.
np.random.seed(seed)

# Design matrix: one row per edge (n in-edges + m out-edges), one column per
# candidate in/out pairing.
r_edges, p_paths = (n + m, n * m)
X = sz.deconvolution.design_paths(n, m)[0]
assert X.shape == (r_edges, p_paths)

# Select which pairs of in/out edges are "real" and assign them weights across samples.
active_paths = sz.deconvolution.simulate_active_paths(n, m, excess=num_excess_paths)
active_paths = [i for i, _ in active_paths]  # keep only the first item of each returned pair
print(active_paths)
beta = np.zeros((p_paths, s_samples))
beta[active_paths, :] = np.random.lognormal(
    mean=-5, sigma=7, size=(len(active_paths), s_samples)
)
# Round first so that tiny draws become structural zeros, then apply the depth
# scaling. Folding `depth_multiplier` into `beta` here keeps the ground truth
# on the same scale as the simulated observations: the likelihood check below
# and any later comparison against `beta` stay valid for multipliers other
# than 1 (previously only `expect` was scaled).
beta = beta.round(1) * depth_multiplier


# Simulate the observed depth of each edge.
expect = X @ beta
log_noise = np.random.normal(loc=0, scale=1, size=expect.shape)
y_obs = expect * np.exp(log_noise * sigma)  # multiplicative log-normal noise


# Log-likelihood of the simulated data at the true (scaled) parameters.
print(-model.negloglik(beta, sigma, y_obs, X, alpha=alpha))

# # Simulate a selection of paths during the estimation procedure.
# # Possibly over-specified. (see `num_excess_paths`)
# _active_paths = list(
#     sorted(
#         set(active_paths)
#         | set(
#             np.random.choice(
#                 [p for p in range(p_paths) if p not in active_paths],
#                 replace=False,
#                 size=num_excess_paths,
#             )
#         )
#     )
# )
# X_reduced = X[:, _active_paths]

# # Estimate model parameters
# beta_est, sigma_est, _ = model.fit(y_obs, X_reduced, alpha=alpha)

# # Calculate likelihood
# loglik = -model.negloglik(beta_est, sigma_est, y_obs, X_reduced, alpha=alpha)
# assert np.isfinite(loglik)

# # Estimate standard errors.
# beta_stderr, sigma_stderr = model.estimate_stderr(
#     y_obs, X_reduced, beta_est, sigma_est, alpha=alpha
# )

# # Check model identifiable.
# assert np.isfinite(beta_stderr).all()
# assert np.isfinite(sigma_stderr)

In [None]:
# Heatmap of the simulated (true) depths: one row per active path, one column
# per sample, drawn on a symmetric-log colour scale.
sns.heatmap(
    pd.DataFrame(beta[active_paths, :], index=active_paths),
    cmap='coolwarm',
    norm=mpl.colors.SymLogNorm(linthresh=1, vmin=-5e7, vmax=5e7),
    yticklabels=1,
)

In [None]:
# Invoke autoreload by hand before running the estimation cell below —
# presumably so that local edits to `strainzip` are picked up; confirm intent.
%autoreload

In [None]:
# Run path estimation on the full design matrix and the simulated edge depths.
(
    selected_paths,
    beta_est,
    beta_stderr,
    sigma_est,
    sigma_stderr,
    inv_hessian,
    fit,
    delta_aic,
) = sz.deconvolution.estimate_paths(
    X,
    y_obs,
    model=sz.depth_model,
    forward_stop=0.2,
    backward_stop=0.01,
    verbose=2,
    alpha=alpha,
)

# Compare the selection with the simulated truth. Printed in order:
# selected-but-not-active, selected-and-active, active-but-not-selected.
print(
    set(selected_paths) - set(active_paths),
    set(selected_paths) & set(active_paths),
    set(active_paths) - set(selected_paths),
)

In [None]:
# Display the `delta_aic` value returned by `estimate_paths` in the cell above.
delta_aic

In [None]:
# Sorted union of the selected and the simulated (active) path indices;
# used as the common row index for the comparison tables below.
all_paths = sorted(set(selected_paths).union(active_paths))

In [None]:
# Estimated depths, re-indexed onto `all_paths`; rows for paths that were not
# selected are filled with 0.
depth_est = (
    pd.DataFrame(beta_est, index=selected_paths)
    .reindex(index=all_paths, fill_value=0)
)
sns.heatmap(
    depth_est,
    cmap='coolwarm',
    norm=mpl.colors.SymLogNorm(linthresh=1, vmin=-5e7, vmax=5e7),
    yticklabels=1,
)

In [None]:
# Simulated (true) depths, re-indexed onto `all_paths`; rows for paths that
# are not active are filled with 0.
depth = (
    pd.DataFrame(beta[active_paths, :], index=active_paths)
    .reindex(index=all_paths, fill_value=0)
)
sns.heatmap(
    depth,
    cmap='coolwarm',
    norm=mpl.colors.SymLogNorm(linthresh=1, vmin=-5e7, vmax=5e7),
    yticklabels=1,
)

In [None]:
# Estimation error per path and sample: estimated minus simulated depth.
err = depth_est.sub(depth)
sns.heatmap(
    err,
    cmap='coolwarm',
    norm=mpl.colors.SymLogNorm(linthresh=1, vmin=-5e7, vmax=5e7),
    yticklabels=1,
)

In [None]:
# Estimated standard errors, re-indexed onto `all_paths`; rows for paths that
# were not selected are filled with 0.
err_est = (
    pd.DataFrame(beta_stderr, index=selected_paths)
    .reindex(index=all_paths, fill_value=0)
)
sns.heatmap(
    err_est,
    cmap='coolwarm',
    norm=mpl.colors.SymLogNorm(linthresh=1, vmin=-5e7, vmax=5e7),
    yticklabels=1,
)

In [None]:
# Long-format table with one row per (path, sample): true depth, estimated
# depth, error and estimated standard error, plus flags marking paths that
# were selected but not active (false positive) or active but not selected
# (false negative).
d = (
    pd.DataFrame(
        {
            'depth': depth.stack(),
            'depth_est': depth_est.stack(),
            'err': err.stack(),
            'stderr_est': err_est.stack(),
        }
    )
    .rename_axis(['path', 'sample'])
    .reset_index()
    .assign(
        false_positive=lambda x: x['path'].isin(
            set(selected_paths) - set(active_paths)
        ),
        false_negative=lambda x: x['path'].isin(
            set(active_paths) - set(selected_paths)
        ),
    )
)
xx = np.logspace(-1, 5)

# Error against true depth, coloured by the false-positive flag, with the
# reference lines err = +depth and err = -depth on symlog axes.
plt.scatter('depth', 'err', data=d, c='false_positive')
plt.plot(xx, xx)
plt.plot(xx, -xx)
plt.xscale('symlog', linthresh=1e-1)
plt.yscale('symlog', linthresh=1e-1)

In [None]:
# Rebuild the long-format (path, sample) table: true depth, estimated depth,
# error and estimated standard error, plus false-positive / false-negative
# flags derived from the selected and active path sets.
d = (
    pd.DataFrame(
        {
            'depth': depth.stack(),
            'depth_est': depth_est.stack(),
            'err': err.stack(),
            'stderr_est': err_est.stack(),
        }
    )
    .rename_axis(['path', 'sample'])
    .reset_index()
    .assign(
        false_positive=lambda x: x['path'].isin(
            set(selected_paths) - set(active_paths)
        ),
        false_negative=lambda x: x['path'].isin(
            set(active_paths) - set(selected_paths)
        ),
    )
)
xx = np.logspace(-1, 3)

# Actual error against estimated standard error, with the reference lines
# err = +stderr and err = -stderr on symlog axes.
plt.scatter('stderr_est', 'err', data=d)
plt.plot(xx, xx)
plt.plot(xx, -xx)
plt.xscale('symlog', linthresh=1e-1)
plt.yscale('symlog', linthresh=1e-1)