In [None]:
import numpy as np
import statsmodels.api as sm
from statsmodels.genmod.families.links import Link

class Log1pLink(Link):
    """
    Custom link function g(p) = log(1 + p).

    The inverse is g^{-1}(z) = exp(z) - 1.  ``log1p``/``expm1`` are used
    instead of ``log(1 + p)``/``exp(z) - 1`` because they stay accurate when
    the argument is close to zero.  The second derivatives are provided in
    closed form rather than left to the base class's generic fallbacks.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, p):
        """Return the link g(p) = log(1 + p)."""
        return np.log1p(p)

    def inverse(self, z):
        """Return the inverse link g^{-1}(z) = exp(z) - 1."""
        return np.expm1(z)

    def deriv(self, p):
        """Return the first derivative g'(p) = 1 / (1 + p)."""
        return 1 / (1 + p)

    def deriv2(self, p):
        """Return the second derivative g''(p) = -1 / (1 + p)**2."""
        return -1 / (1 + p) ** 2

    def inverse_deriv(self, z):
        """Return the derivative of the inverse link, exp(z)."""
        return np.exp(z)

    def inverse_deriv2(self, z):
        """Return the second derivative of the inverse link, exp(z)."""
        return np.exp(z)

    @property
    def linkclass(self):
        """Name of the concrete link class, as a string."""
        return type(self).__name__


class Gaussian2(sm.families.Gaussian):
    """Gaussian family subclass listing Log1pLink as its only link class."""

    # ``links`` and ``safe_links`` are bound to the same list object.
    safe_links = links = [Log1pLink]


# Shared link instance reused by the cells below.
LOG1PLINK = Log1pLink()

In [None]:
# Show the full repr of `beta`; `beta` is assigned in a later cell, so that
# cell has to be run first.
print(f"{beta!r}")

In [None]:
# Print the transpose of a 6 x 5 indicator matrix.  The 5 x 6 result has the
# same entries as the `X` array defined in a later cell.
print(
    repr(
        np.transpose(
            np.array(
                [
                    [1, 0, 1, 0, 0],
                    [1, 0, 0, 1, 0],
                    [1, 0, 0, 0, 1],
                    [0, 1, 1, 0, 0],
                    [0, 1, 0, 1, 0],
                    [0, 1, 0, 0, 1],
                ]
            )
        )
    )
)

In [None]:
# Display the transpose of `beta` (assigned in a later cell).
beta.transpose()

In [None]:
# Design matrix: rows are counted as edges and columns as paths (see the
# r_edges / p_paths assignments below); its shape is (n + m, n * m).
X = np.array([
    [1, 1, 1, 0, 0, 0],
    [0, 0, 0, 1, 1, 1],
    [1, 0, 0, 1, 0, 0],
    [0, 1, 0, 0, 1, 0],
    [0, 0, 1, 0, 0, 1],
])

# Latent path depths: one row per path, one column per sample.
beta = np.array([
    [1. , 1. , 0. ],
    [0.5, 1. , 0. ],
    [0. , 0. , 0. ],
    [0. , 0. , 0. ],
    [0.5, 0. , 0. ],
    [0. , 0. , 1. ],
])

n, m = 2, 3

s_samples = beta.shape[1]
p_paths = X.shape[1]
r_edges = X.shape[0]

assert X.shape == (n+m, n*m)
assert beta.shape == (p_paths, s_samples)

# Draw one independent noise value per (edge, sample) entry.  A bare
# np.random.normal(0, 1) returns a single scalar, which would shift every
# observation by the same amount instead of adding observation-level noise.
noise = np.random.normal(0, 1, size=(r_edges, s_samples))

# Noise is added on the link (log1p) scale and mapped back with the inverse.
y = LOG1PLINK.inverse(LOG1PLINK(X @ beta) + noise)

# Row-major flattening: entry (edge e, sample j) lands in row e * s_samples + j.
y_stacked = y.reshape((r_edges * s_samples, 1))
beta_stacked = beta.reshape((p_paths * s_samples, 1))

# The stacked design that matches this row-major ordering is kron(X, I_s):
# row e * s_samples + j carries X[e, :] in the coefficient slots of sample j.
# Tiling X into every block would not line up with y_stacked row by row.
X_stacked = np.kron(X, np.eye(s_samples))

assert X_stacked.shape == (r_edges * s_samples, p_paths * s_samples)
assert X_stacked.shape[0] == y_stacked.shape[0]

# Sanity check: the stacked system reproduces the noiseless X @ beta.
assert np.allclose(
    X_stacked @ beta_stacked,
    (X @ beta).reshape((r_edges * s_samples, 1)),
)

In [None]:
# Fit a Gaussian GLM with the log1p link on the full stacked design.
# NOTE(review): the response is log1p-transformed here AND the family uses the
# log1p link, so the transform is effectively applied on both sides of the
# model -- confirm this is the intended specification.
model = sm.GLM(
    endog=LOG1PLINK(y_stacked),
    exog=X_stacked,
    family=Gaussian2(link=LOG1PLINK),
)
results = model.fit()

summary_table = results.summary()
print(summary_table)

In [None]:
# Refit using only the selected path columns of X.
active_paths = [0, 1, 2, 4, 5]
reduced_X = X[:, active_paths]

# y_stacked is the row-major flattening of the (edges, samples) response, so
# row e * s_samples + j must carry reduced_X[e, :] in the coefficient slots of
# sample j.  That is kron(reduced_X, I_s); tiling reduced_X into every block
# would not line up with y_stacked row by row.
reduced_X_stacked = np.kron(reduced_X, np.eye(s_samples))

assert reduced_X_stacked.shape[0] == y_stacked.shape[0]

# NOTE(review): the response is log1p-transformed and the family also uses the
# log1p link -- confirm this specification is intended.
model = sm.GLM(LOG1PLINK(y_stacked), reduced_X_stacked, family=Gaussian2(link=LOG1PLINK))
results = model.fit()

print(results.summary())

In [None]:
# Refit using only the selected path columns of X.
active_paths = [1, 2, 4, 5]
reduced_X = X[:, active_paths]

# y_stacked is the row-major flattening of the (edges, samples) response, so
# row e * s_samples + j must carry reduced_X[e, :] in the coefficient slots of
# sample j.  That is kron(reduced_X, I_s); tiling reduced_X into every block
# would not line up with y_stacked row by row.
reduced_X_stacked = np.kron(reduced_X, np.eye(s_samples))

assert reduced_X_stacked.shape[0] == y_stacked.shape[0]

# NOTE(review): the response is log1p-transformed and the family also uses the
# log1p link -- confirm this specification is intended.
model = sm.GLM(LOG1PLINK(y_stacked), reduced_X_stacked, family=Gaussian2(link=LOG1PLINK))
results = model.fit()

print(results.summary())

In [None]:
# Refit using only the selected path columns of X.
active_paths = [2, 4, 5]
reduced_X = X[:, active_paths]

# y_stacked is the row-major flattening of the (edges, samples) response, so
# row e * s_samples + j must carry reduced_X[e, :] in the coefficient slots of
# sample j.  That is kron(reduced_X, I_s); tiling reduced_X into every block
# would not line up with y_stacked row by row.
reduced_X_stacked = np.kron(reduced_X, np.eye(s_samples))

assert reduced_X_stacked.shape[0] == y_stacked.shape[0]

# NOTE(review): the response is log1p-transformed and the family also uses the
# log1p link -- confirm this specification is intended.
model = sm.GLM(LOG1PLINK(y_stacked), reduced_X_stacked, family=Gaussian2(link=LOG1PLINK))
results = model.fit()

print(results.summary())

In [None]:
# Refit using only the selected path columns of X.
active_paths = [4, 5]
reduced_X = X[:, active_paths]

# y_stacked is the row-major flattening of the (edges, samples) response, so
# row e * s_samples + j must carry reduced_X[e, :] in the coefficient slots of
# sample j.  That is kron(reduced_X, I_s); tiling reduced_X into every block
# would not line up with y_stacked row by row.
reduced_X_stacked = np.kron(reduced_X, np.eye(s_samples))

assert reduced_X_stacked.shape[0] == y_stacked.shape[0]

# NOTE(review): the response is log1p-transformed and the family also uses the
# log1p link -- confirm this specification is intended.
model = sm.GLM(LOG1PLINK(y_stacked), reduced_X_stacked, family=Gaussian2(link=LOG1PLINK))
results = model.fit()

print(results.summary())