In [1]:
import numpy as np
from scipy.optimize import minimize
from scipy.special import entr
from scipy.stats import entropy

In [2]:
def gme_linear_regression(X, y, z, v):
    """
    Estimate the linear model y = X @ beta + e by Generalized Maximum Entropy.

    Every coefficient beta_i is expressed as the probability-weighted mean of
    its support points z[i], and every error e_t as the probability-weighted
    mean of the support points v. The probabilities are chosen to maximise the
    total Shannon entropy subject to (a) reproducing y exactly and (b) each
    probability vector summing to one.

    X: covariates, shape (n, k)
    y: outcomes, length n
    z: beta support, shape (k, m); a single row of m points is shared by all betas
    v: error support, length j (j does not have to equal m)

    Returns (P_opt, W_opt, result):
        P_opt: shape (k, m), probabilities on the beta support
        W_opt: shape (n, j), probabilities on the error support
        result: the OptimizeResult returned by scipy.optimize.minimize

    Raises ValueError when the input shapes are inconsistent or when the
    optimizer does not report success.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    z = np.atleast_2d(np.asarray(z, dtype=float))
    v = np.asarray(v, dtype=float).reshape(-1)

    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n, k)")
    n, k = X.shape
    if y.size != n:
        raise ValueError(f"y has {y.size} entries but X has {n} rows")

    m = z.shape[-1]  # number of support points per beta
    j = v.size       # number of support points per error (independent of m)
    if m == 0 or j == 0:
        raise ValueError("support spaces must contain at least one point")
    # One support row per coefficient; broadcast_to raises ValueError when the
    # number of rows is neither 1 nor k.
    z = np.broadcast_to(z, (k, m))

    def split(p):
        """Unpack the flat decision vector into P (k, m) and W (n, j)."""
        return p[:k * m].reshape(k, m), p[k * m:].reshape(n, j)

    def objective(p):
        # entr(p) is -p*log(p) elementwise (0 at p == 0), so this is minus the
        # summed entropy of all probability vectors. scipy.stats.entropy is not
        # used here because it renormalises its whole input to sum to one,
        # which rescales the objective by 1 / (n + k).
        return -entr(p).sum()

    def data_constraint(p):
        # Residual of y = X @ beta_hat + error_hat; must be zero for every row.
        P, W = split(p)
        beta_hat = (P * z).sum(axis=1)
        error_hat = W @ v
        return X @ beta_hat + error_hat - y

    def adding_up_constraint(p):
        # Each row of P and of W is a probability vector and must sum to one.
        P, W = split(p)
        return np.concatenate([P.sum(axis=1), W.sum(axis=1)]) - 1.0

    # Initial guess: uniform probabilities on every support
    p_init = np.concatenate([np.full(k * m, 1.0 / m), np.full(n * j, 1.0 / j)])

    cons = [
        {'type': 'eq', 'fun': data_constraint},
        {'type': 'eq', 'fun': adding_up_constraint},
    ]
    # Probabilities live in [0, 1]; without bounds the solver can wander to
    # negative values where the entropy is undefined.
    bounds = [(0.0, 1.0)] * p_init.size

    result = minimize(objective, p_init, constraints=cons, bounds=bounds,
                      method='SLSQP')

    if not result.success:
        raise ValueError(f"Optimization Failed: {result.message}")

    P_opt, W_opt = split(result.x)
    return P_opt, W_opt, result

In [3]:
def simulate_data(n=30, intercept=1.0, slope=2.0, noise_std=0.5, seed=42):
    """
    Simulate n observations from y = intercept + slope*x + noise.

    The global NumPy random generator is reseeded with `seed`; x is drawn
    first and the noise second, both standard normal, with the noise scaled
    by `noise_std`.

    Returns:
        X: shape (n, 2) design matrix; column 0 is all ones (intercept),
           column 1 holds x
        y: shape (n,) simulated outcomes
    """
    np.random.seed(seed)
    regressor = np.random.randn(n)
    disturbance = np.random.randn(n)

    response = intercept + slope * regressor + noise_std * disturbance

    # Design matrix: constant term followed by the regressor
    design = np.empty((n, 2))
    design[:, 0] = 1.0
    design[:, 1] = regressor
    return design, response

In [4]:
# Draw the synthetic sample with simulate_data's defaults
# (n=30, intercept=1.0, slope=2.0, noise_std=0.5, seed=42).
X, y = simulate_data()

In [9]:
# Least-squares solution for (intercept, slope); element [0] of lstsq's return
# value is the coefficient vector. Presumably a baseline for the GME estimates.
np.linalg.lstsq(X, y, rcond = None)[0]

array([0.94903764, 2.05112428])

In [10]:
# Beta support: the same three points (-10, 0, 10) for each of the two coefficients.
z = np.tile([-10, 0, 10], (2, 1))
# Error support: three points at 0 and +/- 3 standard deviations of y.
v = np.std(y) * np.array([-3, 0, 3])
P_opt, W_opt, result = gme_linear_regression(X=X, y=y, z=z, v=v)

In [14]:
# GME coefficient estimates: each beta is the probability-weighted sum of its
# own row of support points.
(P_opt * z).sum(axis =1)

array([0.93343736, 2.02094526])

In [16]:
# Sanity check: the error probabilities of every observation should sum to 1,
# as required by the adding-up constraints.
W_opt.sum(axis = 1)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

array([[ 4, 10, 18],
       [ 7, 16, 27]])