In [2]:
import numpy as np
from scipy.optimize import root
from scipy.special import expit


In [3]:

def logistic(z):
    """
    Logistic sigmoid 1 / (1 + exp(-z)), evaluated without overflow.

    z: scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an ndarray with the
        same shape as z, with values in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow, whereas the
    # naive exp(-z) overflows once z drops below roughly -709.
    decay = np.exp(-np.abs(z))
    # z >= 0:  1      / (1 + e^{-z})
    # z <  0:  e^{z}  / (1 + e^{z})    (the same value, written overflow-free)
    out = np.where(z >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged, so scalar in -> scalar out as before.
    return out[()]

def softmax(logits):
    """
    Softmax over every entry of `logits` (e.g. shape (S,) -> shape (S,)).

    The largest logit is subtracted before exponentiating, so the biggest
    exponent is exp(0) = 1 and the computation cannot overflow; the shift
    cancels in the ratio and leaves the result unchanged.
    """
    peak = np.max(logits)
    unnormalized = np.exp(logits - peak)
    return unnormalized / np.sum(unnormalized)


In [4]:

def solve_dual_unconstrained(x, y, v, tol=1e-8, maxiter=1000):
    """
    Solve the dual (unconstrained) system for a maximum-entropy
    binary choice + discrete noise model.

    The system solved is, for k = 1..K,

        F_k(beta) = sum_i x_{i,k} * ( y_i - p_i(beta) - sum_s v_s w_{i,s}(beta) ) = 0

    where
        p_i     = 1 / (1 + exp(-x_i^T beta))
        w_{i,s} = exp(x_i^T beta * v_s) / sum_r exp(x_i^T beta * v_r)

    `x` is used exactly as given: no intercept column is added here.

    x:  (n x K) matrix of covariates (array-like)
    y:  (n,)    binary labels in {0,1} (array-like)
    v:  (S,)    discrete noise support (array-like, S >= 1)
    tol:        tolerance forwarded to scipy.optimize.root
    maxiter:    forwarded to the 'hybr' solver as its 'maxfev' option
                (maximum number of function evaluations)

    Returns:
        beta_hat: shape (K,)   the solver's final iterate for F(beta)=0
        p_hat:    shape (n,)   p_i evaluated at beta_hat
        w_hat:    shape (n,S)  w_{i,s} evaluated at beta_hat
        sol:      the scipy OptimizeResult returned by root(); check
                  sol.success / sol.message before trusting beta_hat

    Raises:
        ValueError: if x is not 2-D, y does not have one entry per row of x,
                    or v is empty.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    v = np.asarray(v, dtype=float).reshape(-1)
    if x.ndim != 2:
        raise ValueError("x must be a 2-D array of shape (n, K)")
    n, K = x.shape
    if y.shape[0] != n:
        # Without this check a length-1 y would silently broadcast over all rows.
        raise ValueError("y must have exactly one entry per row of x")
    if v.size == 0:
        raise ValueError("v must contain at least one support point")

    # Data-side moment sum_i x_{i,k} y_i; it does not depend on beta.
    sum_xy = x.T @ y  # shape (K,)

    def _fitted(beta):
        """Return (p, w) at beta: p has shape (n,), w has shape (n, S)."""
        xb = x @ beta                                  # shape (n,)
        p = expit(xb)                                  # overflow-safe logistic
        logits = np.outer(xb, v)                       # logits[i, s] = xb_i * v_s
        logits -= logits.max(axis=1, keepdims=True)    # row-wise shift for stability
        w = np.exp(logits)
        w /= w.sum(axis=1, keepdims=True)              # each row sums to 1
        return p, w

    def _residual(beta):
        """F(beta): data moments minus model-implied moments, shape (K,)."""
        p, w = _fitted(np.reshape(beta, -1))
        # w @ v is the per-observation mean of the noise, sum_s v_s w_{i,s}.
        return sum_xy - x.T @ (p + w @ v)

    def _jacobian(beta):
        """dF/dbeta, shape (K, K)."""
        p, w = _fitted(np.reshape(beta, -1))
        noise_mean = w @ v
        # d/dt of sum_s v_s w_s(t) is the variance of v under w; the
        # centred form keeps it non-negative in floating point.
        noise_var = np.sum(w * (v - noise_mean[:, None]) ** 2, axis=1)
        # d/dt logistic(t) = p (1 - p)
        slope = p * (1.0 - p) + noise_var              # shape (n,)
        return -(x * slope[:, None]).T @ x

    # Start from beta = 0 and solve F(beta) = 0 with MINPACK's hybrid Powell
    # method, using the analytic Jacobian above.
    beta0 = np.zeros(K)
    sol = root(_residual, beta0, jac=_jacobian, method='hybr', tol=tol,
               options={'maxfev': maxiter})

    beta_hat = sol.x
    p_hat, w_hat = _fitted(beta_hat)

    return beta_hat, p_hat, w_hat, sol


In [8]:
# Simulate a synthetic binary-choice data set. The statement order matters:
# the covariates and the labels are consecutive draws from the same seeded
# global NumPy RNG stream.
np.random.seed(42)
n = 10000
K = 2  # 2 covariates
x_data = np.random.randn(n, K)  # standard-normal covariates, shape (n, K)
# "True" process: logistic link in x_data plus a constant `offset`.
# NOTE(review): `offset` acts as an intercept, but x_data has no constant
# column, so a model that only uses x_data @ beta cannot represent it.
true_beta = np.array([1.5, -2.0])
offset = 0.5
# logistic part: P(y_i = 1) = 1 / (1 + exp(-(offset + x_i^T true_beta)))
linear_part = offset + x_data @ true_beta
prob_true = 1.0 / (1.0 + np.exp(-linear_part))
# Bernoulli draw: y_i is 1.0 when a uniform(0, 1) draw falls below prob_true[i], else 0.0
y_data = (np.random.rand(n) < prob_true).astype(float)
# noise support: the S = 3 values the discrete noise term may take
v_data = np.array([-1.0, 0.0, 1.0])


Converged: True The solution converged.
Beta_hat: [ 0.22274545 -0.29993276]
p_hat (first 5): [0.53795455 0.42249564 0.50451706 0.53035852 0.43356888]
w_hat (first 5 rows):
 [[0.28410231 0.33077727 0.38512042]
 [0.44114846 0.3227392  0.23611234]
 [0.32732888 0.33329706 0.33937406]
 [0.29372321 0.33169687 0.37457991]
 [0.42528983 0.32553373 0.24917644]]


In [13]:

# Fit the dual system on the simulated data; the solver returns a 4-tuple.
beta_hat, p_hat, w_hat, sol = solve_dual_unconstrained(x=x_data, y=y_data, v=v_data)

# Report solver status first, then the estimate and a short preview of the
# fitted quantities (first five observations only).
first_rows = slice(5)
print("Converged:", sol.success, sol.message)
print("Beta_hat:", beta_hat.round(2))
print("p_hat (first 5):", p_hat[first_rows])
print("w_hat (first 5 rows):\n", w_hat[first_rows])


Converged: True The solution converged.
Beta_hat: [ 0.22 -0.3 ]
p_hat (first 5): [0.53795455 0.42249564 0.50451706 0.53035852 0.43356888]
w_hat (first 5 rows):
 [[0.28410231 0.33077727 0.38512042]
 [0.44114846 0.3227392  0.23611234]
 [0.32732888 0.33329706 0.33937406]
 [0.29372321 0.33169687 0.37457991]
 [0.42528983 0.32553373 0.24917644]]


In [11]:
from sklearn.linear_model import LogisticRegression

# Baseline for comparison: scikit-learn's LogisticRegression with its default
# settings, fitted on the same simulated data.
model = LogisticRegression()
fit = model.fit(X=x_data, y=y_data)
# Bare last expression so the notebook displays the rounded coefficients.
fit.coef_.round(2)

array([[ 1.51, -2.04]])