In [33]:
import cvxpy as cp
import numpy as np

In [55]:
def entropy_model(x, y, v):
    n, k = x.shape
    s = v.shape[0]

    # Define variables
    p = cp.Variable(n)
    w = cp.Variable((n, s))

    # Define the objective function
    entropy = -cp.sum(cp.entr(p) + cp.entr(1 - p)) - cp.sum(cp.entr(w))
    objective = cp.Minimize(entropy)

    # Define the constraints
    constraints = []
    for k_idx in range(k):
        lhs = cp.sum(cp.multiply(x[:, k_idx], y))
        rhs = cp.sum(cp.multiply(x[:, k_idx], p)) \
              + cp.sum(cp.multiply(cp.multiply(x[:, k_idx].reshape(-1,1), v), w))
        constraints.append(lhs == rhs)
    constraints += [p >= 0, p <= 1]
    constraints.append(cp.sum(w, axis=1) == 1)

    # Define the problem
    problem = cp.Problem(objective, constraints)

    # Solve the problem
    problem.solve()

    # Extract results
    p_opt = p.value
    w_opt = w.value
    betas = [-constraint.dual_value for constraint in constraints[:k]]  # Negative of the Lagrange multipliers

    return p_opt, w_opt, betas, problem

In [63]:
from scipy.special import expit
# Set random seed for reproducibility
np.random.seed(42)

# Generate synthetic data
n_samples = 1000
true_betas = np.array([1.5, 10])

# Generate random features
X = np.random.normal(0, 1, (n_samples, len(true_betas)))

# Calculate true probabilities using logistic function
true_probs = expit(X @ true_betas)

# Generate binary outcomes
y = np.random.binomial(1, true_probs)

# Set up the states for maximum entropy
v = np.array([-0.3, 0, 0.3])


In [64]:

# Solve the maximum entropy model
p_opt, w_opt, estimated_betas, res = entropy_model(X, y, v)

# Print results
print("True betas:", true_betas)
print("Estimated betas:", estimated_betas)
print("Mean predicted probability:", np.mean(p_opt))
print("Mean noise", np.mean(w_opt))

True betas: [ 1.5 10. ]
Estimated betas: [-0.2504104657675626, -1.8458237680554712]
Mean predicted probability: 0.5162750811721235
Mean noise 0.3333333330919437


In [58]:
from sklearn.linear_model import LogisticRegression
logit = LogisticRegression(fit_intercept=False)
logit.fit(X, y)
# Get logistic regression coefficients
logit_betas = logit.coef_[0]

print("Results:")
print("True betas:", true_betas)
print("Logistic regression betas:", logit_betas)

Results:
True betas: [ 1.5 10. ]
Logistic regression betas: [1.04811998 7.08590115]
