In [157]:
import numpy as np
from scipy.optimize import minimize, LinearConstraint
from scipy.special import xlogy

In [345]:
beta = 1
B = 1

def Hamiltonian(*state):
    x0, x1, x2, y01, y02 = 2 * np.array(state) - 1
    return beta / 2 * (x0 * x1 * y01 + x0 * x2 * y02) + B * x0


def target_function(x):
    mu = np.exp(x)
    exp = np.sum(np.fromfunction(Hamiltonian, (2, 2, 2, 2, 2)) * mu)
    rel_entr = np.sum(
        np.log(mu) * mu
    )  # Since eta is uniform, the KL-div is just the entropy

    mu_marg = np.sum(mu, axis=(0, 1, 2, 4))
    rel_marg_entr = np.sum(np.log(mu_marg) * mu_marg)

    return exp - rel_entr + rel_marg_entr

# This function is not used. It is not accurate at this point in time
def grad_target_function(x):
    mu = np.exp(x)
    mu_marg = np.sum(mu, axis=(0, 1, 2, 4))
    grad_exp = np.fromfunction(Hamiltonian, (2, 2, 2, 2, 2))
    grad_rel_entr = np.log(mu) + 1
    grad_rel_marg_entr = np.log(mu_marg) + 1
    # repeat the values to all dimensions
    grad_rel_marg_entr = np.tile(
        np.tile(grad_rel_marg_entr[:, np.newaxis], (1, 2)), (2, 2, 2, 1, 1)
    )
    return (grad_exp - grad_rel_entr + grad_rel_marg_entr) * mu

def constraint_function(x):
    return np.sum(np.exp(x)) - 1


## Proximal Optimization for Constrained Optimization

For the following optimization problem:

$$\begin{aligned}
\text{maximize}\quad& f(\textbf{x}) \\
\text{subject to}\quad& \textbf{g}(\textbf{x}) = \textbf{0}
\end{aligned}$$

We have the following iterative process to obtain an optimum:
$$\begin{aligned}
\textbf{x}^{(t+1)} &= \operatorname{argmax}_{\textbf{x}} \; f(\textbf{x}) + \boldsymbol{\lambda}^{(t)} \cdot \textbf{g}(\textbf{x}) - \frac12 \lVert \textbf{g}(\textbf{x}) \rVert^2 \\
\boldsymbol{\lambda}^{(t+1)} &= \boldsymbol{\lambda}^{(t)} - \textbf{g}\left(\textbf{x}^{(t+1)}\right) \\
\end{aligned}$$


In [346]:
def optimize(x0, lam0, patience, eps, verbose=True):

    x = x0
    lam = lam0
    
    target = -np.inf
    for it in range(patience):
        if verbose:
            print(f"{it=}")

        # Redefine the function for scipy.optimize.minimize
        # There is probably some fancy functional-programming way to incorporate lambda
        # but you get the point.
        def proximal_target_function(_x):
            _x = _x.reshape((2, 2, 2, 2, 2))
            _constraint = constraint_function(_x)
            return -target_function(_x) - lam * _constraint + _constraint ** 2 / 2
        
        # Calculate proximal point
        res = minimize(proximal_target_function, x0=x)
        x = res.x.reshape((2, 2, 2, 2, 2))
        
        new_target = target_function(x)
        constraint = constraint_function(x)
        if verbose:
            print(f"{new_target=}\t{constraint=}")
            
        # Decide whether to terminate
        if np.abs(target - new_target) < eps:
            return x, target
        target = new_target
        
        # Update lambda
        lam = lam - constraint
    return x, target


In [200]:
x0 = np.zeros((2, 2, 2, 2, 2))

x, _ = optimize(x0, lam0=0, patience=1000, eps=1e-6)

it=0
new_target=15.325634961650895	constraint=3.4465970351591215
it=1
new_target=3.446590107014522	constraint=-2.453636693600103e-06
it=2
new_target=3.446590107014522	constraint=-2.453636693600103e-06


## Analysis

In our theory we have hypothesized that

$$\mathbb{P}_\mu(S=s|Y_{01} = y_{01}) = \frac1Z e^{\mathcal{H}(s)}$$

We now compare the conditional distribution obtained from the optimization with this exponential form.

In [234]:
mu = np.exp(x)
print(f'Distribution Norm: {np.sum(mu)}\n')
print(f'Checking for conditional distribution:')
# Conditional Distribution
mu01 = np.sum(mu, (0, 1, 2, 4))
mu_cond = mu / np.tile(np.tile(mu01[:, np.newaxis], (1, 2)), (2, 2, 2, 1, 1))
# mu_cond0 = mu[:, :, :, 0, :] / mu01[0]
# mu_cond1 = mu[:, :, :, 1, :] / mu01[1]

# Exponential Distribution
exponential_dist = np.exp(np.fromfunction(Hamiltonian, (2, 2, 2, 2, 2)))
norm = np.sum(exponential_dist, (0, 1, 2, 4))
norm = np.tile(np.tile(norm[:, np.newaxis], (1, 2)), (2, 2, 2, 1, 1))
exponential_dist = exponential_dist / norm

# exponential_dist0 = exponential_dist[:, :, :, 0, :]
# exponential_dist0 = exponential_dist0 / np.sum(exponential_dist0)
# exponential_dist1 = exponential_dist[:, :, :, 1, :]
# exponential_dist1 = exponential_dist1 / np.sum(exponential_dist1)
print(f'Difference between mu and exponential distribution:')
print(np.abs(mu_cond - exponential_dist))

print(f'\nP(Y01=y01): {mu01}')

Distribution Norm: 1.0000070533738148

Checking for conditional distribution:
Difference between mu and exponential distribution:
[[[[[1.20245598e-07 1.01423153e-10]
    [4.32244306e-10 2.00683444e-06]]

   [[1.52212960e-07 3.88765382e-07]
    [2.26945305e-10 2.74874765e-10]]]


  [[[7.89363842e-11 1.15200089e-06]
    [4.28817437e-10 1.41317703e-07]]

   [[3.31741391e-08 2.16043087e-11]
    [8.19818051e-08 1.67358305e-07]]]]



 [[[[2.05974632e-07 1.04106014e-06]
    [4.93942014e-07 5.31934754e-06]]

   [[7.65289752e-07 5.40694162e-06]
    [5.22204858e-06 7.09776612e-07]]]


  [[[1.03778900e-07 2.82694247e-06]
    [3.66998295e-05 4.34688571e-07]]

   [[3.04157687e-06 8.11379460e-07]
    [1.95319519e-07 2.19258515e-05]]]]]

P(Y01=y01): [0.71504575 0.28496131]


### Changing initial guess

We now examine how the initial guess for $x$ influences the results.

In [180]:
# x0 = np.ones((2, 2, 2, 2, 2))
x0 = np.random.uniform(-1, 1, (2, 2, 2, 2, 2))
print(f'Initial Guess={x0}')

x, target = optimize(x0, lam0=0, patience=1000, eps=1e-6, verbose=False)
print(f'{target=}')

Initial Guess=[[[[[ 0.31565381  0.26116493]
    [-0.52779404 -0.36706465]]

   [[ 0.63012789  0.24220802]
    [ 0.15937316  0.61637476]]]


  [[[ 0.1435931  -0.19305956]
    [-0.29644628 -0.77405785]]

   [[-0.3878525  -0.72787086]
    [-0.91334667  0.17384261]]]]



 [[[[-0.968578   -0.17608896]
    [-0.20617675 -0.40441563]]

   [[-0.07337725 -0.44329344]
    [ 0.95241968  0.8043892 ]]]


  [[[-0.13593784 -0.52466993]
    [ 0.69956671 -0.35113912]]

   [[ 0.69803921  0.09689119]
    [-0.61765896  0.33439266]]]]]
target=array([[[[-17.55590668, -19.42241647],
         [ -2.44656519,  -3.44652141]],

        [[-22.37716224, -17.67363032],
         [ -3.44652173,  -2.44656547]]],


       [[[-22.45195296, -18.42333841],
         [ -1.44656375,  -2.44656519]],

        [[-18.84338958, -23.69857227],
         [ -2.44656534,  -1.4465635 ]]]])


In [272]:
mu = np.exp(x)
print(f'Distribution Norm: {np.sum(mu)}\n')
print(f'Checking for conditional distribution:')
# Conditional Distribution
mu01 = np.sum(mu, (0, 1, 2, 4))
print(f'\nP(Y01=y01): {mu01}')

Distribution Norm: 0.9999938617730124

Checking for conditional distribution:

P(Y01=y01): [5.24609141e-08 9.99993809e-01]


We can see that all of them have different values of conditional probability, which supports our suspicion that the distribution on $Y_{01}$ does not matter.

**Note 1:** When $P(Y_{01}=y_{01})$ is close to zero, the conditional probability values may be less accurate. This shows up as deviations of magnitude 1e-1 to 1e-2.

**Note 2:** Some initial values may cause an overflow. This seems to be normal; try again with a different initial value.

### Different values of $\beta$ and $B$

In [410]:
beta = 1
B = 1

x0 = np.random.uniform(-1, 1, (2, 2, 2, 2, 2))
# x0 = np.ones((2, 2, 2, 2, 2))
x, target = optimize(x0, lam0=0, patience=1000, eps=1e-10, verbose=False)
print(f'{target=}')

mu = np.exp(x)
print(f'Distribution Norm: {np.sum(mu)}\n')
# print(f'Checking for conditional distribution:')
# Conditional Distribution
mu01 = np.sum(mu, (0, 1, 2, 4))
# mu_cond = mu / np.tile(np.tile(mu01[:, np.newaxis], (1, 2)), (2, 2, 2, 1, 1))

# # Exponential Distribution
# exponential_dist = np.exp(np.fromfunction(Hamiltonian, (2, 2, 2, 2, 2)))
# norm = np.sum(exponential_dist, (0, 1, 2, 4))
# norm = np.tile(np.tile(norm[:, np.newaxis], (1, 2)), (2, 2, 2, 1, 1))
# exponential_dist = exponential_dist / norm

# print(f'Difference between mu and exponential distribution:')
# print(np.abs(mu_cond - exponential_dist))

print(f'\nP(Y01=y01): {mu01}')

a = 1
b = 0
axes = list(range(len(mu.shape)))
axes.remove(a)
axes.remove(b)
dist = np.sum(mu, tuple(axes))
dist = dist / np.sum(dist) # Normalizing may affect the results slightly, but we are estimating anyways
print(dist)
x_value = np.fromfunction(lambda x, y: (2 * x - 1), dist.shape)
y_value = np.fromfunction(lambda x, y: (2 * y - 1), dist.shape)
product = x_value * y_value
covariance = np.sum(product * dist) - np.sum(x_value * dist) * np.sum(y_value * dist)

dist_x = np.sum(dist, 0)
variance_x = np.sum(dist_x) - (dist_x[1] - dist_x[0]) ** 2
sigma_x = np.sqrt(variance_x)
dist_y = np.sum(dist, 1)
variance_y = np.sum(dist_y) - (dist_y[1] - dist_y[0]) ** 2
sigma_y = np.sqrt(variance_y)

print(f'{covariance=}, {sigma_x=}, {sigma_y=}')

pearson = covariance / (sigma_x * sigma_y)
print(f'{pearson=}')

target=3.4466201992444496
Distribution Norm: 1.0000062769301432


P(Y01=y01): [0.65733047 0.34267581]
[[0.05093462 0.0682694 ]
 [0.50442531 0.37637067]]
covariance=-0.061066063086528816, sigma_x=0.9938516549676646, sigma_y=0.6480568629039271
pearson=-0.09481242239440378


In [293]:
a = 3
b = 0
axes = list(range(len(mu.shape)))
axes.remove(a)
axes.remove(b)
dist = np.sum(mu, tuple(axes))
dist = dist / np.sum(dist)
print(dist)
x_value = np.fromfunction(lambda x, y: (2 * x - 1), dist.shape)
y_value = np.fromfunction(lambda x, y: (2 * y - 1), dist.shape)
product = x_value * y_value
covariance = np.sum(product * dist) - np.sum(x_value * dist) * np.sum(y_value * dist)

dist_x = np.sum(dist, 0)
variance_x = np.sum(dist_x) - (dist_x[1] - dist_x[0]) ** 2
sigma_x = np.sqrt(variance_x)
dist_y = np.sum(dist, 1)
variance_y = np.sum(dist_y) - (dist_y[1] - dist_y[0]) ** 2
sigma_y = np.sqrt(variance_y)

print(f'{covariance=}, {sigma_x=}, {sigma_y=}')

pearson = covariance / (sigma_x * sigma_y)
print(f'{pearson=}')

[[1.19201787e-01 7.31001211e-09]
 [8.80797800e-01 4.05690092e-07]]
covariance=1.676813660145271e-07, sigma_x=0.0012853014181753298, sigma_y=0.6480516237847741
pearson=0.00020131225490591998
