# First Layer: Good-Old Gaussian

In [1]:
import sympy as sp

from symdl.utils import pull_sums_out_front, sum_kronecker_contract, wilds, wild_subs
from symdl.random import ExpVal, RandomIndexedBase, connected_correlator
from symdl.gaussian import GaussianIndexedBase, wick_contraction, GaussianExpVal
from symdl.nn import Layer, NNIndexedBase, sample_indices, neuron_indices

In [2]:
# Index symbols. The cells below use i[0]..i[3] and alpha[0]..alpha[3],
# which display as i1..i4 and alpha1..alpha4.
i = neuron_indices('i1:5') # neuron indices
alpha = sample_indices('alpha1:5') # sample indices

# Network input x[k, alpha]; n0 is the upper bound of the sum over the
# input index k in the preactivation.
x = sp.IndexedBase('x')
n0 = sp.symbols('n0', integer=True)

# First-layer parameters: bias b^(1)[i] and weight W^(1)[i, k], both flagged
# as Gaussian. C_b and C_w are the positive constants that set their
# covariances in the initialization rules used further down.
b = NNIndexedBase('b^(1)', is_gaussian = True)
W = NNIndexedBase('W^(1)', is_gaussian = True)
C_b, C_w = sp.symbols('C_b^(1) C_W^(1)', positive=True)

## Two point correlator

In [3]:
# Assemble the first layer from its weights, biases, input and input width.
layer_1 = Layer(W, b, x, n0)
# Symbolic preactivation: indexing it with (neuron, sample) yields
# b[i] + Sum_k W[i, k] * x[k, alpha] with a freshly numbered dummy index k.
z1 = layer_1.preactivation

# Display one preactivation component as a sanity check.
z1[i[0], alpha[0]]

b^(1)[i1] + Sum(x[k_1, alpha1]*W^(1)[i1, k_1], (k_1, 1, n0))

In [4]:
# Product of two preactivations (neuron i1 / sample alpha1 and neuron i2 /
# sample alpha2), expanded term by term and with every summation moved
# to the front of its term.
zz: sp.Expr = pull_sums_out_front(
    sp.expand(z1[i[0], alpha[0]] * z1[i[1], alpha[1]])
)
zz

b^(1)[i1]*b^(1)[i2] + Sum(x[k_2, alpha1]*W^(1)[i1, k_2]*b^(1)[i2], (k_2, 1, n0)) + Sum(x[k_3, alpha2]*W^(1)[i2, k_3]*b^(1)[i1], (k_3, 1, n0)) + Sum(x[k_2, alpha1]*x[k_3, alpha2]*W^(1)[i1, k_2]*W^(1)[i2, k_3], (k_2, 1, n0), (k_3, 1, n0))

In [5]:
# Expectation value of the two-point product. As the displayed result shows,
# ExpVal distributes over the sum and keeps the input x outside the
# expectation, leaving only moments of W and b inside.
Ezz: sp.Expr = ExpVal(zz)
Ezz

𝔼[b^(1)[i1]*b^(1)[i2]] + Sum(𝔼[W^(1)[i1, k_2]*b^(1)[i2]]*x[k_2, alpha1], (k_2, 1, n0)) + Sum(𝔼[W^(1)[i2, k_3]*b^(1)[i1]]*x[k_3, alpha2], (k_3, 1, n0)) + Sum(𝔼[W^(1)[i1, k_2]*W^(1)[i2, k_3]]*x[k_2, alpha1]*x[k_3, alpha2], (k_2, 1, n0), (k_3, 1, n0))

In [6]:
# Pattern placeholders that stand for arbitrary indices in the rules below.
A, B, C, D = wilds('A, B, C, D')

# Second moments of the first-layer parameters at Gaussian initialization,
# registered in the same order as they are applied.
gaussian_init_rules = {}
# Weights and biases are uncorrelated.
gaussian_init_rules[ExpVal(W[A, B] * b[C])] = 0
# Bias covariance: C_b on the diagonal.
gaussian_init_rules[ExpVal(b[A] * b[B])] = C_b * sp.KroneckerDelta(A, B)
# Weight covariance: C_w / n0 when both index pairs coincide.
gaussian_init_rules[ExpVal(W[A, B] * W[C, D])] = (
    C_w * sp.KroneckerDelta(A, C) * sp.KroneckerDelta(B, D)/n0
)

# Substitute the moments into the two-point expectation.
Ezz = wild_subs(Ezz, gaussian_init_rules)
Ezz

C_b^(1)*KroneckerDelta(i1, i2) + Sum(0, (k_2, 1, n0)) + Sum(0, (k_3, 1, n0)) + Sum(C_W^(1)*KroneckerDelta(i1, i2)*KroneckerDelta(k_2, k_3)*x[k_2, alpha1]*x[k_3, alpha2]/n0, (k_2, 1, n0), (k_3, 1, n0))

In [7]:
# Collapse the double sum using the Kronecker delta over the summed indices ...
Ezz = sum_kronecker_contract(Ezz)
# ... then evaluate what can be evaluated, which removes the Sum(0, ...) terms.
Ezz = Ezz.doit()
Ezz

C_b^(1)*KroneckerDelta(i1, i2) + Sum(C_W^(1)*KroneckerDelta(i1, i2)*x[k_2, alpha1]*x[k_2, alpha2]/n0, (k_2, 1, n0))

## First-layer metric

The result `Ezz` reads: $\mathbb{E}\left[z^{(1)}_{i; \alpha_1} z^{(1)}_{j; \alpha_2}\right] = \delta_{ij} \left[ C_b^{(1)} + \frac{C_W^{(1)}}{n_0} \sum^{n_0}_{k=1} x_{k; \alpha_1} x_{k; \alpha_2}\right] \equiv \delta_{ij} G^{(1)}_{\alpha_1 \alpha_2}$

where $G^{(1)}$ is called the **first-layer metric**.

In [8]:
# Configure the layer with the Gaussian-initialization constants C_w and C_b,
# then read off the layer metric it provides.
layer_1.gaussian_init(C_w, C_b)
G1 = layer_1.layer_metric
# Component G^(1)_{alpha1 alpha2}; it matches the bracket derived by hand above.
G1[alpha[0], alpha[1]]

C_W^(1)*Sum(x[k_4, alpha1]*x[k_4, alpha2], (k_4, 1, n0))/n0 + C_b^(1)

## Four point correlator

In [9]:
# From here on the first-layer preactivation is an abstract indexed symbol
# flagged as Gaussian, and G stands for the first-layer metric G^(1).
z = NNIndexedBase('z^(1)', is_gaussian=True)
G = NNIndexedBase('G^(1)')
# Product of four preactivations, each with its own neuron and sample index.
zzzz = z[i[0], alpha[0]] * z[i[1], alpha[1]] * z[i[2], alpha[2]] * z[i[3], alpha[3]]
zzzz

z^(1)[i1, alpha1]*z^(1)[i2, alpha2]*z^(1)[i3, alpha3]*z^(1)[i4, alpha4]

In [10]:
# Same pipeline as for the two-point function, one stage per statement:
# expand, move sums to the front, then take the expectation value.
# (For this plain product the first two stages leave the expression as is.)
Ezzzz: sp.Expr = sp.expand(zzzz)
Ezzzz = pull_sums_out_front(Ezzzz)
Ezzzz = ExpVal(Ezzzz)
Ezzzz

𝔼[z^(1)[i1, alpha1]*z^(1)[i2, alpha2]*z^(1)[i3, alpha3]*z^(1)[i4, alpha4]]

In [11]:
# Wick's theorem: the four-point expectation of Gaussian variables becomes the
# sum over the three ways of pairing them into two-point expectations.
Ezzzz = wick_contraction(Ezzzz)
Ezzzz

𝔼[z^(1)[i1, alpha1]*z^(1)[i2, alpha2]]*𝔼[z^(1)[i3, alpha3]*z^(1)[i4, alpha4]] + 𝔼[z^(1)[i1, alpha1]*z^(1)[i3, alpha3]]*𝔼[z^(1)[i2, alpha2]*z^(1)[i4, alpha4]] + 𝔼[z^(1)[i1, alpha1]*z^(1)[i4, alpha4]]*𝔼[z^(1)[i2, alpha2]*z^(1)[i3, alpha3]]

In [12]:
# Rewrite every two-point expectation as delta_{ij} * G^(1)_{alpha beta}.
# The result is only displayed; Ezzzz itself is not reassigned.
Ezzzz.replace(ExpVal(z[A, B] * z[C, D]), sp.KroneckerDelta(A, C) * G[B, D])

KroneckerDelta(i1, i2)*KroneckerDelta(i3, i4)*G^(1)[alpha1, alpha2]*G^(1)[alpha3, alpha4] + KroneckerDelta(i1, i3)*KroneckerDelta(i2, i4)*G^(1)[alpha1, alpha3]*G^(1)[alpha2, alpha4] + KroneckerDelta(i1, i4)*KroneckerDelta(i2, i3)*G^(1)[alpha1, alpha4]*G^(1)[alpha2, alpha3]

In [13]:
# Connected four-point correlator of z^(1) over the four (neuron, sample)
# index pairs, reduced with Wick's theorem in the same expression.
conn4 = wick_contraction(
    connected_correlator(z, tuple(zip(i, alpha)))
)
conn4

0

The 4-point connected correlator is zero since $z^{(1)}$ follows a Gaussian distribution.

## Gaussian action in action

The expectation values of activations at the first layer can be calculated as **Gaussian expectations**:

$$\mathbb{E} \left[ \sigma\left(z^{(1)}_{i_1; \alpha_1} \right) \sigma\left(z^{(1)}_{i_1; \alpha_2} \right) \right] = \left\langle \sigma_{\alpha_1} \sigma_{\alpha_2} \right\rangle_{G^{(1)}}
$$

$$\mathbb{E} \left[ \sigma\left(z^{(1)}_{i_1; \alpha_1} \right) \sigma\left(z^{(1)}_{i_1; \alpha_2} \right) \sigma\left(z^{(1)}_{i_1; \alpha_3} \right) \sigma\left(z^{(1)}_{i_1; \alpha_4} \right) \right] 
= \left\langle \sigma_{\alpha_1} \sigma_{\alpha_2} \sigma_{\alpha_3} \sigma_{\alpha_4} \right\rangle_{G^{(1)}}
$$

By symmetry, the neuron index is omitted on the right-hand side.
Since distinct neurons are independent, for $i_1 \neq i_2$ the expectation factorizes into separate Gaussian integrals, one per neuron:

$$\mathbb{E} \left[ \sigma\left(z^{(1)}_{i_1; \alpha_1} \right) \sigma\left(z^{(1)}_{i_1; \alpha_2} \right) \sigma\left(z^{(1)}_{i_2; \alpha_3} \right) \sigma\left(z^{(1)}_{i_2; \alpha_4} \right) \right]
= \left\langle \sigma_{\alpha_1} \sigma_{\alpha_2} \right\rangle_{G^{(1)}} \left\langle \sigma_{\alpha_3} \sigma_{\alpha_4} \right\rangle_{G^{(1)}}
$$