We begin with a simple plot to give us a starting point and validate that our simulation is giving sensible results. We would expect that significantly below the critical value, all of the percolation clusters in our simulation would terminate, whilst significantly above the critical value there would be one single cluster which dominates, and does not terminate. A simple plot of log cluster size against log number of clusters over a range of probabilities shows this clearly.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

def plot_size_num_prob_3d(directory, bins=False):
  """Draw a 3D scatter of cluster counts against size and run probability.

  Every entry of ``directory`` (visited in sorted filename order) is read as
  one run file: its second line is a comma-separated parameter record and its
  comma-separated data rows start after four header lines.  Data column 0 is
  plotted on the z axis, columns 1 and 2 are plotted on the x axis as two
  separate series (terminated and non-terminated clusters), and the first
  run parameter is used as the y value for every row of that file.

  Args:
    directory: Path of the folder holding the run files.
    bins: If truthy, column 0 is plotted unchanged and columns 1 and 2 are
      divided by ``int(params[3])`` before taking ``log2`` (params[3] is
      presumably a per-run sample count -- confirm against the file format).
      Otherwise the natural log of all three columns is plotted.
  """
  x1data = []  # terminated clusters
  x2data = []  # non-terminated clusters
  ydata = []
  zdata = []

  for filename in sorted(os.listdir(directory)):
    path = os.path.join(directory, filename)

    # The run parameters live on the second line.  Reading it directly means a
    # short file yields an empty record (and fails loudly below) instead of
    # silently reusing the parameters of the previous file.
    with open(path) as fp:
      fp.readline()
      params = fp.readline().split(',')

    data = np.genfromtxt(path, delimiter=',', skip_header=4)

    if bins:
      norm = int(params[3])
      x1data.extend(np.log2(data[:, 1] / norm))
      x2data.extend(np.log2(data[:, 2] / norm))
      zdata.extend(data[:, 0])
    else:
      x1data.extend(np.log(data[:, 1]))
      x2data.extend(np.log(data[:, 2]))
      zdata.extend(np.log(data[:, 0]))

    # One y value (the run's first parameter) per data row.
    ydata.extend([float(params[0])] * len(data))

  fig = plt.figure()
  ax = fig.add_subplot(projection='3d')
  ax.scatter(x1data, ydata, zdata)
  ax.scatter(x2data, ydata, zdata)
  fig.show()
   

# Sanity-check plot: unbinned (natural-log) scatter of every run file under data/p_24_26.
plot_size_num_prob_3d("data/p_24_26")


Let $n_s(p)$ be the average number of clusters of size $s$ per lattice point, for a given probability $p$. According to (...) we have that

$$n_s(p) = s^{-\tau}(f_0(z)+s^{-\Omega}f_1(z)+...)$$

where $\tau$ is the _Fisher exponent_, $\Omega$ accounts for the leading errors due to the finite size of our lattices, $z=(p-p_c)s^{\sigma}$ for some constant $\sigma$, and $f_0, f_1, \dots$ are analytic in a neighbourhood of zero. Taking the Taylor expansion of the $f_i$ we obtain

$$n_s(p) = s^{-\tau}(c_0 + c_1(p-p_c)s^{\sigma} + c_2s^{-\Omega} + c_3(p-p_c)s^{\sigma - \Omega} + ...)$$

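Taking logs, pulling out the constant factor $c_0$ (so that $a_0 = \log c_0$ and $a_i = c_i/c_0$ for $i \geq 1$) and Taylor expanding $\log(1+x) \approx x$ to first order, we obtain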

$$
\begin{aligned}
\log(n_s) &= -\tau \log(s) + a_0 + \log\left(1 + a_1(p-p_c)s^{\sigma} + a_2s^{-\Omega} + a_3(p-p_c)s^{\sigma - \Omega} + ...\right) \\
&\approx -\tau \log(s) + a_0 + a_1(p-p_c)s^{\sigma} + a_2s^{-\Omega} + a_3(p-p_c)s^{\sigma - \Omega} + ...
\end{aligned}
$$

Thus (aside from the effects of $\Omega$) we expect the critical value to occur when we have a linear relationship between $\log n_s$ and $\log s$, i.e. a pure power law. This aligns well with our initial sanity-check plot.

It will be convenient for our simulations to sample individual points from our lattice, rather than counting clusters. So we instead consider the probability $P(s)$ that a given point lives in a cluster of size $s$, which is simply $sn_s$.

One obstacle we must overcome if we wish to make accurate predictions is the fact that our simulations must always be finite. In order to remove boundary effects, it is convenient to consider the cumulative distribution $Q(s) = \sum_{t=s}^{\infty}P(t)$. This way, we can simply collect up all the terms affected by the boundary into the final bin. Approximating this as an integral, and writing $c_i'$ for the constants after they have absorbed the numerical factors produced by the integration, we obtain

$$
\begin{aligned}
Q(s,p) = Q(s) &= \int_{s}^{\infty} t^{1-\tau}(c_0 + c_1(p-p_c)t^{\sigma} + c_2t^{-\Omega} + c_3(p-p_c)t^{\sigma - \Omega} + ...)dt + ... \\
&= s^{2-\tau}(c_0' + c_1'(p-p_c)s^{\sigma} + c_2's^{-\Omega} + c_3'(p-p_c)s^{\sigma - \Omega} + ...) + ...
\end{aligned}
$$

and so, taking logs and expanding as before (with the constants $a_i$ redefined accordingly),

$\log Q(s) = (2-\tau) \log(s) + a_0 + a_1(p-p_c)s^{\sigma} + a_2s^{-\Omega} + a_3(p-p_c)s^{\sigma - \Omega} + ...$

Thus to obtain accurate estimates for $p_c$ (and $\tau$) it remains to generate as much data as possible on the relationship between $n_s$ and $s$, whilst minimising the effects of finite lattice sizes.

We shall therefore sample the size of the parent cluster for all of the points from a smaller central cube of side length $L'$ from our lattice of side length $L$, and sum up our results over a large number of runs.

In [None]:
# Binned variant for the run files under data/p_244: bins=True plots log2 of the
# counts after dividing them by the fourth run parameter.
plot_size_num_prob_3d("data/p_244", bins=True)

from scipy.optimize import curve_fit

def rhs(log2_size, tau, omega, a1, a2):
  """Single-probability fit model, evaluated at ``log2_size``.

  Returns ``(2 - tau) * log2_size + a1 * 2**(-omega * log2_size) + a2``:
  a straight line in ``log2_size`` plus a correction term that decays as a
  power of the size ``2**log2_size``.  Works for scalars and, because only
  arithmetic operators are used, for NumPy arrays.
  """
  power_law_term = -(tau - 2) * log2_size
  correction_term = a1 * 2**(-log2_size * omega)
  return power_law_term + correction_term + a2

# Fit `rhs` separately to every run file in `directory` and draw one panel per
# file showing the data points together with the fitted curve.
directory = "data/p_244_test21"
params = []

filenames = sorted(os.listdir(directory))

# One panel per data file (2 units of height each) instead of a fixed 10, so a
# directory with more files no longer overruns `axs` and one with fewer leaves
# no empty panels.  atleast_1d keeps `axs` indexable when there is one panel.
n_panels = max(len(filenames), 1)
fig, axs = plt.subplots(n_panels)
axs = np.atleast_1d(axs)
fig.set_figheight(2 * n_panels)

for idx, filename in enumerate(filenames):
  path = os.path.join(directory, filename)

  # Run parameters are the comma-separated fields of the second line; reading
  # it directly avoids reusing the previous file's record on a short file.
  with open(path) as fp:
    fp.readline()
    params = fp.readline().split(',')

  data = np.genfromtxt(path, delimiter=',', skip_header=4)

  # Keep only rows whose column 2 is zero and whose column 1 is non-zero.
  keep = (data[:, 2] == 0) & (data[:, 1] != 0)
  # tail_counts[i] = sum over rows >= i of (column 1 + column 2).
  tail_counts = np.cumsum((data[:, 1] + data[:, 2])[::-1])[::-1]

  size_data = list(data[keep, 0])
  number_data = list(np.log2(tail_counts[keep]))

  # Parameter order: tau, omega, a1, a2.
  popt, pcov = curve_fit(rhs, size_data, number_data, p0=(2.19,0.5,0.1,30), bounds=((1, -1, -10, -10), (3, 5, 50, 50)))

  print(popt)
  print(np.sqrt(np.diag(pcov)))  # one-sigma parameter uncertainties
  axs[idx].scatter(size_data, number_data)

  pred_num_data = [rhs(s, *popt) for s in size_data]
  axs[idx].plot(size_data, pred_num_data)

  axs[idx].set_xlabel("log size")
  axs[idx].set_title(params[0])
  axs[idx].set_ylabel("log num_clusters")

fig.show()

# Probably better to run over points in central cube and just keep track of what proportion belong to each size?

In [None]:
def rhs1(xy, p_c, tau, omega, sigma, a_0, a_1, a_2, a_3):
  """Two-variable fit model in (log2 size, probability).

  ``xy`` unpacks into ``(lg2_s, p)``.  The result is
  ``(2 - tau) * lg2_s + a_0`` plus three correction terms built from
  ``p - p_c`` and powers of the size ``2**lg2_s``.

  NOTE: the ``sigma`` argument is accepted but ignored -- the exponent is
  pinned to 0.453 inside the function, so a fit cannot move it.
  """
  lg2_s, p = xy
  pinned_sigma = 0.453  # deliberately overrides the `sigma` argument
  distance = p - p_c

  linear_term = -(tau - 2) * lg2_s
  critical_term = a_1 * distance * 2**(lg2_s * pinned_sigma)
  correction_term = a_2 * 2**(-lg2_s * omega)
  mixed_term = a_3 * distance * 2**(lg2_s * (pinned_sigma - omega))

  return linear_term + a_0 + critical_term + correction_term + mixed_term

# Pooled fit: gather (log2 size, p, log2 tail sum) points from every run file in
# `directory`, fit `rhs1` to all of them at once, then draw one panel per file.
directory = "data/p_244_test21"
params = []

# Initial guess for tau; the same value rescales the plotted tail sums below.
tau_guess = 2.19

size_data = []
p_data = []
lhs_data = []

filenames = sorted(os.listdir(directory))

# One panel per data file (2 units of height each) instead of a fixed 10.
# atleast_1d keeps `axs` indexable when there is a single panel.
n_panels = max(len(filenames), 1)
fig, axs = plt.subplots(n_panels)
axs = np.atleast_1d(axs)
fig.set_figheight(2 * n_panels)

# Per-file (title, kept row indices, kept sizes, kept tail sums), stored so the
# plotting pass does not have to read and parse every file a second time.
runs = []

for filename in filenames:
  path = os.path.join(directory, filename)

  # Run parameters are the comma-separated fields of the second line; reading
  # it directly avoids reusing the previous file's record on a short file.
  with open(path) as fp:
    fp.readline()
    params = fp.readline().split(',')

  data = np.genfromtxt(path, delimiter=',', skip_header=4)

  # Keep only rows whose column 2 is zero and whose column 1 is non-zero.
  keep = (data[:, 2] == 0) & (data[:, 1] != 0)
  # tail_counts[i] = sum over rows >= i of (column 1 + column 2).
  tail_counts = np.cumsum((data[:, 1] + data[:, 2])[::-1])[::-1]

  size_data += list(data[keep, 0])
  p_data += [float(params[0])] * int(np.count_nonzero(keep))
  lhs_data += list(np.log2(tail_counts[keep]))

  runs.append((params[0], np.flatnonzero(keep), data[keep, 0], tail_counts[keep]))

# Parameter order: p_c, tau, omega, sigma, a_0, a_1, a_2, a_3.
popt, pcov = curve_fit(rhs1, (size_data, p_data), lhs_data, p0=(0.2488,tau_guess,0.5,0.5,20,1,1,1), bounds=((0.2, 1, 0.2, 0.2, -10, -10, -10, -10), (0.3, 3, 5, 5, 30, 30, 30, 30)), maxfev=10000)

print(popt)
print(np.sqrt(np.diag(pcov)))  # one-sigma parameter uncertainties

for idx, (title, kept_rows, run_sizes, run_tails) in enumerate(runs):
  # Each tail sum is multiplied by 2**(row * (tau_guess - 2)) before the log.
  # The fitted curve is not overlaid: rhs1 was fitted to the unscaled log2
  # tail sums, whereas these panels show the rescaled values.
  number_data = list(np.log2(2.0 ** (kept_rows * (tau_guess - 2)) * run_tails))
  axs[idx].scatter(run_sizes, number_data)

  axs[idx].set_xlabel("log size")
  axs[idx].set_title(title)
  axs[idx].set_ylabel("log num_clusters")

fig.show()

In [None]:
# Goal: plot s^(1-sigma)/sigma * d log(Q(s)) / ds against p, estimated per file
# as a finite difference of the rescaled log2 tail sums with respect to
# 2**(sigma * row) between consecutive rows.
# NOTE(review): sigma is 0.435 here while rhs1 pins 0.453 -- confirm which value
# is intended.

directory = "data/p_244_test20"
params = []

pdata = []
ydata = []
sigma=0.435

for filename in sorted(os.listdir(directory)):
  path = os.path.join(directory, filename)

  # The second line of the file holds the comma-separated run parameters; if
  # the file has no second line, `params` keeps its previous value.
  with open(path) as fp:
    header = [fp.readline(), fp.readline()]
  if header[1]:
    params = header[1].split(',')

  data = np.genfromtxt(path, delimiter=',', skip_header=4)

  tau_shift = 2.19 - 2
  counts = data[:, 1] + data[:, 2]

  grad_data = []
  for i in range(len(data)):
    # Use only rows whose column 2 is zero and whose column 1 is non-zero.
    if data[i, 2] != 0 or data[i, 1] == 0:
      continue

    # Rescaled log2 tail sums starting at row i and at row i + 1
    # (presumably row i corresponds to size 2**i -- confirm).
    log_q_here = np.log2(np.sum(2**(i*tau_shift)*counts[i:]))
    log_q_next = np.log2(np.sum(2**((i+1)*tau_shift)*counts[i+1:]))
    step = 2**(sigma*(i+1)) - 2**(sigma*i)

    grad_data.append((log_q_next - log_q_here) / step)

  print(grad_data)
  # The second-to-last retained gradient represents this file.
  ydata.append(grad_data[-2])
  pdata.append(float(params[0]))

# The first file is left out of the plot.
plt.plot(pdata[1:], ydata[1:])
