In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pysr import PySRRegressor
from IPython.display import Markdown as md

In [this article](https://arxiv.org/abs/2211.06393) J. Bayron Orjuela-Quintana et al. have found an explicit expression for the transfer function T(k):

$$ T(k;\omega_b,\omega_m) = [1+59.0998\ x^{1.49177}+4658.01\ x^{4.02755}+3170.79\ x^{6.06}+150.089\ x^{7.28478}]^{-\frac{1}{4}} $$

where
$$ x=\frac{k\ Mpc}{\omega_m-\omega_b} $$

and $\omega_i=\Omega_i h^2$, where $h$ is the reduced Hubble constant and $\Omega_i$ are the density parameters, with $i = b,c,m,r,\nu,\gamma$ denoting baryons, CDM, pressure-less matter, radiation, neutrinos and photons, respectively.

First of all, let's import T(k) from CLASS:

In [None]:
# Load the two transfer-function tables from the working directory.
# df_cl provides the 'k (h/Mpc)' column used to build the k grid;
# df_cus provides the 'k (1/Mpc)' and 'T(k)' columns used in the comparison plot.
df_cl, df_cus = (
    pd.read_csv(csv_name) for csv_name in ('TF_class.csv', 'TF_custom.csv')
)

Now, let's evaluate the analytic formula for $T(k)$ on the wavenumbers read from `TF_class.csv`:

In [None]:
# Cosmological parameters entering the analytic formula.
h = 0.6781            # reduced Hubble constant
omega_b = 0.0223828   # omega_b = Omega_b h^2 (baryons)
omega_m = 0.1201075   # omega_m = Omega_m h^2 (pressure-less matter)

# Wavenumber grid taken from the CLASS table.
# NOTE(review): the column is labelled 'k (h/Mpc)' and is divided by h here;
# converting h/Mpc to 1/Mpc would normally be a multiplication by h.
# Confirm the units actually stored in TF_class.csv.
k = df_cl['k (h/Mpc)'] / h

# Rescaled wavenumber x = k / (omega_m - omega_b).
x = k / (omega_m - omega_b)

# (coefficient, exponent) pairs of the power-law terms inside the bracket.
ga_terms = (
    (59.0998, 1.49177),
    (4658.01, 4.02755),
    (3170.79, 6.06),
    (150.089, 7.28478),
)

# Accumulate 1 + sum_i c_i x^{p_i} term by term (same left-to-right order as
# the written-out expression), then raise the bracket to the power -1/4.
bracket = 1
for coeff, exponent in ga_terms:
    bracket = bracket + coeff * x**exponent
T = bracket ** -0.25

In [None]:
# Compare the tabulated transfer function with the analytic (GA) formula on
# log-log axes, using the explicit Figure/Axes interface.
# NOTE(review): the first 4 tabulated points and the last 3 formula points are
# dropped; the reason is not recorded here (presumably to align the two
# k ranges) — confirm.
# NOTE(review): the curve labelled 'computed by CLASS' is read from df_cus;
# confirm that table really holds the CLASS output.
fig, ax = plt.subplots()
ax.plot(
    df_cus['k (1/Mpc)'].iloc[4:],
    df_cus['T(k)'].iloc[4:],
    label='computed by CLASS',
    lw=2,
)
ax.plot(
    k.iloc[:-3],
    T.iloc[:-3],
    label='analytic formula (from GA)',  # legend typo "analityc" fixed
    linestyle='--',
    color='r',
)
ax.set_xlabel(r'$k\ [\frac{1}{Mpc}]$')
ax.set_ylabel(r'$T(k)$')
ax.set_title(r"Matter transfer function $T(k)$")
ax.set_xscale('log')
ax.set_yscale('log')
ax.grid(True)
ax.legend();  # trailing ';' hides the Legend repr in the cell output

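I don't understand why I obtain such good agreement even though I'm neglecting that the numerator of $x$ is dimensionless ($k\ Mpc$): in the calculation I used only $k$, which is dimensional ($[k]=\frac{1}{Mpc}$). A likely explanation is that the factor $Mpc$ merely cancels the unit of $k$, so $k\ Mpc$ is just the numerical value of $k$ expressed in $Mpc^{-1}$, which is exactly the number used in the code.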

However, let's try to fit the analytic expression using PySR:

In [None]:
# Constant columns holding omega_b and omega_m, one entry per wavenumber.
obv = np.full(len(k), omega_b)
omv = np.full(len(k), omega_m)

# Stack k, omega_m and omega_b as rows -> shape (3, len(k)).
# NOTE(review): X is not passed to the model in the cells visible here, and a
# scikit-learn style estimator would expect shape (n_samples, n_features),
# i.e. the transpose of this array — confirm before using it for a fit.
X = np.vstack((k.values, omv, obv))

# Column vector of the single feature x, shape (len(k), 1).
# np.asarray(...) is used instead of x.values so that this cell can be re-run:
# after the first execution x is already an ndarray and has no `.values`.
x = np.asarray(x).reshape(-1, 1)

# Names of the cluster job-queue systems that can be passed as `cluster_manager`.
cluster = ["slurm", "pbs", "lsf", "sge", "qrsh", "scyld", "htc"]

In [None]:
# Symbolic-regression search configuration.
model = PySRRegressor(
    # --- search budget -------------------------------------------------
    niterations=100,   # increase for better results
    maxsize=40,        # upper bound on expression size
    # --- operator set --------------------------------------------------
    binary_operators=["+", "*", "-", "/", "pow"],
    # Unary operators are currently disabled; to enable them, pass e.g.
    #   unary_operators=["exp", "log"]
    # Per-argument constraint on "pow" as (base, exponent); per the PySR docs
    # -1 means unconstrained, so only the exponent sub-expression is limited.
    constraints={"pow": (-1, 2)},
    # Limit on how deeply "pow" may be nested inside another "pow".
    nested_constraints={"pow": {"pow": 2}},
    # --- objective (Julia syntax): plain squared error -----------------
    loss="loss(prediction, target) = (prediction - target)^2",
    # --- execution -----------------------------------------------------
    turbo=True,
    # NOTE(review): a cluster manager ("slurm") is selected together with
    # multithreading=True; these look like two different parallelism modes —
    # confirm which one is intended.
    cluster_manager=cluster[0],
    multithreading=True,
)

In [None]:
# Run the search: the single feature column x is fitted to the target T.
model.fit(X=x, y=T)

In [None]:
# Render the model's equation as LaTeX inside a Markdown cell output.
equation_latex = model.latex()
md(f"The model has this equation as output: ${equation_latex}$")

In [None]:
# Evaluate the fitted expression on the same x used for the fit.
pred = model.predict(x)

# Signed residual between the GA formula (T) and the PySR prediction.
diff = T - pred

# Absolute residual as a percentage of the GA value.
err = 100 * np.abs(diff) / T

In [None]:
# Three stacked panels on the same k grid:
#   1) PySR prediction vs. GA formula (log-log),
#   2) signed difference GA - PySR,
#   3) percentage error relative to the GA formula.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(9, 18))

k_label = r'$k \,\,\,\, [1/\mathrm{Mpc}]$'

# Panel 1: the two curves on top of each other.
ax1.plot(k, pred, label='analytic formula (from PySR)', lw=2)
ax1.plot(k, T, label='analytic formula (from GA)', linestyle="--", color='r')
ax1.set_xscale('log')
ax1.set_yscale('log')
ax1.set_xlabel(k_label)
ax1.set_ylabel(r'$T(k)$')
ax1.set_title('Transfer function T(k)')
ax1.legend()

# Panel 2: signed residual. The x-axis is logarithmic to match panel 1.
ax2.plot(k, diff)
ax2.set_xscale('log')
ax2.set_xlabel(k_label)
# The original label had a single unbalanced '$', which is drawn literally.
ax2.set_ylabel('GA - PySR')
ax2.set_title('Difference between the formula and data')

# Panel 3: percentage error, same logarithmic k axis.
ax3.plot(k, err)
ax3.set_xscale('log')
ax3.set_xlabel(k_label)
ax3.set_ylabel('percentage error')
ax3.set_title('Percentage error of the PySR fit relative to the GA formula')

fig.tight_layout();  # trailing ';' hides the return value in the cell output
