In [None]:
import pysr
from pysr import PySRRegressor # Main tool for regression
import numpy as np
import sympy # Represent Equations

In [None]:
# Testing PySR using the 3D Distance Formula
#
# Build a synthetic dataset of point pairs (x1, y1, z1) and (x2, y2, z2) and
# the Euclidean distance between them; the distance is the regression target.

SEED = 42          # fixed seed so Restart & Run All regenerates the same data
N_SAMPLES = 1000   # number of point pairs
COORD_NAMES = ('x1', 'x2', 'y1', 'y2', 'z1', 'z2')  # also fixes the column order

rng = np.random.default_rng(SEED)

# One (N_SAMPLES, 1) column per coordinate, each drawn uniformly from [0, 10).
# The comprehension draws in COORD_NAMES order, matching the original
# key-by-key draw sequence.
points = {
    name: rng.uniform(0, 10, size=N_SAMPLES).reshape(-1, 1)
    for name in COORD_NAMES
}

# Target: sqrt(dx^2 + dy^2 + dz^2), shape (N_SAMPLES, 1).
distances = np.sqrt(
    (points['x2'] - points['x1']) ** 2
    + (points['y2'] - points['y1']) ** 2
    + (points['z2'] - points['z1']) ** 2
)

In [None]:
# Place the coordinate columns side by side (in the dict's key order) to form
# the feature matrix, then report its shape as a sanity check.
coordinate_columns = [points[name] for name in points]
inputs = np.hstack(coordinate_columns)
print(inputs.shape)

In [None]:
# Base Model
#
# Baseline search restricted to the operators that appear in the distance
# formula as it is expressed here (sums of squares under a square root).
# NOTE(review): newer PySR releases appear to replace `temp_equation_file` /
# `tempdir` with `output_directory` -- confirm against the installed version.

model = PySRRegressor(
    # --- operator set ---
    binary_operators=["+"],
    unary_operators=["square", "sqrt"],
    # --- search budget ---
    niterations=100,
    ncycles_per_iteration=760,
    populations=31,
    population_size=27,
    maxsize=20,
    # --- objective: squared error per sample ---
    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
    # --- where equation files are written ---
    temp_equation_file=True,
    tempdir='outputs',
)

In [None]:
# Fit on the feature matrix; the variable names come from the same dict that
# produced the columns, so names and columns stay aligned.
model.fit(inputs, distances, variable_names=list(points))

In [None]:
# Print the SymPy form of the equation reported by the fitted model.
best_equation = model.sympy()
print(best_equation)

In [None]:
# What if we add some operators not part of the original equation?
#
# Same search budget as the base model; only the operator lists grow.

model = PySRRegressor(
    # --- operator set (superset of the base model's) ---
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["square", "sqrt", "abs", "cos", "tan"],
    # --- search budget (unchanged from the base model) ---
    niterations=100,
    ncycles_per_iteration=760,
    populations=31,
    population_size=27,
    maxsize=20,
    # --- objective: squared error per sample ---
    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
    # --- where equation files are written ---
    temp_equation_file=True,
    tempdir='outputs',
)

model.fit(inputs, distances, variable_names=list(points))

print(model.sympy())

In [None]:
# The larger operator set widens the search space, so the model is much less
# likely to find the equation. Counteract this by tripling niterations
# (100 -> 300); every other setting matches the previous experiment.

model = PySRRegressor(
    # --- operator set ---
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["square", "sqrt", "abs", "cos", "tan"],
    # --- search budget ---
    niterations=300,  # tripled
    ncycles_per_iteration=760,
    populations=31,
    population_size=27,
    maxsize=20,
    # --- objective: squared error per sample ---
    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
    # --- where equation files are written ---
    temp_equation_file=True,
    tempdir='outputs',
)

model.fit(inputs, distances, variable_names=list(points))

print(model.sympy())

In [None]:
# Increasing iterations increases local search quality. What about the number
# of populations? Here niterations is back at 100 and populations is tripled
# (31 -> 93).

model = PySRRegressor(
    # --- operator set ---
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["square", "sqrt", "abs", "cos", "tan"],
    # --- search budget ---
    niterations=100,
    ncycles_per_iteration=760,
    populations=93,  # tripled
    population_size=27,
    maxsize=20,
    # --- objective: squared error per sample ---
    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
    # --- where equation files are written ---
    temp_equation_file=True,
    tempdir='outputs',
)

model.fit(inputs, distances, variable_names=list(points))

print(model.sympy())

In [None]:
# Increasing the number of populations increases global search opportunities.
# What about population size? Here populations is back at 31 and
# population_size is tripled (27 -> 81).

model = PySRRegressor(
    # --- operator set ---
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["square", "sqrt", "abs", "cos", "tan"],
    # --- search budget ---
    niterations=100,
    ncycles_per_iteration=760,
    populations=31,
    population_size=81,  # tripled
    maxsize=20,
    # --- objective: squared error per sample ---
    elementwise_loss="loss(prediction, target) = (prediction - target)^2",
    # --- where equation files are written ---
    temp_equation_file=True,
    tempdir='outputs',
)

model.fit(inputs, distances, variable_names=list(points))

print(model.sympy())