In [9]:
import numpy as np
from gplearn.genetic import SymbolicRegressor
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [10]:
# Read both arrays inside a context manager: np.load on an .npz archive returns
# a lazy NpzFile that holds the file open until it is closed, so without the
# `with` the handle leaks for the lifetime of the kernel.
# Note: `problem` is closed after this block; use `x` and `y` from here on.
with np.load('../data/problem_1.npz') as problem:
    # The stored 'x' is transposed so that its first axis lines up with y.
    x = problem['x'].T
    y = problem['y']

# Sanity check: x and y should agree on the number of rows.
x.shape, y.shape

((500, 1), (500,))

In [11]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split
# identical from run to run.
x_train, x_valid, y_train, y_valid = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the genetic-programming symbolic regressor.
est = SymbolicRegressor(
    # Search budget and early-stopping threshold.
    population_size=2000,
    generations=20,
    stopping_criteria=0.01,
    # Genetic operator probabilities (these four sum to 0.95).
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    # Penalty on program length, and the fraction of the training samples
    # each program is evaluated on.
    parsimony_coefficient=0.01,
    max_samples=0.9,
    # Reproducibility and per-generation progress logging.
    random_state=42,
    verbose=1,
)

# Evolve programs on the training split only.
est.fit(x_train, y_train)

# Predict on the held-out validation split (reused by the evaluation cell).
y_pred = est.predict(x_valid)

# Show the best evolved expression.
print("Best formula:", est._program)



    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    49.61          14703.4       15        0.0194141        0.0218089     38.99s
   1    10.87          1.59355       15         0.019358        0.0223132     24.72s
   2     6.31         0.919549       15        0.0195138        0.0209111     22.98s
   3     1.65         0.827663        7         0.019175        0.0244398     17.74s
   4     1.71          3.76148        3        0.0267774        0.0255248     20.02s
   5     1.69         0.267989        7        0.0200875         0.019622     19.56s
   6     1.53          1.96656        1         0.035015        0.0572042     15.26s
   7     1.52          165.314        1        0.0346334        0.0606383     15.25s
   8     1.69         0.555737        3        0.0314635        0.0233225  

In [12]:
# Score the predictions against the held-out targets. The "Test Set" wording in
# the message refers to the validation split (x_valid / y_valid) created above.
mse = mean_squared_error(y_true=y_valid, y_pred=y_pred)
print(f"Mean Squared Error on Test Set: {mse}")

Mean Squared Error on Test Set: 0.0010592511241927591
