In [7]:
import numpy as np
from gplearn.genetic import SymbolicRegressor
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [8]:
# Benchmark archives are named problem_0.npz ... problem_8.npz. Generating the
# list from the problem number keeps the printed label and the file in sync.
files = [f'../data/problem_{number}.npz' for number in range(9)]

# Single seed shared by the train/validation split and the genetic search so
# that a Restart & Run All reproduces the same formulas.
SEED = 42

# Hyper-parameters of the symbolic regressor; identical for every problem, so
# they are defined once instead of being rebuilt on each loop iteration.
GP_PARAMS = dict(
    population_size=2000,
    generations=20,
    stopping_criteria=0.01,   # stop early once the best fitness drops below this
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,          # remaining samples feed the "OOB Fitness" column
    verbose=1,
    parsimony_coefficient=0.01,
    random_state=SEED,
)

# `index` now starts at 0 so that "problem number N" refers to problem_N.npz
# (the previous start=1 labelled problem_0.npz as problem 1).
for (index, file) in enumerate(files):

    # np.load returns an NpzFile that holds the archive open; the context
    # manager closes it as soon as both arrays have been read into memory.
    with np.load(file) as problem:
        # 'x' is transposed so that rows are samples (see the printed shapes).
        x = problem['x'].T
        y = problem['y']

    print("problem number",index,"----------------->", (x.shape, y.shape))

    # Hold out 20% of the samples for validation.
    x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, random_state=SEED)

    # A fresh estimator per problem: nothing is carried over between datasets.
    est = SymbolicRegressor(**GP_PARAMS)

    # Fit the model
    est.fit(x_train, y_train)

    # Predict on the held-out validation split
    y_pred = est.predict(x_valid)

    # Print the resulting formula
    print("-------------------Best formula:", est._program)

    # Evaluate on the validation split
    mse = mean_squared_error(y_valid, y_pred)
    print(f"Mean Squared Error on Test Set: {mse}")

problem number 1 -----------------> ((1000, 2), (1000,))
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left




   0    37.95         2.44e+07        5        0.0913182         0.101076     40.09s
   1    10.51          4.66147       35        0.0336774        0.0448514     21.68s
   2     6.17           2.2241        7        0.0206465        0.0211656     19.05s
   3     1.62           1.2801        7        0.0205167        0.0223341     16.39s
   4     1.80         0.502547        7        0.0204261        0.0231494     16.20s
   5     2.58          1.05212        5       0.00419889       0.00442659     14.79s
-------------------Best formula: sub(X0, mul(-0.188, X1))
Mean Squared Error on Test Set: 2.9642199184763113e-05
problem number 2 -----------------> ((500, 1), (500,))
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left




   0    49.61          14703.4       15        0.0194141        0.0218089     38.03s
   1    10.87          1.59355       15         0.019358        0.0223132     22.98s
   2     6.31         0.919549       15        0.0195138        0.0209111     20.69s
   3     1.65         0.827663        7         0.019175        0.0244398     16.57s
   4     1.71          3.76148        3        0.0267774        0.0255248     15.17s
   5     1.69         0.267989        7        0.0200875         0.019622     14.21s
   6     1.53          1.96656        1         0.035015        0.0572042     13.40s
   7     1.52          165.314        1        0.0346334        0.0606383     13.19s
   8     1.69         0.555737        3        0.0314635        0.0233225     11.05s
   9     1.71          2.20067        3        0.0270848         0.029049     10.29s
  10     1.81         0.579345        1        0.0337048        0.0689957      9.96s
  11     1.50         0.276071        1        0.0345376        0



   0    33.27        2.227e+07       15      4.86161e+06      5.28609e+06     44.65s
   1    28.61      4.90663e+06       59       4.8224e+06      5.04621e+06     39.14s
   2    34.09      5.08812e+06       51      4.72536e+06       4.7495e+06     37.28s
   3    40.02      4.47662e+08       47      4.71388e+06      4.78502e+06     39.50s
   4    58.82      6.02613e+09       53      4.69819e+06      4.88888e+06     42.07s
   5    57.66      2.14418e+09       65      4.67896e+06      4.91483e+06     40.23s
   6    59.69      1.82972e+09       57      4.68806e+06      4.97727e+06     36.81s
   7    60.52      2.22122e+09       35      4.64268e+06      4.59408e+06     34.48s
   8    64.06       1.4889e+09       31      4.62406e+06      4.72035e+06     32.50s
   9    67.49       1.2614e+09       31      4.59737e+06      4.95865e+06     30.71s
  10    55.67      4.96095e+09       31      4.58563e+06      4.80277e+06     25.39s
  11    35.59      1.04049e+09       69      4.57799e+06      5.0



   0    33.27      2.42385e+07        9          27.0422          27.4575     47.40s
   1    11.65          52.2842        9          27.0997          26.9401     28.62s
   2    14.38          103.152       17          21.3119          22.8707     28.40s
   3    14.79          55.1355       27          18.8168          17.5467     29.27s
   4    19.36          67.3282       27          17.9121          17.9241     28.85s
   5    24.77          63.2271       19          17.0319          17.0693     30.60s
   6    24.64          101.921       25          16.2918          16.0968     27.48s
   7    24.98           42.267       27          15.8011          16.9189     24.54s
   8    24.97          36.5662       35          15.8215          16.9302     25.23s
   9    25.50          32.0101       25          15.8033          17.7427     21.90s
  10    26.60          33.9826       29          15.7211          16.7301     18.62s
  11    26.52          33.0107       35          15.6442         



   0    37.95       1.1764e+07        3          3.96649          4.31342     52.23s
   1    10.35          11.1915       17          3.94207          3.47371     28.56s
   2     5.02          6.34729        3          3.95643          4.40401     25.38s
   3     2.88          29.9505        1          3.95381          4.44509     22.67s
   4     2.10          4.95953        1           3.9593          4.39566     20.50s
   5     1.55          18.7206        1          3.95786          4.44868     19.19s
   6     1.49          4.32772        1           3.9595          4.38876     19.64s
   7     1.60          5.14626        1          3.96167          4.37432     16.99s
   8     1.58           2163.6        1          3.95866          4.39628     15.19s
   9     1.80           10.011        1          3.95772          4.40989     13.22s
  10     1.62          5.34706        1          3.95949          4.39394     13.54s
  11     1.51          5.84419        1          3.95934         



   0    37.95      9.57199e+07       25      7.26275e-10      8.98283e-10     50.68s
-------------------Best formula: mul(mul(sub(add(X0, X1), add(X1, X0)), mul(div(X1, X0), sub(0.384, 0.629))), sub(-0.720, sub(add(-0.502, X1), sub(X1, -0.558))))
Mean Squared Error on Test Set: 6.023793117670641e-18
problem number 7 -----------------> ((5000, 2), (5000,))
    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left




   0    37.95           498274        7         0.733513          0.71309     52.39s
   1    11.53          14.9767       13         0.195973         0.190186     29.25s
   2     9.20          4.77595       13         0.193551          0.21199     28.46s
   3     9.20          5.29178       15          0.15439         0.151034     25.01s
   4     9.50          4.26564       15         0.153759         0.156716     23.81s
   5    10.46          22.3252       15        0.0419953        0.0426601     25.62s
   6     9.58          3.79505       19        0.0417746        0.0446462     20.71s
   7     9.35          4.73044       19        0.0416232        0.0460087     19.53s
   8    12.32          640.652       11         0.041662        0.0456593     19.07s
   9    14.94          12.0385       15        0.0180636        0.0191328     16.67s
  10    13.79          6.96722       11        0.0180477        0.0192754     14.38s
  11    11.63          6.86815        7       0.00271655       0.



   0    37.95          58594.7       15          7.45545           6.3986     51.07s
   1    10.64          19.9165       23          6.78409          8.93869     28.16s
   2    12.99          21.0593       27          6.58622          8.04709     28.89s
   3    17.82          9.40817       19          6.24914          8.25361     28.83s
   4    19.26          8.98816       35          6.16254          9.18567     25.57s
   5    19.04          10.2422       43          5.99513          5.55128     25.65s
   6    18.23          8.55514       43          5.81049          7.21304     21.58s
   7    17.39          10.7962       41          5.78292          7.69546     19.84s
   8    17.74          9.69563       19          5.56516           7.4378     19.17s
   9    20.00          19.4874       21          5.08983          7.75918     17.33s
  10    21.02          10.2772       21          5.20735          6.70156     16.32s
  11    20.99          9.16548       21           4.9311         



   0    28.37           253315       13          2706.85          2726.41      2.72m
   1    12.42          3050.86       13          2707.15          2723.66      1.74m
   2    22.57          3495.96       55          2638.22          2672.69      2.25m
   3    50.41           163440       61          2567.45          2522.17      4.74m
   4    46.64           719581       69           2286.4          2258.37      4.00m
   5    66.31          10470.7       73          2279.76          2319.24      4.10m
   6    73.92          5932.43      115          2251.64          2288.75      3.97m
   7    80.29          33374.3       87          2181.41          2245.39      4.24m
   8    95.75          59446.9      131          2182.94          2251.99      5.20m
   9   114.86          97480.3       85           2155.2          2124.26      6.11m
  10   135.31          59495.1       89           2149.6          2173.09      5.88m
  11   114.11           191888       87          2146.14         