In [None]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm
from pprint import pprint

from algorithms.gp.gpgomea import GpGomeaConfig, GpGomeaRegressor

In [None]:
# GP-GOMEA settings for this run: only `verbose` and `finetune` are overridden,
# every other field keeps the default of GpGomeaConfig (see the printed repr).
config = GpGomeaConfig(
    finetune=True,
    verbose=False,
)
pprint(config)

GpGomeaConfig(t=3600, g=-1, e=499500, finetune_max_evals=500, finetune=True, tour=4, d=4, pop=1024, disable_ims=True, feat_sel=20, no_univ_exc_leaves_fos=False, no_large_fos=True, bs=100, fset='+,-,*,/,log,sqrt,sin,cos', cmp=0.0, rci=0.0, verbose=False, random_state=0)


In [None]:
train_df = pd.read_csv("dataset/train_df.csv", index_col=0)

# Keep exactly one row per `number` group. Each group is sampled with the same
# fixed seed, so the selection is reproducible across runs.
sampled_df = (
    train_df
    .groupby('number')
    .apply(lambda group: group.sample(1, random_state=42))
    .reset_index(drop=True)
)

# Relative location of each sample's array file: <filename>/<data_num>.npy
sampled_df["path"] = sampled_df.apply(
    lambda rec: os.path.join(rec["filename"], f"{rec['data_num']}.npy"),
    axis=1,
)
sampled_df.head()

In [None]:
# Fit one GP-GOMEA model per row of `sampled_df`.
# `models` is appended to once per row in iteration order; an entry is None
# when constructing the regressor or fitting raised an exception.
models = []

for index, row in tqdm(sampled_df.iterrows(), total=len(sampled_df)):
    data = np.load(os.path.join("dataset", row["path"]))
    # All columns but the last are passed as inputs X, the last column as y.
    X = data[:, :-1]
    y = data[:, -1]
    try:
        regressor = GpGomeaRegressor(config)
        model = regressor.predict_single(X, y)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop this long-running loop,
        # and report the failure instead of silently hiding it.
        # tqdm.write prints without corrupting the progress bar.
        tqdm.write(f"[{index}] GP-GOMEA failed on {row['path']}: {exc!r}")
        model = None

    models.append(model)

0 0.0790617
1 -0.0636908188462257 + (0.0898348242044449*x_0 + 0.210512861609459 + 0.317851394414902/x_0)/(x_0 + x_1**2/x_0**2)
2 -0.0859353691339493 + (0.0913130939006805*x_0 + 0.384801775217056)*cos((0.767461121082306*x_1 - 0.76799213886261*x_2)/x_0)/x_0
3 0.198126062750816*(x_0 - x_1)**2 + 0.199752911925316*(x_2 - x_3)**2 - 0.396846324205399*cos(x_0 - x_1)*cos(x_2 - x_3) + 1.04219973087311
4 0.354746073484421*x_0*x_1*x_2*(x_4 + x_6)*(x_8 - cos(x_3) + 1.00802457332611)/(x_3*x_5*x_7) - 0.00661400007084012
5 -0.977821409702301*x_0*x_1/((4.14405488967896 + x_2/x_1)*(x_1 - x_2)) + 0.993877053260803*x_0 - 0.00010473579459358
6 0.999999582767487*x_0*x_3 + 1.00000023841858*x_1*x_4 + 1.00000023841858*x_2*x_5 - 1.98388306671404e-6
7 1.0*x_0*x_1
8 0.352261632680893*x_0*x_1*(x_1 - x_2) + 0.184230580925941*x_0*x_3**2 + 0.337708055973053*x_0*(x_1 + x_2)*(x_2 + x_3) + 1.56818103790283
9 0.0795722827315331*x_0*x_1/(x_2*x_3**2)
10 0.0795759409666061*x_0/(x_1*x_2**2)
11 1.0*x_0*x_1
12 1.0*x_0*(x_1 + x

  return -1.00002658499452710000098572153*x_0*x_1*numpy.cos(x_2) + (0.47207661104379667*x_1 + 0.522805491589952)*numpy.sin(x_1)/numpy.log(clip(x_0/x_2, 1.0, numpy.inf))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


69 -0.999999940395355*x_0*x_1*cos(x_2) + 7.4508716352284e-7
70 -0.0806464180350304*x_0*x_4/((x_3 - x_4)*(x_1*x_2 + 0.494507551193237/(x_0*x_4))) - 0.00032299998565577
71 -0.448078155517578 + (0.995200037956238*x_0 + 0.476821452379227*x_1)*cos(3.10593104362488*x_1/(x_0*x_2))/x_1
72 1.0*x_0*x_1*x_2**2
73 1.0*x_0*x_1**2 + 1.55749876284972e-6
74 0.159154936671257*x_0*x_1/x_2
75 0.5*x_0*x_1*x_2
76 0.5*x_0*x_1*x_2/x_3
77 0.0795774683356285*x_0*x_1/x_2
78 6.28318548202515*x_0*x_2*x_3*x_4/x_1 - 2.08649726118892e-5
79 0.0728570073843002*x_0*x_2 + 0.0880375057458878*x_0*(x_1 - x_3 - x_4 + 3.75651073455811) + 0.0716067627072334*x_3/x_4 + 0.0696248039603233 + 0.0419163890182972*x_3*(x_1 - 5.1931529045105)/x_2
80 -1.36378061771393 + (2.15035486221313*x_0*x_1 + 3.79783129692078)/(1.88061690330505 + x_3*x_4/(x_1*x_2))
81 0.946076929569244*x_0*(x_1 + x_4/x_6 - cos(x_7/x_5) + 0.188123390078545)/(x_2*x_3) + 0.00830301735550165
82 1.00000011920929*x_0*x_1*(x_2 + 1) - 5.15300780534744e-6
83 0.999999940395

In [None]:
# Show every fitted model in the order it was appended by the fitting loop;
# an entry is None where fitting raised and was caught.
models

[0.0790617,
 -0.0636908188462257 + (0.0898348242044449*x_0 + 0.210512861609459 + 0.317851394414902/x_0)/(x_0 + x_1**2/x_0**2),
 -0.0859353691339493 + (0.0913130939006805*x_0 + 0.384801775217056)*cos((0.767461121082306*x_1 - 0.76799213886261*x_2)/x_0)/x_0,
 0.198126062750816*(x_0 - x_1)**2 + 0.199752911925316*(x_2 - x_3)**2 - 0.396846324205399*cos(x_0 - x_1)*cos(x_2 - x_3) + 1.04219973087311,
 0.354746073484421*x_0*x_1*x_2*(x_4 + x_6)*(x_8 - cos(x_3) + 1.00802457332611)/(x_3*x_5*x_7) - 0.00661400007084012,
 -0.977821409702301*x_0*x_1/((4.14405488967896 + x_2/x_1)*(x_1 - x_2)) + 0.993877053260803*x_0 - 0.00010473579459358,
 0.999999582767487*x_0*x_3 + 1.00000023841858*x_1*x_4 + 1.00000023841858*x_2*x_5 - 1.98388306671404e-6,
 1.0*x_0*x_1,
 0.352261632680893*x_0*x_1*(x_1 - x_2) + 0.184230580925941*x_0*x_3**2 + 0.337708055973053*x_0*(x_1 + x_2)*(x_2 + x_3) + 1.56818103790283,
 0.0795722827315331*x_0*x_1/(x_2*x_3**2),
 0.0795759409666061*x_0/(x_1*x_2**2),
 1.0*x_0*x_1,
 1.0*x_0*(x_1 + x_2*x

In [None]:
import pickle

# Bundle the sampled rows, the fitted models and the configuration that
# produced them, then write the bundle to a single pickle file.
data = dict(df=sampled_df, models=models, config=config)

with open('gpgomea_test.pkl', 'wb') as outp:
    pickle.dump(data, outp, protocol=pickle.HIGHEST_PROTOCOL)