In [1]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm
from pprint import pprint

from algorithms.gp.eplex import EplexConfig, EplexRegressor

In [2]:
# Only `g` is overridden here; every other EplexConfig field keeps its default.
# NOTE(review): `g` is presumably the number of generations -- confirm against EplexConfig.
eplex_overrides = {"g": 10}
config = EplexConfig(**eplex_overrides)
pprint(config)

EplexConfig(selection='epsilon_lexicase', lex_eps_global=False, lex_eps_dynamic=False, islands=False, num_islands=10, island_gens=100, verbosity=0, print_data=False, elitism=True, pHC_on=True, prto_arch_on=True, max_len=64, max_len_init=20, popsize=500, g=10, time_limit=120)


In [3]:
def _pick_one(group):
    """Draw a single row from one `number` group, using a fixed seed."""
    return group.sample(1, random_state=42)


def _npy_relpath(row):
    """Build the '<filename>/<data_num>.npy' relative path for one row."""
    return os.path.join(row["filename"], f"{row['data_num']}.npy")


train_df = pd.read_csv("dataset/train_df.csv", index_col=0)

# Keep exactly one row per distinct `number`; the seed is re-applied to each
# group separately, so the draw is repeatable across runs.
per_number = train_df.groupby("number")
sampled_df = per_number.apply(_pick_one).reset_index(drop=True)

# Location of each sample's .npy file, relative to the dataset directory.
sampled_df["path"] = sampled_df.apply(_npy_relpath, axis=1)
sampled_df.head()

Unnamed: 0,filename,data_num,number,path
0,I.6.2a,83,1,I.6.2a/83.npy
1,I.6.2,83,2,I.6.2/83.npy
2,I.6.2b,83,3,I.6.2b/83.npy
3,I.8.14,83,4,I.8.14/83.npy
4,I.9.18,83,5,I.9.18/83.npy


In [4]:
# Fit one model per sampled dataset. `models` ends up with one entry per row of
# `sampled_df`, in the same order; a fit that raised is stored as None.
models = []
failures = []  # (relative path, exception) for every fit that raised

for rel_path in tqdm(sampled_df["path"], total=len(sampled_df)):
    data = np.load(os.path.join("dataset", rel_path))
    # Every column except the last goes to X; the last column is y.
    X = data[:, :-1]
    y = data[:, -1]
    try:
        regressor = EplexRegressor(config)
        model = regressor.predict_single(X, y)
    except Exception as exc:
        # Catch Exception instead of a bare `except:` so KeyboardInterrupt and
        # SystemExit can still stop this long-running loop, and keep/report the
        # error rather than discarding it silently.
        failures.append((rel_path, exc))
        tqdm.write(f"fit failed for {rel_path}: {exc!r}")
        model = None

    models.append(model)

100%|██████████| 100/100 [57:43<00:00, 34.63s/it] 


In [5]:
# Show the fitted models in the order the datasets were processed; None marks a fit that raised.
models

[1/(x_0*(1.96463654223969*x_0**2 + 1.1041257367387) + 1/(2*x_0)),
 0.375/(2*x_1 + x_1/(0.997240223463687*x_0*(0.285753 - 0.431*x_0)/(x_1*(x_1 - 0.183/x_1)) + 3.53631284916201*x_1) - 0.602),
 0.455/(x_0 + 0.332 + 0.611/(x_2 + 2.322685 + 0.413/x_0)),
 1.016*(x_2 - 0.861)/x_3 + x_3/x_2,
 x_0*(0.342728297632469*x_2 + 0.342728297632469*x_2*x_4/x_3)/(-x_1 + x_3 + x_7 - x_8 - 0.086),
 1.057*x_0 - 0.113*x_1/(0.00804437410071942 - 1.96402877697842*x_0) - 0.092,
 1.695*x_0 + x_1*x_4 + x_2*x_5 + 2*x_3 - x_4 + 0.560290931174089,
 x_0*x_1,
 0.588*x_0*x_1**2 + x_0*x_2*x_3,
 -0.083*x_0*(1.029568*x_1 + 0.008101632)/(x_2*x_3*(-x_3 - 0.091)),
 0.008835764*x_0*x_2/((x_2 - 0.545)*(x_2 - 0.468)),
 x_0*x_1,
 x_2*(x_0 - 0.658)*(x_0/x_3 + x_1 + x_3 - 2*x_4),
 -0.284*x_4*(x_2 - x_3)*(-x_0*(x_1/x_2 + 0.701) - x_1 + x_3 + 1),
 x_0*x_1*x_2,
 (x_1 - 0.866)*(2.54798761609907*x_0 + x_1 - 3.88561743761104),
 x_0 - 0.79*x_1*x_3 - 0.367,
 x_3 + 0.094 - 0.435/(x_3 + 0.721),
 0.06176599*x_0*x_1**2 + 0.950246*x_0*x_1,
 -0

In [6]:
import pickle

# Bundle the sampled rows, the fitted models and the run configuration into a
# single object and serialize it with the highest available pickle protocol.
data = dict(df=sampled_df, models=models, config=config)

with open('eplex_test.pkl', 'wb') as pkl_file:
    pickle.dump(data, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)