In [1]:
import os
import pickle
from pprint import pprint

import numpy as np
import pandas as pd
from tqdm import tqdm

from algorithms.gp.feat import FeatConfig, FeatRegressor

In [2]:
# Build the FEAT configuration, overriding only `verbosity`; every other field
# keeps whatever value FeatConfig assigns on its own. Pretty-printing it records
# the full set of settings used for this run in the cell output.
config = FeatConfig(verbosity=0)
pprint(config)

FeatConfig(pop_size=100,
           gens=100,
           max_time=120,
           max_depth=6,
           verbosity=0,
           batch_size=100,
           functions=['+',
                      '-',
                      '*',
                      '/',
                      '^2',
                      '^3',
                      'sqrt',
                      'sin',
                      'cos',
                      'exp',
                      'log'],
           otype='f')


In [3]:
train_df = pd.read_csv("dataset/train_df.csv", index_col=0)

# Draw one row per `number` group, re-seeding with 42 for every group so each
# draw is reproducible independently of the others. Iterating over the groups
# and concatenating selects the same rows, in the same (sorted-key) order, as
# `groupby(...).apply(lambda x: x.sample(...))`, but avoids the deprecated
# "apply operated on the grouping columns" path that emits a warning.
sampled_df = pd.concat(
    [group.sample(1, random_state=42) for _, group in train_df.groupby("number")],
    ignore_index=True,
)

# Relative location of each sample's array file: <filename>/<data_num>.npy.
sampled_df["path"] = [
    os.path.join(filename, f"{data_num}.npy")
    for filename, data_num in zip(sampled_df["filename"], sampled_df["data_num"])
]

  sampled_df = train_df.groupby('number').apply(lambda x: x.sample(1, random_state=42)).reset_index(drop=True)


In [4]:
# Preview the first rows of the one-row-per-`number` sample, including the
# derived `path` column.
sampled_df.head()

Unnamed: 0,filename,data_num,number,path
0,I.6.2a,83,1,I.6.2a/83.npy
1,I.6.2,83,2,I.6.2/83.npy
2,I.6.2b,83,3,I.6.2b/83.npy
3,I.8.14,83,4,I.8.14/83.npy
4,I.9.18,83,5,I.9.18/83.npy


In [5]:
# Fit one FEAT model per sampled dataset. `models` stays aligned with the rows
# of `sampled_df`: entry i is the result for row i, or None if fitting failed.
models = []

for index, row in tqdm(sampled_df.iterrows(), total=len(sampled_df)):
    data = np.load(os.path.join("dataset", row["path"]))
    # All columns but the last are passed as X, the last column as y
    # (presumably features and regression target - confirm against the dataset).
    X = data[:, :-1]
    y = data[:, -1]

    try:
        regressor = FeatRegressor(config)
        model = regressor.predict_single(X, y)
    except Exception as exc:
        # Best effort: one failing dataset must not abort the whole sweep.
        # Catch Exception rather than using a bare `except` so Ctrl-C
        # (KeyboardInterrupt) still stops the loop, and report the failure
        # through tqdm so the progress bar is not corrupted.
        tqdm.write(f"FEAT failed on {row['path']}: {exc!r}")
        model = None

    models.append(model)

100%|██████████| 100/100 [21:09<00:00, 12.70s/it]


In [6]:
# Show only the first few fitted expressions: the list holds one long symbolic
# expression per dataset, and dumping all of them floods the notebook output.
models[:5]

[0.925093924437197*x_0**5*sin(0.1911*cos(0.5616*x_0)) - 0.000891814202509849*x_0**4 + 0.00595899281010016*x_0**3*log(0.183498530525678*x_0**2) + 0.0146971194006986*x_0**2*log(0.1435*exp(0.6423*sin(0.2196*x_0)))*sin(0.7058*x_0) - 0.042755256438396*x_0**2*log(0.1435*exp(0.6423*sin(0.2196*x_0)))*sin(0.7325*x_0) - 0.0882*x_0 - 0.00969445512598658*(x_0**2)**0.5 + 0.23*exp(0.5609*x_0) - 0.4*sin(0.686*x_0) - 0.21*sin(0.7325*x_0) - 0.574416*sin(0.0510804*x_0**2) - 0.00172619144943653*sin(0.6801*exp(0.6423*x_0))**4 - 0.12916,
 3.217432*x_0**3 + 0.02538*x_0*sin(0.2457*x_1) - 0.00027367051166*x_0*cos(0.526*x_0)**2 - 0.071191079809332*x_0**2.0*sin(0.2457*x_1)**2 + 0.163726*x_1*cos(0.282*x_0) + 0.065566*x_1 + 0.19*sin(0.1662841831838*x_0**2 - 0.09924381585*x_0*x_1) + 0.11352*sin(0.929*sin(0.1662841831838*x_0**2 - 0.4692379*x_1)) - 16.254718*cos(0.526*x_0)**3 - 15.5*cos(0.10610955*x_0*sin(0.2457*x_1)) + 15.6431,
 -0.0137656385081719*x_0**3*cos(0.663201*x_1 - 0.52964846*x_2)**3 - 0.0193249206914087*x

In [7]:
# Persist the sampled rows, the fitted models and the configuration together so
# the run can be inspected later without refitting. `pickle` is imported in the
# notebook's top import cell.
# NOTE: this rebinds `data` (previously the array loaded in the fitting loop)
# to the results dictionary.
data = {
    "df": sampled_df,
    "models": models,
    "config": config,
}

with open('feat_test.pkl', 'wb') as outp:
    pickle.dump(data, outp, protocol=pickle.HIGHEST_PROTOCOL)