In [1]:
from functions.test_algorithms import *
from functions.random_search import * 
from slim_gsgp_lib.datasets.data_loader import *
import pickle

# Collect every global whose name contains the substring 'load' (in the
# insertion order of globals()) and drop the first two matches.
# NOTE(review): presumably the first two matches are non-dataset helpers pulled
# in by the star imports — confirm, since this depends on import order and on
# no other imported name containing 'load'.
datasets = [globals()[i] for i in globals() if 'load' in i][2:]

# Passed as `pop_size` to random_search_gp and used in the best_params file names.
pop_size = 100 
# Passed as `n_iter` to random_search_gp and used in the best_params file names.
n_iter = 100
# Passed as `iterations` to random_search_gp.
n_iter_rs = 50
# Not referenced in the visible cells — TODO confirm where it is consumed.
n_iter_test = 30
# Passed as `p_train` to both random_search_gp and test_gp.
p_train = 0.7

In [None]:
def _load_cached_params(path):
    """Return the object pickled at ``path``, or None when no usable file exists.

    A missing file silently yields None. A file that exists but cannot be
    opened or unpickled is reported on stdout and also yields None, so the
    caller falls back to running a random search. Any other exception
    (including KeyboardInterrupt) propagates instead of being swallowed.
    """
    try:
        with open(path, 'rb') as f:
            # NOTE(review): pickle.load can execute arbitrary code; only point
            # this at parameter files produced by this project.
            return pickle.load(f)
    except FileNotFoundError:
        return None
    except (OSError, EOFError, pickle.UnpicklingError) as err:
        print(f"Ignoring unreadable parameter file {path}: {err}")
        return None


def _results_by_metric(metric_names, outputs):
    """Pair each metric name with its entry in ``outputs``, materialised as a list.

    Raises ValueError when the number of outputs does not match the number of
    metric names, so a change in what test_gp returns is not silently truncated.
    """
    outputs = tuple(outputs)
    if len(outputs) != len(metric_names):
        raise ValueError(
            f"expected {len(metric_names)} outputs from test_gp, got {len(outputs)}"
        )
    return {name: list(values) for name, values in zip(metric_names, outputs)}


# Names of the values returned by test_gp, in return order.
metrics = ['rm', 'ma', 'nrmse', 'r2', 'mae', 'std_rmse', 'time', 'train', 'test', 'size']

# NOTE(review): `datasets` was already sliced with [2:] when it was built, so
# this second [2:] skips two more loaders — confirm that this is intended.
for dataset_loader in tqdm(datasets[2:]):
    X, y = dataset_loader()
    dataset_name = dataset_loader.__name__.split('load_')[1]

    # Reuse previously saved best parameters for this dataset when available.
    # NOTE(review): the *_FALSE file feeds the scaled run and the *_TRUE file
    # the unscaled run — confirm this matches how random_search_gp names the
    # files it writes; the pairing looks inverted.
    best_params_scaled = _load_cached_params(
        f"best_params/best_gp_{dataset_name}_{pop_size}_{n_iter}_FALSE.pkl"
    )
    best_params_unscaled = _load_cached_params(
        f"best_params/best_gp_{dataset_name}_{pop_size}_{n_iter}_TRUE.pkl"
    )

    # Run a random search only for the variants that have no cached parameters.
    if best_params_scaled is None:
        print(f"Performing random search for {dataset_name} scaled...")

        best_params_scaled = random_search_gp(X, y, dataset_name, scale=True, p_train=p_train,
                                            iterations=n_iter_rs, pop_size=pop_size, n_iter=n_iter, show_progress=True, verbose=1)

    if best_params_unscaled is None:
        print(f"Performing random search for {dataset_name} unscaled...")

        best_params_unscaled = random_search_gp(X, y, dataset_name, scale=False, p_train=p_train,
                                            iterations=n_iter_rs, pop_size=pop_size, n_iter=n_iter, show_progress=True, verbose=1)

    print(f"Random search for {dataset_name} completed!")

    # Evaluate GP with the selected parameters, once with scaling and once without,
    # and key each returned value by its metric name (no eval on variable names).
    results_scaled_dict = _results_by_metric(metrics, test_gp(
        X=X, y=y, args_dict=best_params_scaled, dataset_name=dataset_name,
        scale=True, verbose=0, p_train=p_train, show_progress=False,
    ))

    results_unscaled_dict = _results_by_metric(metrics, test_gp(
        X=X, y=y, args_dict=best_params_unscaled, dataset_name=dataset_name,
        scale=False, verbose=0, p_train=p_train, show_progress=False,
    ))

    # Save the results to disk. open(..., 'wb') does not create directories,
    # so results/GP must already exist.
    with open(f"results/GP/{dataset_name}_gp_scaled.pkl", 'wb') as f:
        pickle.dump(results_scaled_dict, f)

    with open(f"results/GP/{dataset_name}_gp_unscaled.pkl", 'wb') as f:
        pickle.dump(results_unscaled_dict, f)

    print(f"Results for {dataset_name} saved!")
    print("---------------------------------------------------")

  0%|          | 0/14 [00:00<?, ?it/s]

Performing random search for airfoil scaled...




+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+
|        dataset        |       iteration       |        fitness        |      test_fitness     |          time         |         nodes         |
+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+
|    random_search_gp   |           0           |         0.435         |         0.430         |         0.070         |           3           |
|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
|    random_search_gp   |           1           |         0.340         |         0.340         |         0.032         |           19          |
|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|---



|    random_search_gp   |          100          |         0.158         |         0.161         |         0.549         |          227          |
|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|
+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+
|        dataset        |       iteration       |        fitness        |      test_fitness     |          time         |         nodes         |
+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+-----------------------+
|    random_search_gp   |           0           |         0.433         |         0.432         |         0.069         |           5           |
|-----------------------|-----------------------|-----------------------|-----------------------|-----------------------|---