In [1]:
import pandas as pd
import os
import glob
import numpy as np

In [2]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '706_sleuth_case1202.csv', '695_chatfield_4.csv', '522_pm10.csv', '547_no2.csv', '229_pwLinear.csv', '666_rmftsa_ladata.csv', '227_cpu_small.csv', 'alpinegp-blackbox_results.csv', '542_pollution.csv', '210_cloud.csv', '523_analcatdata_neavote.csv', '687_sleuth_ex1605.csv', '712_chscase_geyser1.csv', '505_tecator.csv', '1096_FacultySalaries.csv', '1030_ERA.csv', '690_visualizing_galaxy.csv', '225_puma8NH.csv', '1028_SWD.csv', '228_elusage.csv', '529_pollen.csv', '557_analcatdata_apnea1.csv', '560_bodyfat.csv', '485_analcatdata_vehicle.csv', '659_sleuth_ex1714.csv', '527_analcatdata_election2000.csv', '1029_LEV.csv', '503_wind.csv', '556_analcatdata_apnea2.csv', '192_vineyard.csv', '663_rabe_266.csv', '665_sleuth_case2002.csv', '1027_ESL.csv', '230_machine_cpu.csv', '1089_USCrime.csv', '561_cpu.csv', '519_vinnie.csv', '197_cpu_act.csv']
39


In [3]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

In [4]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental -0.389721
1   AlpineGP  678_visualizing_environmental  0.294045
2   AlpineGP  678_visualizing_environmental  0.147189
3   AlpineGP  678_visualizing_environmental  0.290096
4   AlpineGP  678_visualizing_environmental -0.199600
..       ...                            ...       ...
5   AlpineGP                    197_cpu_act  0.943117
6   AlpineGP                    197_cpu_act  0.943355
7   AlpineGP                    197_cpu_act  0.948425
8   AlpineGP                    197_cpu_act  0.933159
9   AlpineGP                    197_cpu_act  0.938404

[380 rows x 3 columns]


In [5]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [6]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0                   542_pollution  0.874251      0.173085       0.701166
1               687_sleuth_ex1605  0.868065      0.254565       0.613500
2   678_visualizing_environmental  0.578558      0.033098       0.545460
3                    192_vineyard  0.879874      0.407618       0.472256
4             706_sleuth_case1202  0.886404      0.461436       0.424968
5         485_analcatdata_vehicle  0.951373      0.527420       0.423953
6             665_sleuth_case2002  0.567414      0.147407       0.420007
7               659_sleuth_ex1714  0.961037      0.668219       0.292818
8                    1089_USCrime  0.964041      0.711690       0.252351
9                        522_pm10  0.378014      0.203602       0.174413
10                      210_cloud  0.943635      0.771190       0.172446
11                    228_elusage  0.901250      0.731864       0.169386
12                        547_no2  0.614464      0.

In [7]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0                    663_rabe_266  0.994842  0.995115  0.001182
1    527_analcatdata_election2000  0.992449  0.992758  0.006080
2                     505_tecator  0.986318  0.987719  0.005882
3                     560_bodyfat  0.762608  0.985325  0.476887
4          690_visualizing_galaxy  0.960013  0.960901  0.007752
5                         561_cpu  0.952311  0.959272  0.037394
6                   227_cpu_small  0.948032  0.949224  0.007988
7                     197_cpu_act  0.944818  0.944068  0.007780
8         523_analcatdata_neavote  0.936577  0.943564  0.027836
9            1096_FacultySalaries  0.668179  0.900892  0.528163
10         557_analcatdata_apnea1  0.883680  0.890836  0.049260
11         556_analcatdata_apnea2  0.867434  0.868689  0.034457
12                   229_pwLinear  0.852877  0.862304  0.030776
13                       1027_ESL  0.862582  0.861795  0.015694
14                230_machine_cpu  0.837

In [8]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [9]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6412848426713436


In [10]:
r2_tests.std().item()

0.3791587018927324

In [11]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7634275998757735


In [12]:
# Convert the DataFrame to Markdown
markdown_table = algorithm_stats.to_markdown(index=False)

# Print the Markdown table
print(markdown_table)

# Save the Markdown table to a file
with open('table.md', 'w') as file:
    file.write(markdown_table)


| dataset                       |      mean |    median |        std |
|:------------------------------|----------:|----------:|-----------:|
| 663_rabe_266                  |  0.994842 | 0.995115  | 0.0011823  |
| 527_analcatdata_election2000  |  0.992449 | 0.992758  | 0.00608007 |
| 505_tecator                   |  0.986318 | 0.987719  | 0.00588222 |
| 560_bodyfat                   |  0.762608 | 0.985325  | 0.476887   |
| 690_visualizing_galaxy        |  0.960013 | 0.960901  | 0.00775153 |
| 561_cpu                       |  0.952311 | 0.959272  | 0.0373935  |
| 227_cpu_small                 |  0.948032 | 0.949224  | 0.00798752 |
| 197_cpu_act                   |  0.944818 | 0.944068  | 0.00778017 |
| 523_analcatdata_neavote       |  0.936577 | 0.943564  | 0.0278365  |
| 1096_FacultySalaries          |  0.668179 | 0.900892  | 0.528163   |
| 557_analcatdata_apnea1        |  0.88368  | 0.890836  | 0.0492599  |
| 556_analcatdata_apnea2        |  0.867434 | 0.868689  | 0.0344567  |
| 229_