In [816]:
import pandas as pd
import os
import glob
import numpy as np

In [817]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '687_sleuth_ex1605.csv', '659_sleuth_ex1714.csv', '561_cpu.csv', '529_pollen.csv', 'alpinegp-blackbox_results.csv', '503_wind.csv', '1029_LEV.csv', '522_pm10.csv', '542_pollution.csv', '1027_ESL.csv', '1028_SWD.csv', '695_chatfield_4.csv', '225_puma8NH.csv', '227_cpu_small.csv', '229_pwLinear.csv', '712_chscase_geyser1.csv', '547_no2.csv', '1096_FacultySalaries.csv', '666_rmftsa_ladata.csv', '192_vineyard.csv', '519_vinnie.csv', '527_analcatdata_election2000.csv', '706_sleuth_case1202.csv', '523_analcatdata_neavote.csv', '560_bodyfat.csv', '1030_ERA.csv', '485_analcatdata_vehicle.csv', '505_tecator.csv', '556_analcatdata_apnea2.csv', '690_visualizing_galaxy.csv', '663_rabe_266.csv', '557_analcatdata_apnea1.csv', '197_cpu_act.csv', '665_sleuth_case2002.csv', '210_cloud.csv', '1089_USCrime.csv', '230_machine_cpu.csv', '228_elusage.csv']
39


In [818]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

In [819]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental -0.234804
1   AlpineGP  678_visualizing_environmental  0.384211
2   AlpineGP  678_visualizing_environmental  0.200520
3   AlpineGP  678_visualizing_environmental  0.121942
4   AlpineGP  678_visualizing_environmental -0.371221
..       ...                            ...       ...
5   AlpineGP                    228_elusage  0.716071
6   AlpineGP                    228_elusage  0.834505
7   AlpineGP                    228_elusage  0.772651
8   AlpineGP                    228_elusage  0.712184
9   AlpineGP                    228_elusage  0.236847

[380 rows x 3 columns]


In [820]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [821]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0               687_sleuth_ex1605  0.849301     -0.074039       0.923340
1                   542_pollution  0.853563      0.279329       0.574234
2                    192_vineyard  0.863953      0.380180       0.483773
3         485_analcatdata_vehicle  0.926831      0.470830       0.456001
4   678_visualizing_environmental  0.544766      0.193514       0.351252
5             706_sleuth_case1202  0.864212      0.568134       0.296078
6             665_sleuth_case2002  0.544097      0.257690       0.286407
7               659_sleuth_ex1714  0.953942      0.702428       0.251514
8                    1089_USCrime  0.951012      0.756442       0.194569
9                     228_elusage  0.895345      0.714127       0.181218
10                      210_cloud  0.935505      0.786611       0.148894
11                       522_pm10  0.370838      0.233109       0.137729
12                        547_no2  0.594315      0.

In [822]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0    527_analcatdata_election2000  0.997727  0.999273  0.003575
1                    663_rabe_266  0.994945  0.995115  0.001346
2                     560_bodyfat  0.988467  0.992938  0.012163
3                     505_tecator  0.986861  0.986026  0.003901
4                         561_cpu  0.957349  0.967161  0.033006
5          690_visualizing_galaxy  0.963404  0.964137  0.008677
6                     197_cpu_act  0.943090  0.945666  0.009666
7                   227_cpu_small  0.946096  0.945094  0.008128
8         523_analcatdata_neavote  0.936577  0.943564  0.027836
9            1096_FacultySalaries  0.662191  0.894004  0.525012
10         557_analcatdata_apnea1  0.881416  0.889496  0.039704
11                230_machine_cpu  0.778943  0.879675  0.273846
12         556_analcatdata_apnea2  0.863157  0.867148  0.034773
13                       1027_ESL  0.858838  0.860647  0.012759
14                695_chatfield_4  0.827

In [823]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [824]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6552373025589278


In [825]:
r2_tests.std().item()

0.3487136233938222

In [826]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7683394391975586


In [828]:
# Convert the DataFrame to Markdown
markdown_table = algorithm_stats.to_markdown(index=False)

# Print the Markdown table
print(markdown_table)

# Save the Markdown table to a file
with open('table.md', 'w') as file:
    file.write(markdown_table)


| dataset                       |       mean |     median |        std |
|:------------------------------|-----------:|-----------:|-----------:|
| 527_analcatdata_election2000  |  0.997727  |  0.999273  | 0.00357541 |
| 663_rabe_266                  |  0.994945  |  0.995115  | 0.00134602 |
| 560_bodyfat                   |  0.988467  |  0.992938  | 0.0121634  |
| 505_tecator                   |  0.986861  |  0.986026  | 0.0039009  |
| 561_cpu                       |  0.957349  |  0.967161  | 0.0330056  |
| 690_visualizing_galaxy        |  0.963404  |  0.964137  | 0.00867664 |
| 197_cpu_act                   |  0.94309   |  0.945666  | 0.00966613 |
| 227_cpu_small                 |  0.946096  |  0.945094  | 0.00812824 |
| 523_analcatdata_neavote       |  0.936577  |  0.943564  | 0.0278365  |
| 1096_FacultySalaries          |  0.662191  |  0.894004  | 0.525012   |
| 557_analcatdata_apnea1        |  0.881416  |  0.889496  | 0.0397044  |
| 230_machine_cpu               |  0.778943  |  0.8