In [None]:
import pandas as pd
import glob
import numpy as np

In [14]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '706_sleuth_case1202.csv', '695_chatfield_4.csv', '522_pm10.csv', '547_no2.csv', '229_pwLinear.csv', '666_rmftsa_ladata.csv', '227_cpu_small.csv', 'alpinegp-blackbox_results.csv', '542_pollution.csv', '210_cloud.csv', '523_analcatdata_neavote.csv', '687_sleuth_ex1605.csv', '712_chscase_geyser1.csv', '505_tecator.csv', '1096_FacultySalaries.csv', '1030_ERA.csv', '690_visualizing_galaxy.csv', '225_puma8NH.csv', '1028_SWD.csv', '228_elusage.csv', '529_pollen.csv', '557_analcatdata_apnea1.csv', '560_bodyfat.csv', '485_analcatdata_vehicle.csv', '659_sleuth_ex1714.csv', '527_analcatdata_election2000.csv', '1029_LEV.csv', '503_wind.csv', '556_analcatdata_apnea2.csv', '192_vineyard.csv', '663_rabe_266.csv', '665_sleuth_case2002.csv', '1027_ESL.csv', '230_machine_cpu.csv', '1089_USCrime.csv', '561_cpu.csv', '519_vinnie.csv', '197_cpu_act.csv']
39


In [15]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "Flex"

In [16]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0       Flex  678_visualizing_environmental -0.201873
1       Flex  678_visualizing_environmental  0.374990
2       Flex  678_visualizing_environmental  0.289871
3       Flex  678_visualizing_environmental  0.338446
4       Flex  678_visualizing_environmental  0.242497
..       ...                            ...       ...
5       Flex                    197_cpu_act  0.944783
6       Flex                    197_cpu_act  0.947035
7       Flex                    197_cpu_act  0.942767
8       Flex                    197_cpu_act  0.943261
9       Flex                    197_cpu_act  0.925469

[380 rows x 3 columns]


In [17]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("flex-blackbox_results.csv", index=False)

In [18]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0         485_analcatdata_vehicle  0.928629      0.218689       0.709939
1                   542_pollution  0.862609      0.205069       0.657540
2               687_sleuth_ex1605  0.836636      0.252290       0.584346
3                    192_vineyard  0.869627      0.379204       0.490423
4               659_sleuth_ex1714  0.953182      0.590340       0.362842
5                    1089_USCrime  0.954135      0.642937       0.311198
6             665_sleuth_case2002  0.525834      0.224279       0.301555
7   678_visualizing_environmental  0.559170      0.266184       0.292986
8             706_sleuth_case1202  0.861467      0.589932       0.271535
9                     228_elusage  0.900911      0.722935       0.177976
10                       522_pm10  0.357833      0.183038       0.174795
11                      210_cloud  0.938630      0.777608       0.161022
12                        547_no2  0.588655      0.

In [19]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0    527_analcatdata_election2000  0.995885  0.998556  0.004849
1                    663_rabe_266  0.994959  0.995115  0.001374
2                     560_bodyfat  0.858423  0.989548  0.397481
3                     505_tecator  0.987277  0.986499  0.003633
4                         561_cpu  0.932056  0.980248  0.114270
5          690_visualizing_galaxy  0.963504  0.962481  0.008851
6                   227_cpu_small  0.948131  0.947010  0.005893
7                     197_cpu_act  0.946148  0.945909  0.008979
8         523_analcatdata_neavote  0.936577  0.943564  0.027836
9                 230_machine_cpu  0.636093  0.879896  0.650444
10           1096_FacultySalaries  0.657977  0.872021  0.523762
11         556_analcatdata_apnea2  0.870681  0.871478  0.022826
12                       1027_ESL  0.857537  0.861532  0.020075
13         557_analcatdata_apnea1  0.815327  0.859675  0.136672
14                695_chatfield_4  0.850

In [20]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [21]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6432775284276235


In [22]:
r2_tests.std().item()

0.3599655834629445

In [23]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7481701082437232


In [24]:
# Convert the DataFrame to Markdown
markdown_table = algorithm_stats.to_markdown(index=False)

# Print the Markdown table
print(markdown_table)

# Save the Markdown table to a file
with open('table.md', 'w') as file:
    file.write(markdown_table)


| dataset                       |     mean |   median |        std |
|:------------------------------|---------:|---------:|-----------:|
| 527_analcatdata_election2000  | 0.995885 | 0.998556 | 0.00484857 |
| 663_rabe_266                  | 0.994959 | 0.995115 | 0.00137354 |
| 560_bodyfat                   | 0.858423 | 0.989548 | 0.397481   |
| 505_tecator                   | 0.987277 | 0.986499 | 0.00363255 |
| 561_cpu                       | 0.932056 | 0.980248 | 0.11427    |
| 690_visualizing_galaxy        | 0.963504 | 0.962481 | 0.00885051 |
| 227_cpu_small                 | 0.948131 | 0.94701  | 0.00589332 |
| 197_cpu_act                   | 0.946148 | 0.945909 | 0.00897869 |
| 523_analcatdata_neavote       | 0.936577 | 0.943564 | 0.0278365  |
| 230_machine_cpu               | 0.636093 | 0.879896 | 0.650444   |
| 1096_FacultySalaries          | 0.657977 | 0.872021 | 0.523762   |
| 556_analcatdata_apnea2        | 0.870681 | 0.871478 | 0.0228258  |
| 1027_ESL                      | 