In [None]:
# Setup: mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Navigate to the project folder. Later cells import `configuration` and read the
# relative path "Data/performance", so the working directory presumably has to be
# the project root -- uncomment the line below and fill in your own path.
# %cd <YOUR PATH TO THE NOTEBOOK IN GOOGLE COLAB>

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import dependencies
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
import natsort
from functools import reduce

In [None]:
from configuration import experiment_id as EXPERIMENT_ID
from configuration import data_root as DATA_ROOT
from configuration import data_path as DATA_PATH
from configuration import budget as BUDGET
from configuration import algorithms as ALGORITHMS
from configuration import n_iid as N_IID

# Display DATA_PATH from `configuration`; it is later passed to prepare_target as save_path.
DATA_PATH

In [None]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)

# Display the budget imported from `configuration`; rows are later filtered to runtime == BUDGET.
BUDGET

In [None]:
# Display the algorithm names from `configuration`; each one is used as a file-name prefix
# ("<algorithm>_<i>_fixed_runtime.csv") and as an output column name.
ALGORITHMS

In [None]:
# Display N_IID from `configuration`; per-algorithm files are looked up for indices 1..N_IID.
N_IID

In [None]:
def target_check(data_path, algorithms, budget, n_iid, expected_groups=24, expected_runs=50):
    """Print a report of missing or incomplete performance files.

    For every algorithm and every index ``i`` in ``1..n_iid`` the file
    ``{data_path}/{algorithm}_{i}_fixed_runtime.csv`` is inspected. Nothing is
    returned; problems are reported with ``print``:

    * ``File not found``   -- the file does not exist (it is then skipped).
    * ``Missing instance`` -- after filtering to ``budget``, the number of
      distinct (f_id, i_id) groups differs from ``expected_groups``.
    * ``Missing runs``     -- at least one (f_id, i_id) group does not contain
      exactly ``expected_runs`` rows; the offending groups are printed.

    Parameters
    ----------
    data_path : str
        Folder that contains the ``*_fixed_runtime.csv`` files.
    algorithms : iterable of str
        Algorithm names used as file-name prefixes.
    budget : int
        Only rows whose ``runtime`` column equals this value are checked.
    n_iid : int
        Highest file index to look for (indices start at 1).
    expected_groups : int, default 24
        Expected number of (f_id, i_id) groups per file. The default keeps the
        previously hard-coded value (presumably 24 benchmark functions with one
        instance per file -- confirm against the data set).
    expected_runs : int, default 50
        Expected number of rows per (f_id, i_id) group. The default keeps the
        previously hard-coded value.

    Notes
    -----
    Each file must provide the columns ``funcId``, ``runtime``, ``run``,
    ``f(x)``, ``IID``, ``DIM`` and ``ID`` plus an index column named
    ``Unnamed: 0``; pandas raises if one of them is absent.
    """
    for algorithm_name in algorithms:
        for i in range(1, n_iid + 1):
            file_path = f"{data_path}/{algorithm_name}_{i}_fixed_runtime.csv"
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue

            data = pd.read_csv(
                file_path,
                index_col="Unnamed: 0",
                dtype={"funcId": int, "runtime": int, "run": int, "f(x)": float, "IID": int},
            )
            data = data.rename(columns={"funcId": "f_id", "IID": "i_id", "runtime": "budget"})
            data = data[data["budget"] == budget]
            data = data.drop(["DIM", "ID", "budget"], axis=1)

            # One row per (f_id, i_id) group; only the number of groups matters here.
            data_mean = data.groupby(["f_id", "i_id"])["f(x)"].mean().reset_index()
            if data_mean.shape[0] != expected_groups:
                print(f"Missing instance: {file_path}")
                print(data_mean.shape)

            # After size() the "run" column holds the number of rows in each group.
            data_runs = data.groupby(["f_id", "i_id"])["run"].size().reset_index()
            incomplete = data_runs[data_runs["run"] != expected_runs]
            if len(incomplete) != 0:
                print(f"Missing runs: {file_path}")
                print(incomplete)


# target_check(data_path=f"Data/performance",
#              algorithms=ALGORITHMS, budget=BUDGET, n_iid=N_IID)

In [None]:
def prepare_target(data_path, algorithms, budget, n_iid, save_path):
    """Build the target table and save it as ``{save_path}/y.csv``.

    For every algorithm and every index ``i`` in ``1..n_iid`` the file
    ``{data_path}/{algorithm}_{i}_fixed_runtime.csv`` is read (missing files
    are reported with ``print`` and skipped). Rows whose ``runtime`` equals
    ``budget`` are kept, ``f(x)`` is averaged over the runs of each
    (f_id, i_id) pair, and ``log10(mean + 1)`` is stored in a column named
    after the algorithm. The per-algorithm tables are then inner-joined on
    (f_id, i_id), so only pairs available for *every* algorithm survive.

    Parameters
    ----------
    data_path : str
        Folder that contains the ``*_fixed_runtime.csv`` files.
    algorithms : iterable of str
        Algorithm names; used as file-name prefixes and as output columns.
    budget : int
        Runtime value to select.
    n_iid : int
        Highest file index to look for (indices start at 1).
    save_path : str
        Output folder for ``y.csv``; it is created if it does not exist.

    Returns
    -------
    pandas.DataFrame
        Columns ``f_id``, ``i_id`` and one column per algorithm.

    Notes
    -----
    Each file must provide the columns ``funcId``, ``runtime``, ``run``,
    ``f(x)``, ``IID``, ``DIM`` and ``ID`` plus an index column named
    ``Unnamed: 0``; pandas raises if one of them is absent.
    """
    per_algorithm = []
    for algorithm_name in algorithms:
        # Collect the per-file frames and concatenate once, instead of growing
        # a DataFrame inside the loop starting from an empty seed frame.
        instance_frames = []
        for i in range(1, n_iid + 1):
            file_path = f"{data_path}/{algorithm_name}_{i}_fixed_runtime.csv"
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue

            data = pd.read_csv(
                file_path,
                index_col="Unnamed: 0",
                dtype={"funcId": int, "runtime": int, "run": int, "f(x)": float, "IID": int},
            )
            data = data.rename(columns={"funcId": "f_id", "IID": "i_id", "runtime": "budget"})
            data = data[data["budget"] == budget]
            data = data.drop(["DIM", "ID", "budget"], axis=1)

            # sort=True returns the groups ordered by (f_id, i_id). f_id is read
            # as int, so no additional natural-sort pass is required.
            data = data.groupby(["f_id", "i_id"], sort=True)["f(x)"].mean().reset_index()
            # The +1 shift keeps the logarithm defined when the mean is 0.
            data[algorithm_name] = np.log10(data["f(x)"] + 1)
            instance_frames.append(data.drop(["f(x)"], axis=1))

        if instance_frames:
            per_algorithm.append(pd.concat(instance_frames, axis=0))
        else:
            # Same placeholder as before (an empty frame), but say why the
            # merge below is about to fail or return nothing.
            print(f"No data found for algorithm: {algorithm_name}")
            per_algorithm.append(pd.DataFrame())

    merged = reduce(
        lambda left, right: pd.merge(left, right, on=["f_id", "i_id"], how="inner"),
        per_algorithm,
    )

    # Make sure the output folder exists before writing.
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    merged.to_csv(f"{save_path}/y.csv", index=False)

    return merged

# Build the target table from the per-algorithm performance files; prepare_target
# also writes it to "<DATA_PATH>/y.csv".
y = prepare_target(
    data_path="Data/performance",
    algorithms=ALGORITHMS,
    budget=BUDGET,
    n_iid=N_IID,
    save_path=DATA_PATH,
)

In [None]:
# Preview the first rows of the target table returned by prepare_target.
y.head()

In [None]:
# (rows, columns) of the target table.
y.shape