In [47]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
from datetime import datetime

from tqdm.notebook import tqdm

import plotly.express as px
# %matplotlib notebook

In [48]:
import ConfigSpace as CS

from deephyper.problem import HpProblem


# Hyperparameter search space; `problem.space` is used further down to encode
# stored configurations as numeric arrays.
problem = HpProblem()

# Model hyperparameters
# Choices offered for the categorical "activation" hyperparameter.
ACTIVATIONS = [
    "elu",
    "gelu",
    "hard_sigmoid",
    "linear",
    "relu",
    "selu",
    "sigmoid",
    "softplus",
    "softsign",
    "swish",
    "tanh",
]

# Default widths, one entry per layer, for the two families of dense layers.
default_dense = [1000, 1000, 1000]
default_dense_feature_layers = [1000, 1000, 1000]

# One integer width per layer and per family, log-uniform over [10, 1024].
for layer_idx, dense_default in enumerate(default_dense):
    problem.add_hyperparameter(
        (10, 1024, "log-uniform"),
        f"dense_{layer_idx}",
        default_value=dense_default,
    )
    problem.add_hyperparameter(
        (10, 1024, "log-uniform"),
        f"dense_feature_layers_{layer_idx}",
        default_value=default_dense_feature_layers[layer_idx],
    )

problem.add_hyperparameter(ACTIVATIONS, "activation", default_value="relu")

# Optimization hyperparameters
# Each entry is (domain, name, default value). A list is a set of categorical
# choices; a tuple is a numeric range, optionally followed by a prior such as
# "log-uniform".
training_hyperparameters = [
    (["sgd", "rmsprop", "adagrad", "adadelta", "adam"], "optimizer", "sgd"),
    ((0, 0.5), "dropout", 0.0),
    ((8, 512, "log-uniform"), "batch_size", 32),
    ((1e-5, 1e-2, "log-uniform"), "learning_rate", 0.001),
    ((1e-5, 1e-2, "log-uniform"), "base_lr", 0.001),
    ([True, False], "residual", False),
    ([True, False], "early_stopping", False),
    ((5, 20), "early_stopping_patience", 5),
    ([True, False], "reduce_lr", False),
    ((0.1, 1.0), "reduce_lr_factor", 0.5),
    ((5, 20), "reduce_lr_patience", 5),
    ([True, False], "warmup_lr", False),
    ([True, False], "batch_normalization", False),
    (["mse", "mae", "logcosh", "mape", "msle", "huber"], "loss", "mse"),
    (["std", "minmax", "maxabs"], "scaling", "std"),
]

# Register the entries one by one, in the order listed above.
for domain, hp_name, hp_default in training_hyperparameters:
    problem.add_hyperparameter(domain, hp_name, default_value=hp_default)

# Last expression of the cell: displays the resulting configuration space.
problem

Configuration space object:
  Hyperparameters:
    activation, Type: Categorical, Choices: {elu, gelu, hard_sigmoid, linear, relu, selu, sigmoid, softplus, softsign, swish, tanh}, Default: relu
    base_lr, Type: UniformFloat, Range: [1e-05, 0.01], Default: 0.001, on log-scale
    batch_normalization, Type: Categorical, Choices: {True, False}, Default: False
    batch_size, Type: UniformInteger, Range: [8, 512], Default: 32, on log-scale
    dense_0, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dense_1, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dense_2, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dense_feature_layers_0, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dense_feature_layers_1, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dense_feature_layers_2, Type: UniformInteger, Range: [10, 1024], Default: 1000, on log-scale
    dropout, Type: Un

In [49]:
# NOTE(review): machine-specific absolute path; point it at your own copy of the
# experiment outputs before running this notebook elsewhere.
exp_dir = "/Users/romainegele/Documents/Argonne/deephyper-scalable-bo/experiments/polaris/dhb/output/"

# Experiment to analyse: leave exactly one `exp_name` uncommented.
# exp_name = "dhb_combo-OPT-TPE-SHA-40-10800-42"
# exp_name = "dhb_combo-DBO-RF-UCB-SHA-40-10800-42"

# exp_name = "dhb_combo-OPT-TPE-NONE-40-10800-42"
# exp_name = "dhb_combo-DBO-RF-UCB-40-10800-42"

exp_name = "dhb_combo-OPT-TPE-SHA-160-10800-42"
# exp_name = "dhb_combo-DBO-RF-UCB-SHA-160-10800-42"

# exp_name = "dhb_combo-DBO-RF-UCB-480-10800-42"


path = os.path.join(exp_dir, exp_name, "results.csv")

df = pd.read_csv(path, index_col=None)

# Drop the "Unnamed: 0" column when the file has one; `errors="ignore"` makes
# this a no-op otherwise (replaces a try/except KeyError/pass).
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# Discard rows whose objective, read as text, starts with "F"
# (presumably failed evaluations -- TODO confirm against the result format).
df = df[~df["objective"].astype(str).str.startswith("F")]

# Discard rows flagged in "m:stopped" (presumably evaluations that were stopped
# early -- TODO confirm). The explicit copy detaches the result from the
# unfiltered frame so the column assignment below does not raise pandas'
# SettingWithCopyWarning.
df = df[~df["m:stopped"].astype(bool)].copy()

# The remaining objectives are numeric; make the dtype explicit.
df["objective"] = df["objective"].astype(float)

# Best objective first: later cells take the leading rows as the "top" ones.
df = df.sort_values("objective", ascending=False)

In [50]:
# Histogram of the objective over the rows kept in `df` (200 bins); every
# column of `df` is passed as hover data.
histogram_options = dict(x="objective", nbins=200, hover_data=df.columns)
fig = px.histogram(df, **histogram_options)
fig.show()

In [51]:
# Keep only the rows whose objective is strictly above the baseline
# performance (0.894).
df = df.loc[df["objective"] > 0.894]
# Stricter alternative cut-off:
# df = df.loc[df["objective"] > 0.915]

In [55]:
# Hyperparameter columns carry a "p:" prefix in the results; select them and
# strip the prefix so the names can be passed to `problem.space`.
df_hp = df[[p_name for p_name in df.columns if p_name.startswith("p:")]]
df_hp = df_hp.rename(columns={p_name: p_name[2:] for p_name in df_hp.columns})

# Encode every row with ConfigSpace's array representation:
# one row per evaluation, one column per hyperparameter of `problem.space`.
hp_array = np.array(
    [
        CS.Configuration(problem.space, row.to_dict()).get_array()
        for _, row in df_hp.iterrows()
    ]
)

# Normalize categorical variables to [0,1]
# (assumes `get_array()` encodes a categorical as its choice index 0..n-1 and
# that columns follow the iteration order of `problem.space` -- TODO confirm).
for i, (hp_name, hp) in enumerate(problem.space.items()):
    if isinstance(hp, CS.CategoricalHyperparameter):
        n_choices = len(hp.choices)
        # A single-choice categorical would divide by zero (0/0 -> NaN) and
        # poison the covariance / PCA computed from this array; leave it as is.
        if n_choices > 1:
            hp_array[:, i] /= n_choices - 1

In [56]:
# hp_array = np.diag(np.ones((22,)))

# hp_array = np.zeros((22, 22))
# hp_array[:,0] = 1
# hp_array

In [57]:
# Covariance between hyperparameters. `np.cov` treats rows as variables by
# default, so the (evaluations x hyperparameters) array is transposed first.
mat = np.cov(np.transpose(hp_array))

# Trace of the covariance matrix = sum of the per-hyperparameter variances.
total_variance = np.trace(mat)
print("trace: ", total_variance)

# Matrix 2-norm (largest singular value).
spectral_norm = np.linalg.norm(mat, 2)
print("spec norm: ", spectral_norm)

# Heat-map of the covariance matrix.
fig = px.imshow(mat)
fig.show()

trace:  1.4055022624112488
spec norm:  0.8003172534971003


In [58]:
# Number of hyperparameters in the search space.
len(problem.space)

22

In [59]:
# Reciprocal of the search-space dimension, derived from the space itself
# instead of a hard-coded 22 so it cannot go stale when hyperparameters change.
# Presumably the explained-variance ratio every PCA component would have if the
# variance were spread evenly -- TODO confirm the intended reading.
1 / len(problem.space)

0.045454545454545456

In [60]:
from sklearn.decomposition import PCA

# PCA cannot extract more components than min(n_samples, n_features). Capping
# the request keeps this cell working when fewer configurations than
# hyperparameters are left after filtering; with enough rows it is still one
# component per hyperparameter.
n_components = min(hp_array.shape)
pca = PCA(n_components=n_components)
pca.fit(hp_array)

print(pca.explained_variance_ratio_)

# The score is the sum of the drops between consecutive explained-variance
# ratios. That sum telescopes, so it is simply the first ratio minus the last
# one (equal to the explicit sum up to floating-point rounding).
ratios = pca.explained_variance_ratio_
score = ratios[0] - ratios[-1]

print(score)

# score = 1 - (score/(1/(22-1)))
print(1 - score)

# Explained-variance ratio per component; the x axis follows the number of
# components actually fitted.
fig = px.line(x=np.arange(len(ratios)), y=ratios)
fig.show()

[0.56941726 0.12431356 0.0704017  0.04136376 0.0380579  0.02515271
 0.02407799 0.02110521 0.01824109 0.01360631 0.01115611 0.0086995
 0.00645604 0.00552595 0.00478627 0.0043883  0.00341419 0.0030577
 0.00221581 0.00192391 0.00135736 0.00128137]
0.5681358965143781
0.4318641034856219


In [23]:
# categorical hp
# Result columns ("p:" + hyperparameter name) of the hyperparameters that the
# problem declares with a list of choices. "p:warmup_lr" was missing from this
# list although `warmup_lr` is declared as a [True, False] categorical.
cat_params = [
    "p:activation",
    "p:batch_normalization",
    "p:early_stopping",
    "p:loss",
    "p:optimizer",
    "p:residual",
    "p:reduce_lr",
    "p:scaling",
    "p:warmup_lr",
]


# Number of leading rows of `df` shown in the parallel plots.
topk = 100

In [61]:
# Hyperparameter columns of the results (those prefixed with "p:").
param_columns = [col for col in df.columns if col.startswith("p:")]

# Abbreviations applied, in this order, to shorten the axis labels.
abbreviations = (
    ("dense", "D"),
    ("feature", "F"),
    ("reduce", "red"),
    ("factor", "fact"),
    ("patience", "pat"),
    ("stopping", "stop"),
    ("learning_rate", "lr"),
)

# Map every hyperparameter column to its prefix-free, abbreviated label.
labels = {}
for col in param_columns:
    short_label = col[2:]
    for long_form, short_form in abbreviations:
        short_label = short_label.replace(long_form, short_form)
    labels[col] = short_label

# Parallel-coordinates view of the first `topk` rows of `df`, coloured by
# objective.
fig = px.parallel_coordinates(
    df[:topk],
    color="objective",
    dimensions=param_columns,
    labels=labels,
    # range_color=[df.objective.min(), df.objective.max()],
    color_continuous_scale=px.colors.diverging.RdBu,
    # color_continuous_midpoint=0.89,
)
fig.show(renderer="jupyterlab")

In [63]:
# Parallel-categories view of the columns listed in `cat_params` for the first
# `topk` rows of `df`, coloured by objective. Labels are the column names
# without their "p:" prefix.
category_labels = {col: col[2:] for col in df.columns if col.startswith("p:")}
top_rows = df[:topk]

fig = px.parallel_categories(
    top_rows,
    dimensions=cat_params,
    color="objective",
    color_continuous_scale=px.colors.diverging.RdBu,
    labels=category_labels,
)
fig.show()