In [1]:
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import optuna
import torch
from torch.autograd import Variable
import plotly


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
x_train = pd.read_csv(
    "../data/processed/x_train_w_OHE.csv", index_col=0, dtype=str
)
x_test = pd.read_csv(
    "../data/processed/x_test_w_OHE.csv", index_col=0, dtype=str
)
y_train = pd.read_csv(
    "../data/processed/y_train.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)
y_test = pd.read_csv(
    "../data/processed/y_test.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)

x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)


In [3]:
x_train.head()

Unnamed: 0,circuit_name,deenergize_time,restoration_time,key_communities,hftd_tier,total_affected,residential_affected,zip_code,longitude,latitude,...,zip_is_96035,zip_is_96051,zip_is_96055,zip_is_96059,zip_is_96069,zip_is_96073,zip_is_96076,zip_is_96080,zip_is_96096,zip_is_96137
452,SWIFT2110,2019-10-10 00:05:00,2019-10-11 14:17:00,"SAN JOSE, LIVERMORE",2.0,2232.0,2047.0,95148,-121.796959988177,37.3225680192999,...,0,0,0,0,0,0,0,0,0,0
443,STATION E EUREKA1105,2019-10-09 01:20:00,2019-10-09 23:10:00,EUREKA,0.0,1618.0,1264.0,95501,-124.180313467792,40.7934324220744,...,0,0,0,0,0,0,0,0,0,0
1804,SILVERADO,2021-08-17 18:22:00,2021-08-18 23:55:00,NAPA,3.0,1815.0,1516.0,94574,-122.459110675812,38.4998179385502,...,0,0,0,0,0,0,0,0,0,0
340,OREGON TRAIL1103,2019-10-09 01:39:00,2019-10-11 11:32:00,"REDDING, BELLA VISTA",2.0,1706.0,1599.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0
1390,OREGON TRAIL,2020-10-22 03:23:00,2020-10-23 11:30:00,"PALO CEDRO, REDDING",2.0,952.0,843.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0


In [4]:
zip_cols = x_train.columns[
    [re.search('zip_is', col) is not None for col in x_train.columns]
]


In [5]:
def get_correct_types_x(df, numeric_cols):
    for col in ['deenergize_time', 'restoration_time']:
        df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
    for col in numeric_cols:
        df[col] = df[col].astype(float)
    return df


numeric_cols = [
    'hftd_tier', 'total_affected', 'residential_affected',
    'longitude', 'latitude', 'total_pop', 'median_age', 'median_income',
    'white_pct', 'tmin_d-5', 'tmax_d-5', 'wspd_d-5', 'tmin_d-4', 'tmax_d-4',
    'wspd_d-4', 'tmin_d-3', 'tmax_d-3', 'wspd_d-3', 'tmin_d-2', 'tmax_d-2',
    'wspd_d-2', 'tmin_d-1', 'tmax_d-1', 'wspd_d-1'
]
x_train = get_correct_types_x(x_train, numeric_cols)
x_valid = get_correct_types_x(x_valid, numeric_cols)
x_test = get_correct_types_x(x_test, numeric_cols)
rel_x_train = x_train[numeric_cols]
rel_x_valid = x_valid[numeric_cols]
rel_x_test = x_test[numeric_cols]

scaler = StandardScaler()
scaler.fit(rel_x_train)
scaled_x_train = scaler.transform(rel_x_train)
scaled_x_valid = scaler.transform(rel_x_valid)
scaled_x_test = scaler.transform(rel_x_test)


In [6]:
class base_model(torch.nn.Module):

    def __init__(self, n_hidden_layers, n_hidden_units, activation=torch.nn.ReLU()):
        super(base_model, self).__init__()
        if n_hidden_layers == 0:
            self.linears =torch.nn.ModuleList([
                torch.nn.Linear(scaled_x_train.shape[1], 1)
            ])
            self.activation = activation
        else:
            assert len(n_hidden_units) == n_hidden_layers
            self.layers = []
            
            for layer, n_units in enumerate(n_hidden_units):
                if layer == 0:
                    curr_layer = torch.nn.Linear(scaled_x_train.shape[1], n_units)
                else:
                    curr_layer = torch.nn.Linear(n_hidden_units[layer - 1], n_units)
                self.layers.append(curr_layer)
            self.layers.append(torch.nn.Linear(n_hidden_units[-1], 1))
            self.linears = torch.nn.ModuleList(self.layers)
            self.activation = activation

    def forward(self, x):
        for layer in self.linears:
            x = self.activation(layer(x))
        return x


In [8]:
# x = torch.from_numpy(scaled_x_train).float()
# y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

# inputs = Variable(x)
# targets = Variable(y)

# # base = base_model(1, [1], activation=torch.nn.Tanh())
# base = base_model(2, [6, 3])
# print(base)
# optimizer = torch.optim.Adagrad(base.parameters(), lr=0.2)
# loss_func = torch.nn.MSELoss()

# for i in range(100000):
#    prediction = base(inputs)
#    loss = loss_func(prediction, targets)
#    if i % 100 == 0:
#       print(loss)
#    optimizer.zero_grad()
#    loss.backward()
#    optimizer.step()


- Used TanH instead of ReLU
- Adagrad instead of SGD -> SGD just returned 0 for all predictions
- More layers -> more overfitting, run simpler networks for more epochs gets better test error

In [9]:
# pd.Series(prediction.detach().numpy().reshape(-1))

In [10]:
# np.sqrt(loss.detach().numpy())

In [11]:
# test_x = Variable(torch.from_numpy(scaled_x_test).float())
# test_y = Variable(torch.from_numpy(y_test.values.reshape(-1, 1)).float())
# test_predictions = base(test_x)
# valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
# valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
# valid_predictions = base(valid_x)

In [12]:
# loss = loss_func(valid_predictions, valid_y)
# print(np.sqrt(loss.detach().numpy()))


In [17]:
# 1. Define an objective function to be maximized.
def objective(trial):

    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 0, 5)
    n_hidden_units = [0] * n_layers
    print(n_layers)
    for i in range(n_layers):
        n_hidden_units[i] = trial.suggest_int(f"n_h_{i}", 1, 100)
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    n_epochs = trial.suggest_int("n_epochs", 100, 100000)
    print(f"""Params:
          n_layers: {n_layers}
          n_hidden_units: {n_hidden_units}
          lr: {lr}
          n_epochs: {n_epochs}""")
        
    x = torch.from_numpy(scaled_x_train).float()
    y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

    inputs = Variable(x)
    targets = Variable(y)

    # base = base_model(1, [1], activation=torch.nn.Tanh())
    base = base_model(n_layers, n_hidden_units)
    print(base)
    optimizer = torch.optim.Adagrad(base.parameters(), lr=lr)
    loss_func = torch.nn.MSELoss()

    for i in range(n_epochs):
        prediction = base(inputs)
        loss = loss_func(prediction, targets)
        if i % 1000 == 0:
            print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
    valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
    valid_predictions = base(valid_x)
    loss = loss_func(valid_predictions, valid_y)
    return np.sqrt(loss.detach().numpy())


In [33]:
fig.write_image("nn_hpo/run_1.png")

In [15]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
pd.DataFrame.from_dict({"value": study.best_trial.values, "params": str(
    study.best_trial.params)}).to_csv("nn_hpo/run_1.csv", index=False)

fig = optuna.visualization.plot_optimization_history(study)
fig.show()


[32m[I 2022-11-25 14:25:19,656][0m A new study created in memory with name: no-name-40487ac1-e520-466e-8b1e-074a1145bd3f[0m


2
Params:
          n_layers: 2
          n_hidden_units: [87, 55]
          lr: 0.05787055677815427
          n_epochs: 1347
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=87, bias=True)
    (1): Linear(in_features=87, out_features=55, bias=True)
    (2): Linear(in_features=55, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9497073., grad_fn=<MseLossBackward0>)
tensor(1044370.4375, grad_fn=<MseLossBackward0>)
tensor(886137.3750, grad_fn=<MseLossBackward0>)
tensor(819015.3125, grad_fn=<MseLossBackward0>)
tensor(780284.3125, grad_fn=<MseLossBackward0>)
tensor(752925.9375, grad_fn=<MseLossBackward0>)
tensor(732037.3750, grad_fn=<MseLossBackward0>)
tensor(715010.1250, grad_fn=<MseLossBackward0>)
tensor(700812.2500, grad_fn=<MseLossBackward0>)
tensor(689068.8125, grad_fn=<MseLossBackward0>)
tensor(678620.5625, grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:25:20,552][0m Trial 0 finished with value: 915.4677124023438 and parameters: {'n_layers': 2, 'n_h_0': 87, 'n_h_1': 55, 'lr': 0.05787055677815427, 'n_epochs': 1347}. Best is trial 0 with value: 915.4677124023438.[0m


tensor(669382.3125, grad_fn=<MseLossBackward0>)
tensor(660984.4375, grad_fn=<MseLossBackward0>)
tensor(652955.8750, grad_fn=<MseLossBackward0>)
3
Params:
          n_layers: 3
          n_hidden_units: [68, 71, 95]
          lr: 0.45873270769849556
          n_epochs: 4290
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=68, bias=True)
    (1): Linear(in_features=68, out_features=71, bias=True)
    (2): Linear(in_features=71, out_features=95, bias=True)
    (3): Linear(in_features=95, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9496928., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148.,

[32m[I 2022-11-25 14:25:24,408][0m Trial 1 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 68, 'n_h_1': 71, 'n_h_2': 95, 'lr': 0.45873270769849556, 'n_epochs': 4290}. Best is trial 0 with value: 915.4677124023438.[0m


tensor(9497148., grad_fn=<MseLossBackward0>)
1
Params:
          n_layers: 1
          n_hidden_units: [16]
          lr: 0.20662951081511668
          n_epochs: 7041
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9496977., grad_fn=<MseLossBackward0>)
tensor(3396010.2500, grad_fn=<MseLossBackward0>)
tensor(2088741.8750, grad_fn=<MseLossBackward0>)
tensor(1690245.5000, grad_fn=<MseLossBackward0>)
tensor(1495504., grad_fn=<MseLossBackward0>)
tensor(1368100.7500, grad_fn=<MseLossBackward0>)
tensor(1273337.6250, grad_fn=<MseLossBackward0>)
tensor(1201072., grad_fn=<MseLossBackward0>)
tensor(1149981.3750, grad_fn=<MseLossBackward0>)
tensor(1111975.7500, grad_fn=<MseLossBackward0>)
tensor(1082755.8750, grad_fn=<MseLossBackward0>)
tensor(1058824.7500, grad_fn=<MseLossBackward0>)
tensor(1038465.6250, grad_fn=<MseLossBackward0>)
tensor(1021172., grad_f

[32m[I 2022-11-25 14:25:25,469][0m Trial 2 finished with value: 950.7656860351562 and parameters: {'n_layers': 1, 'n_h_0': 16, 'lr': 0.20662951081511668, 'n_epochs': 7041}. Best is trial 0 with value: 915.4677124023438.[0m


tensor(782762.0625, grad_fn=<MseLossBackward0>)
tensor(780670.7500, grad_fn=<MseLossBackward0>)
tensor(778563.8750, grad_fn=<MseLossBackward0>)
tensor(776573., grad_fn=<MseLossBackward0>)
tensor(774627.7500, grad_fn=<MseLossBackward0>)
tensor(772722.2500, grad_fn=<MseLossBackward0>)
tensor(770825.3125, grad_fn=<MseLossBackward0>)
3
Params:
          n_layers: 3
          n_hidden_units: [46, 34, 32]
          lr: 0.2838823225467899
          n_epochs: 4725
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=46, bias=True)
    (1): Linear(in_features=46, out_features=34, bias=True)
    (2): Linear(in_features=34, out_features=32, bias=True)
    (3): Linear(in_features=32, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(94

[32m[I 2022-11-25 14:25:28,333][0m Trial 3 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 46, 'n_h_1': 34, 'n_h_2': 32, 'lr': 0.2838823225467899, 'n_epochs': 4725}. Best is trial 0 with value: 915.4677124023438.[0m


tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
0
Params:
          n_layers: 0
          n_hidden_units: []
          lr: 0.29126389004441006
          n_epochs: 8441
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9495504., grad_fn=<MseLossBackward0>)
tensor(9357723., grad_fn=<MseLossBackward0>)
tensor(9295922., grad_fn=<MseLossBackward0>)
tensor(9247503., grad_fn=<MseLossBackward0>)
tensor(9206941., grad_fn=<MseLossBackward0>)
tensor(9171508., grad_fn=<MseLossBackward0>)
tensor(9139728., grad_fn=<MseLossBackward0>)
tensor(9110805., grad_fn=<MseLossBackward0>)
tensor(9084098., grad_fn=<MseLossBackward0>)
tensor(9059110., grad_fn=<MseLossBackward0>)
tensor(9035633., grad_fn=<MseLossBackward0>)
tensor(9013414., grad_fn=<MseLossBackward0>)
tensor(8992280., grad_fn=<MseLossBackward0>)
tensor(8972098., grad_fn=<MseLossBackward0>)
tensor(8952758., grad_fn=<MseLossBa

[32m[I 2022-11-25 14:25:29,005][0m Trial 4 finished with value: 2844.079345703125 and parameters: {'n_layers': 0, 'lr': 0.29126389004441006, 'n_epochs': 8441}. Best is trial 0 with value: 915.4677124023438.[0m


tensor(8361440., grad_fn=<MseLossBackward0>)
tensor(8353839.5000, grad_fn=<MseLossBackward0>)
tensor(8346307.5000, grad_fn=<MseLossBackward0>)
tensor(8338843., grad_fn=<MseLossBackward0>)
tensor(8331445., grad_fn=<MseLossBackward0>)
tensor(8324110.5000, grad_fn=<MseLossBackward0>)
tensor(8316840., grad_fn=<MseLossBackward0>)
tensor(8309629.5000, grad_fn=<MseLossBackward0>)
tensor(8302481., grad_fn=<MseLossBackward0>)
tensor(8295391.5000, grad_fn=<MseLossBackward0>)
tensor(8288359.5000, grad_fn=<MseLossBackward0>)
tensor(8281385.5000, grad_fn=<MseLossBackward0>)
tensor(8274467., grad_fn=<MseLossBackward0>)
tensor(8267601.5000, grad_fn=<MseLossBackward0>)
tensor(8260791.5000, grad_fn=<MseLossBackward0>)
tensor(8254033.5000, grad_fn=<MseLossBackward0>)
tensor(8247327., grad_fn=<MseLossBackward0>)
tensor(8240671., grad_fn=<MseLossBackward0>)
1
Params:
          n_layers: 1
          n_hidden_units: [55]
          lr: 0.4052954218938756
          n_epochs: 9100
base_model(
  (linears): Modu

[32m[I 2022-11-25 14:25:31,569][0m Trial 5 finished with value: 894.4140014648438 and parameters: {'n_layers': 1, 'n_h_0': 55, 'lr': 0.4052954218938756, 'n_epochs': 9100}. Best is trial 5 with value: 894.4140014648438.[0m


tensor(513250.2500, grad_fn=<MseLossBackward0>)
3
Params:
          n_layers: 3
          n_hidden_units: [50, 11, 32]
          lr: 0.33889983545149355
          n_epochs: 3325
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=11, bias=True)
    (2): Linear(in_features=11, out_features=32, bias=True)
    (3): Linear(in_features=32, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9496552., grad_fn=<MseLossBackward0>)
tensor(755602.5000, grad_fn=<MseLossBackward0>)
tensor(682822.1250, grad_fn=<MseLossBackward0>)
tensor(646079.5625, grad_fn=<MseLossBackward0>)
tensor(618317.8125, grad_fn=<MseLossBackward0>)
tensor(584816.8750, grad_fn=<MseLossBackward0>)
tensor(560306.6875, grad_fn=<MseLossBackward0>)
tensor(541148.8125, grad_fn=<MseLossBackward0>)
tensor(524715.4375, grad_fn=<MseLossBackward0>)
tensor(507570.5938, grad_fn=<MseLossBackward0>)
tensor(487956.4688, grad_fn=<MseLossBack

[32m[I 2022-11-25 14:25:33,256][0m Trial 6 finished with value: 941.136962890625 and parameters: {'n_layers': 3, 'n_h_0': 50, 'n_h_1': 11, 'n_h_2': 32, 'lr': 0.33889983545149355, 'n_epochs': 3325}. Best is trial 5 with value: 894.4140014648438.[0m


tensor(251464.9688, grad_fn=<MseLossBackward0>)
tensor(238147.5938, grad_fn=<MseLossBackward0>)
3
Params:
          n_layers: 3
          n_hidden_units: [62, 46, 74]
          lr: 0.20557297914630485
          n_epochs: 6440
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=62, bias=True)
    (1): Linear(in_features=62, out_features=46, bias=True)
    (2): Linear(in_features=46, out_features=74, bias=True)
    (3): Linear(in_features=74, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., gr

[32m[I 2022-11-25 14:25:38,084][0m Trial 7 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 62, 'n_h_1': 46, 'n_h_2': 74, 'lr': 0.20557297914630485, 'n_epochs': 6440}. Best is trial 5 with value: 894.4140014648438.[0m


tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
0
Params:
          n_layers: 0
          n_hidden_units: []
          lr: 0.3562370967403257
          n_epochs: 1265
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9496436., grad_fn=<MseLossBackward0>)
tensor(9331908., grad_fn=<MseLossBackward0>)
tensor(9260991., grad_fn=<MseLossBackward0>)
tensor(9207479., grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:25:38,186][0m Trial 8 finished with value: 2955.663818359375 and parameters: {'n_layers': 0, 'lr': 0.3562370967403257, 'n_epochs': 1265}. Best is trial 5 with value: 894.4140014648438.[0m


tensor(9162967., grad_fn=<MseLossBackward0>)
tensor(9124219., grad_fn=<MseLossBackward0>)
tensor(9089587., grad_fn=<MseLossBackward0>)
tensor(9058059., grad_fn=<MseLossBackward0>)
tensor(9028965., grad_fn=<MseLossBackward0>)
tensor(9001849., grad_fn=<MseLossBackward0>)
tensor(8976383., grad_fn=<MseLossBackward0>)
tensor(8952290., grad_fn=<MseLossBackward0>)
tensor(8929379., grad_fn=<MseLossBackward0>)
5
Params:
          n_layers: 5
          n_hidden_units: [20, 71, 49, 21, 42]
          lr: 0.13984338139968042
          n_epochs: 9950
base_model(
  (linears): ModuleList(
    (0): Linear(in_features=24, out_features=20, bias=True)
    (1): Linear(in_features=20, out_features=71, bias=True)
    (2): Linear(in_features=71, out_features=49, bias=True)
    (3): Linear(in_features=49, out_features=21, bias=True)
    (4): Linear(in_features=21, out_features=42, bias=True)
    (5): Linear(in_features=42, out_features=1, bias=True)
  )
  (activation): ReLU()
)
tensor(9497148., grad_fn=<MseLos

[32m[I 2022-11-25 14:25:47,477][0m Trial 9 finished with value: 3057.548095703125 and parameters: {'n_layers': 5, 'n_h_0': 20, 'n_h_1': 71, 'n_h_2': 49, 'n_h_3': 21, 'n_h_4': 42, 'lr': 0.13984338139968042, 'n_epochs': 9950}. Best is trial 5 with value: 894.4140014648438.[0m


tensor(9497148., grad_fn=<MseLossBackward0>)


In [16]:
study.best_trial

FrozenTrial(number=5, values=[894.4140014648438], datetime_start=datetime.datetime(2022, 11, 25, 14, 25, 29, 6298), datetime_complete=datetime.datetime(2022, 11, 25, 14, 25, 31, 569139), params={'n_layers': 1, 'n_h_0': 55, 'lr': 0.4052954218938756, 'n_epochs': 9100}, distributions={'n_layers': IntDistribution(high=5, log=False, low=0, step=1), 'n_h_0': IntDistribution(high=100, log=False, low=1, step=1), 'lr': FloatDistribution(high=0.5, log=False, low=0.001, step=None), 'n_epochs': IntDistribution(high=10000, log=False, low=1000, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=5, state=TrialState.COMPLETE, value=None)