In [1]:
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import optuna
import torch
from torch.autograd import Variable
import plotly
import tqdm as notebook_tqdm


In [2]:
x_train = pd.read_csv(
    "../data/processed/x_train_w_OHE.csv", index_col=0, dtype=str
)
x_test = pd.read_csv(
    "../data/processed/x_test_w_OHE.csv", index_col=0, dtype=str
)
y_train = pd.read_csv(
    "../data/processed/y_train.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)
y_test = pd.read_csv(
    "../data/processed/y_test.csv", index_col=0, dtype=float
).squeeze("columns").reset_index(drop=True)

x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)


In [3]:
x_train.head()

Unnamed: 0,circuit_name,deenergize_time,restoration_time,key_communities,hftd_tier,total_affected,residential_affected,zip_code,longitude,latitude,...,zip_is_96035,zip_is_96051,zip_is_96055,zip_is_96059,zip_is_96069,zip_is_96073,zip_is_96076,zip_is_96080,zip_is_96096,zip_is_96137
452,SWIFT2110,2019-10-10 00:05:00,2019-10-11 14:17:00,"SAN JOSE, LIVERMORE",2.0,2232.0,2047.0,95148,-121.796959988177,37.3225680192999,...,0,0,0,0,0,0,0,0,0,0
443,STATION E EUREKA1105,2019-10-09 01:20:00,2019-10-09 23:10:00,EUREKA,0.0,1618.0,1264.0,95501,-124.180313467792,40.7934324220744,...,0,0,0,0,0,0,0,0,0,0
1804,SILVERADO,2021-08-17 18:22:00,2021-08-18 23:55:00,NAPA,3.0,1815.0,1516.0,94574,-122.459110675812,38.4998179385502,...,0,0,0,0,0,0,0,0,0,0
340,OREGON TRAIL1103,2019-10-09 01:39:00,2019-10-11 11:32:00,"REDDING, BELLA VISTA",2.0,1706.0,1599.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0
1390,OREGON TRAIL,2020-10-22 03:23:00,2020-10-23 11:30:00,"PALO CEDRO, REDDING",2.0,952.0,843.0,96003,-122.322060242415,40.619045588007,...,0,0,0,0,0,0,0,0,0,0


In [4]:
zip_cols = x_train.columns[
    [re.search('zip_is', col) is not None for col in x_train.columns]
]


In [5]:
def get_correct_types_x(df, numeric_cols):
    for col in ['deenergize_time', 'restoration_time']:
        df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
    for col in numeric_cols:
        df[col] = df[col].astype(float)
    return df


numeric_cols = [
    'hftd_tier', 'total_affected', 'residential_affected',
    'longitude', 'latitude', 'total_pop', 'median_age', 'median_income',
    'white_pct', 'tmin_d-5', 'tmax_d-5', 'wspd_d-5', 'tmin_d-4', 'tmax_d-4',
    'wspd_d-4', 'tmin_d-3', 'tmax_d-3', 'wspd_d-3', 'tmin_d-2', 'tmax_d-2',
    'wspd_d-2', 'tmin_d-1', 'tmax_d-1', 'wspd_d-1'
]
x_train = get_correct_types_x(x_train, numeric_cols)
x_valid = get_correct_types_x(x_valid, numeric_cols)
x_test = get_correct_types_x(x_test, numeric_cols)
rel_x_train = x_train[numeric_cols]
rel_x_valid = x_valid[numeric_cols]
rel_x_test = x_test[numeric_cols]

scaler = StandardScaler()
scaler.fit(rel_x_train)
scaled_x_train = scaler.transform(rel_x_train)
scaled_x_valid = scaler.transform(rel_x_valid)
scaled_x_test = scaler.transform(rel_x_test)


In [6]:
class base_model(torch.nn.Module):

    def __init__(self, n_hidden_layers, n_hidden_units, activation=torch.nn.ReLU()):
        super(base_model, self).__init__()
        if n_hidden_layers == 0:
            self.linears =torch.nn.ModuleList([
                torch.nn.Linear(scaled_x_train.shape[1], 1)
            ])
            self.activation = activation
        else:
            assert len(n_hidden_units) == n_hidden_layers
            self.layers = []
            
            for layer, n_units in enumerate(n_hidden_units):
                if layer == 0:
                    curr_layer = torch.nn.Linear(scaled_x_train.shape[1], n_units)
                else:
                    curr_layer = torch.nn.Linear(n_hidden_units[layer - 1], n_units)
                self.layers.append(curr_layer)
            self.layers.append(torch.nn.Linear(n_hidden_units[-1], 1))
            self.linears = torch.nn.ModuleList(self.layers)
            self.activation = activation

    def forward(self, x):
        for layer in self.linears:
            x = self.activation(layer(x))
        return x


In [7]:
# x = torch.from_numpy(scaled_x_train).float()
# y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

# inputs = Variable(x)
# targets = Variable(y)

# # base = base_model(1, [1], activation=torch.nn.Tanh())
# base = base_model(2, [6, 3])
# print(base)
# optimizer = torch.optim.Adagrad(base.parameters(), lr=0.2)
# loss_func = torch.nn.MSELoss()

# for i in range(100000):
#    prediction = base(inputs)
#    loss = loss_func(prediction, targets)
#    if i % 100 == 0:
#       print(loss)
#    optimizer.zero_grad()
#    loss.backward()
#    optimizer.step()


- Used TanH instead of ReLU
- Adagrad instead of SGD -> SGD just returned 0 for all predictions
- More layers -> more overfitting, run simpler networks for more epochs gets better test error

In [8]:
# pd.Series(prediction.detach().numpy().reshape(-1))

In [9]:
# np.sqrt(loss.detach().numpy())

In [10]:
# test_x = Variable(torch.from_numpy(scaled_x_test).float())
# test_y = Variable(torch.from_numpy(y_test.values.reshape(-1, 1)).float())
# test_predictions = base(test_x)
# valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
# valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
# valid_predictions = base(valid_x)

In [11]:
# loss = loss_func(valid_predictions, valid_y)
# print(np.sqrt(loss.detach().numpy()))


In [12]:
# 1. Define an objective function to be maximized.
def objective(trial):

    # 2. Suggest values of the hyperparameters using a trial object.
    n_layers = trial.suggest_int('n_layers', 0, 3)
    n_hidden_units = [0] * n_layers
    print(n_layers)
    for i in range(n_layers):
        n_hidden_units[i] = trial.suggest_int(f"n_h_{i}", 1, 100)
    lr = trial.suggest_float("lr", 1e-5, 5e-1, log=True)
    n_epochs = trial.suggest_int("n_epochs", 1000, 100000)
    print(f"""Params:
          n_layers: {n_layers}
          n_hidden_units: {n_hidden_units}
          lr: {lr}
          n_epochs: {n_epochs}""")
        
    x = torch.from_numpy(scaled_x_train).float()
    y = torch.from_numpy(y_train.values.reshape(-1, 1)).float()

    inputs = Variable(x)
    targets = Variable(y)

    # base = base_model(1, [1], activation=torch.nn.Tanh())
    base = base_model(n_layers, n_hidden_units)
    optimizer = torch.optim.Adagrad(base.parameters(), lr=lr)
    loss_func = torch.nn.MSELoss()

    for i in range(n_epochs):
        prediction = base(inputs)
        loss = loss_func(prediction, targets)
        if i % 1000 == 0:
            print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        
    valid_x = Variable(torch.from_numpy(scaled_x_valid).float())
    valid_y = Variable(torch.from_numpy(y_valid.values.reshape(-1, 1)).float())
    valid_predictions = base(valid_x)
    loss = loss_func(valid_predictions, valid_y)
    print(f"Final valid loss: {loss}")
    print("#################")
    return np.sqrt(loss.detach().numpy())


In [14]:
# 3. Create a study object and optimize the objective function.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
pd.DataFrame.from_dict({"value": study.best_trial.values, "params": str(
    study.best_trial.params)}).to_csv("nn_hpo/run_1.csv", index=False)

fig = optuna.visualization.plot_optimization_history(study)
# fig.show()
fig.write_image("nn_hpo/run_1.png")


[32m[I 2022-11-25 14:37:51,456][0m A new study created in memory with name: no-name-b499b87c-2199-4190-81d6-4d240c6b8871[0m


3
Params:
          n_layers: 3
          n_hidden_units: [75, 85, 75]
          lr: 1.637924578685429e-05
          n_epochs: 81348
tensor(9497003., grad_fn=<MseLossBackward0>)
tensor(9496752., grad_fn=<MseLossBackward0>)
tensor(9496636., grad_fn=<MseLossBackward0>)
tensor(9496543., grad_fn=<MseLossBackward0>)
tensor(9496460., grad_fn=<MseLossBackward0>)
tensor(9496385., grad_fn=<MseLossBackward0>)
tensor(9496315., grad_fn=<MseLossBackward0>)
tensor(9496247., grad_fn=<MseLossBackward0>)
tensor(9496181., grad_fn=<MseLossBackward0>)
tensor(9496118., grad_fn=<MseLossBackward0>)
tensor(9496057., grad_fn=<MseLossBackward0>)
tensor(9495995., grad_fn=<MseLossBackward0>)
tensor(9495934., grad_fn=<MseLossBackward0>)
tensor(9495875., grad_fn=<MseLossBackward0>)
tensor(9495818., grad_fn=<MseLossBackward0>)
tensor(9495760., grad_fn=<MseLossBackward0>)
tensor(9495701., grad_fn=<MseLossBackward0>)
tensor(9495644., grad_fn=<MseLossBackward0>)
tensor(9495586., grad_fn=<MseLossBackward0>)
tensor(94955

[32m[I 2022-11-25 14:39:03,927][0m Trial 0 finished with value: 3056.619873046875 and parameters: {'n_layers': 3, 'n_h_0': 75, 'n_h_1': 85, 'n_h_2': 75, 'lr': 1.637924578685429e-05, 'n_epochs': 81348}. Best is trial 0 with value: 3056.619873046875.[0m


Final valid loss: 9342925.0
#################
0
Params:
          n_layers: 0
          n_hidden_units: []
          lr: 0.03193453545811293
          n_epochs: 97086
tensor(9496054., grad_fn=<MseLossBackward0>)
tensor(9446492., grad_fn=<MseLossBackward0>)
tensor(9425732., grad_fn=<MseLossBackward0>)
tensor(9409883., grad_fn=<MseLossBackward0>)
tensor(9396577., grad_fn=<MseLossBackward0>)
tensor(9384892., grad_fn=<MseLossBackward0>)
tensor(9374359., grad_fn=<MseLossBackward0>)
tensor(9364698., grad_fn=<MseLossBackward0>)
tensor(9355729., grad_fn=<MseLossBackward0>)
tensor(9347324., grad_fn=<MseLossBackward0>)
tensor(9339393., grad_fn=<MseLossBackward0>)
tensor(9331864., grad_fn=<MseLossBackward0>)
tensor(9324683., grad_fn=<MseLossBackward0>)
tensor(9317810., grad_fn=<MseLossBackward0>)
tensor(9311208., grad_fn=<MseLossBackward0>)
tensor(9304850., grad_fn=<MseLossBackward0>)
tensor(9298710., grad_fn=<MseLossBackward0>)
tensor(9292769., grad_fn=<MseLossBackward0>)
tensor(9287009., grad_f

[32m[I 2022-11-25 14:39:11,382][0m Trial 1 finished with value: 2975.86279296875 and parameters: {'n_layers': 0, 'lr': 0.03193453545811293, 'n_epochs': 97086}. Best is trial 1 with value: 2975.86279296875.[0m


tensor(9029086., grad_fn=<MseLossBackward0>)
tensor(9026810., grad_fn=<MseLossBackward0>)
Final valid loss: 8855760.0
#################
1
Params:
          n_layers: 1
          n_hidden_units: [16]
          lr: 0.004665052206171935
          n_epochs: 75289
tensor(9497147., grad_fn=<MseLossBackward0>)
tensor(9444583., grad_fn=<MseLossBackward0>)
tensor(9386754., grad_fn=<MseLossBackward0>)
tensor(9325733., grad_fn=<MseLossBackward0>)
tensor(9265119., grad_fn=<MseLossBackward0>)
tensor(9205007., grad_fn=<MseLossBackward0>)
tensor(9145410., grad_fn=<MseLossBackward0>)
tensor(9086318., grad_fn=<MseLossBackward0>)
tensor(9027710., grad_fn=<MseLossBackward0>)
tensor(8969576., grad_fn=<MseLossBackward0>)
tensor(8911910., grad_fn=<MseLossBackward0>)
tensor(8854711., grad_fn=<MseLossBackward0>)
tensor(8797959., grad_fn=<MseLossBackward0>)
tensor(8741659., grad_fn=<MseLossBackward0>)
tensor(8685791., grad_fn=<MseLossBackward0>)
tensor(8630342., grad_fn=<MseLossBackward0>)
tensor(8575297., gra

[32m[I 2022-11-25 14:39:22,226][0m Trial 2 finished with value: 2417.63134765625 and parameters: {'n_layers': 1, 'n_h_0': 16, 'lr': 0.004665052206171935, 'n_epochs': 75289}. Best is trial 2 with value: 2417.63134765625.[0m


tensor(5997677.5000, grad_fn=<MseLossBackward0>)
Final valid loss: 5844941.5
#################
2
Params:
          n_layers: 2
          n_hidden_units: [3, 54]
          lr: 0.0037085756817440765
          n_epochs: 22655
tensor(9497132., grad_fn=<MseLossBackward0>)
tensor(9319807., grad_fn=<MseLossBackward0>)
tensor(8981219., grad_fn=<MseLossBackward0>)
tensor(8554631., grad_fn=<MseLossBackward0>)
tensor(8081075.5000, grad_fn=<MseLossBackward0>)
tensor(7585001., grad_fn=<MseLossBackward0>)
tensor(7085138.5000, grad_fn=<MseLossBackward0>)
tensor(6590692.5000, grad_fn=<MseLossBackward0>)
tensor(6109843., grad_fn=<MseLossBackward0>)
tensor(5650161.5000, grad_fn=<MseLossBackward0>)
tensor(5225003.5000, grad_fn=<MseLossBackward0>)
tensor(4835146., grad_fn=<MseLossBackward0>)
tensor(4481948., grad_fn=<MseLossBackward0>)
tensor(4167104.7500, grad_fn=<MseLossBackward0>)
tensor(3891951., grad_fn=<MseLossBackward0>)
tensor(3652882., grad_fn=<MseLossBackward0>)
tensor(3446578.5000, grad_fn=<Mse

[32m[I 2022-11-25 14:39:30,557][0m Trial 3 finished with value: 1674.7156982421875 and parameters: {'n_layers': 2, 'n_h_0': 3, 'n_h_1': 54, 'lr': 0.0037085756817440765, 'n_epochs': 22655}. Best is trial 3 with value: 1674.7156982421875.[0m


Final valid loss: 2804672.5
#################
2
Params:
          n_layers: 2
          n_hidden_units: [79, 57]
          lr: 6.021243073181496e-05
          n_epochs: 46471
tensor(9497134., grad_fn=<MseLossBackward0>)
tensor(9496372., grad_fn=<MseLossBackward0>)
tensor(9495890., grad_fn=<MseLossBackward0>)
tensor(9495501., grad_fn=<MseLossBackward0>)
tensor(9495153., grad_fn=<MseLossBackward0>)
tensor(9494830., grad_fn=<MseLossBackward0>)
tensor(9494521., grad_fn=<MseLossBackward0>)
tensor(9494225., grad_fn=<MseLossBackward0>)
tensor(9493933., grad_fn=<MseLossBackward0>)
tensor(9493647., grad_fn=<MseLossBackward0>)
tensor(9493365., grad_fn=<MseLossBackward0>)
tensor(9493084., grad_fn=<MseLossBackward0>)
tensor(9492806., grad_fn=<MseLossBackward0>)
tensor(9492531., grad_fn=<MseLossBackward0>)
tensor(9492256., grad_fn=<MseLossBackward0>)
tensor(9491981., grad_fn=<MseLossBackward0>)
tensor(9491708., grad_fn=<MseLossBackward0>)
tensor(9491435., grad_fn=<MseLossBackward0>)
tensor(9491162.

[32m[I 2022-11-25 14:39:56,559][0m Trial 4 finished with value: 3055.2568359375 and parameters: {'n_layers': 2, 'n_h_0': 79, 'n_h_1': 57, 'lr': 6.021243073181496e-05, 'n_epochs': 46471}. Best is trial 3 with value: 1674.7156982421875.[0m


Final valid loss: 9334594.0
#################
2
Params:
          n_layers: 2
          n_hidden_units: [51, 23]
          lr: 7.038211240965335e-05
          n_epochs: 68086
tensor(9497125., grad_fn=<MseLossBackward0>)
tensor(9496886., grad_fn=<MseLossBackward0>)
tensor(9496547., grad_fn=<MseLossBackward0>)
tensor(9496245., grad_fn=<MseLossBackward0>)
tensor(9495970., grad_fn=<MseLossBackward0>)
tensor(9495709., grad_fn=<MseLossBackward0>)
tensor(9495462., grad_fn=<MseLossBackward0>)
tensor(9495231., grad_fn=<MseLossBackward0>)
tensor(9495009., grad_fn=<MseLossBackward0>)
tensor(9494795., grad_fn=<MseLossBackward0>)
tensor(9494588., grad_fn=<MseLossBackward0>)
tensor(9494385., grad_fn=<MseLossBackward0>)
tensor(9494189., grad_fn=<MseLossBackward0>)
tensor(9493995., grad_fn=<MseLossBackward0>)
tensor(9493804., grad_fn=<MseLossBackward0>)
tensor(9493615., grad_fn=<MseLossBackward0>)
tensor(9493429., grad_fn=<MseLossBackward0>)
tensor(9493246., grad_fn=<MseLossBackward0>)
tensor(9493062.

[32m[I 2022-11-25 14:40:22,157][0m Trial 5 finished with value: 3055.42626953125 and parameters: {'n_layers': 2, 'n_h_0': 51, 'n_h_1': 23, 'lr': 7.038211240965335e-05, 'n_epochs': 68086}. Best is trial 3 with value: 1674.7156982421875.[0m


tensor(9484317., grad_fn=<MseLossBackward0>)
Final valid loss: 9335629.0
#################
1
Params:
          n_layers: 1
          n_hidden_units: [60]
          lr: 0.00018927773057705275
          n_epochs: 15882
tensor(9496569., grad_fn=<MseLossBackward0>)
tensor(9495174., grad_fn=<MseLossBackward0>)
tensor(9494464., grad_fn=<MseLossBackward0>)
tensor(9493867., grad_fn=<MseLossBackward0>)
tensor(9493317., grad_fn=<MseLossBackward0>)
tensor(9492814., grad_fn=<MseLossBackward0>)
tensor(9492348., grad_fn=<MseLossBackward0>)
tensor(9491914., grad_fn=<MseLossBackward0>)
tensor(9491501., grad_fn=<MseLossBackward0>)
tensor(9491105., grad_fn=<MseLossBackward0>)
tensor(9490722., grad_fn=<MseLossBackward0>)
tensor(9490353., grad_fn=<MseLossBackward0>)
tensor(9489993., grad_fn=<MseLossBackward0>)
tensor(9489638., grad_fn=<MseLossBackward0>)
tensor(9489291., grad_fn=<MseLossBackward0>)
tensor(9488948., grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:40:26,629][0m Trial 6 finished with value: 3056.16943359375 and parameters: {'n_layers': 1, 'n_h_0': 60, 'lr': 0.00018927773057705275, 'n_epochs': 15882}. Best is trial 3 with value: 1674.7156982421875.[0m


Final valid loss: 9340171.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [55, 62, 62]
          lr: 0.006581640845845318
          n_epochs: 63722
tensor(9496508., grad_fn=<MseLossBackward0>)
tensor(1020378., grad_fn=<MseLossBackward0>)
tensor(885837.8750, grad_fn=<MseLossBackward0>)
tensor(820588.2500, grad_fn=<MseLossBackward0>)
tensor(776168.7500, grad_fn=<MseLossBackward0>)
tensor(744366.7500, grad_fn=<MseLossBackward0>)
tensor(719408.9375, grad_fn=<MseLossBackward0>)
tensor(698061.6250, grad_fn=<MseLossBackward0>)
tensor(679130.1875, grad_fn=<MseLossBackward0>)
tensor(661247.1875, grad_fn=<MseLossBackward0>)
tensor(644418.5625, grad_fn=<MseLossBackward0>)
tensor(628135.5000, grad_fn=<MseLossBackward0>)
tensor(612661.8125, grad_fn=<MseLossBackward0>)
tensor(598861.6250, grad_fn=<MseLossBackward0>)
tensor(585950.7500, grad_fn=<MseLossBackward0>)
tensor(574102.1875, grad_fn=<MseLossBackward0>)
tensor(563015.5625, grad_fn=<MseLossBackward0>)
tensor(55211

[32m[I 2022-11-25 14:41:14,970][0m Trial 7 finished with value: 908.2461547851562 and parameters: {'n_layers': 3, 'n_h_0': 55, 'n_h_1': 62, 'n_h_2': 62, 'lr': 0.006581640845845318, 'n_epochs': 63722}. Best is trial 7 with value: 908.2461547851562.[0m


Final valid loss: 824911.0625
#################
2
Params:
          n_layers: 2
          n_hidden_units: [10, 84]
          lr: 1.5264029860954243e-05
          n_epochs: 33208
tensor(9497147., grad_fn=<MseLossBackward0>)
tensor(9497143., grad_fn=<MseLossBackward0>)
tensor(9497137., grad_fn=<MseLossBackward0>)
tensor(9497132., grad_fn=<MseLossBackward0>)
tensor(9497126., grad_fn=<MseLossBackward0>)
tensor(9497119., grad_fn=<MseLossBackward0>)
tensor(9497111., grad_fn=<MseLossBackward0>)
tensor(9497104., grad_fn=<MseLossBackward0>)
tensor(9497095., grad_fn=<MseLossBackward0>)
tensor(9497084., grad_fn=<MseLossBackward0>)
tensor(9497073., grad_fn=<MseLossBackward0>)
tensor(9497060., grad_fn=<MseLossBackward0>)
tensor(9497044., grad_fn=<MseLossBackward0>)
tensor(9497028., grad_fn=<MseLossBackward0>)
tensor(9497011., grad_fn=<MseLossBackward0>)
tensor(9496993., grad_fn=<MseLossBackward0>)
tensor(9496973., grad_fn=<MseLossBackward0>)
tensor(9496953., grad_fn=<MseLossBackward0>)
tensor(94969

[32m[I 2022-11-25 14:41:28,799][0m Trial 8 finished with value: 3057.467529296875 and parameters: {'n_layers': 2, 'n_h_0': 10, 'n_h_1': 84, 'lr': 1.5264029860954243e-05, 'n_epochs': 33208}. Best is trial 7 with value: 908.2461547851562.[0m


tensor(9496662., grad_fn=<MseLossBackward0>)
Final valid loss: 9348108.0
#################
2
Params:
          n_layers: 2
          n_hidden_units: [90, 28]
          lr: 0.05427120975261596
          n_epochs: 67955
tensor(9496990., grad_fn=<MseLossBackward0>)
tensor(721103.1875, grad_fn=<MseLossBackward0>)
tensor(651679.6250, grad_fn=<MseLossBackward0>)
tensor(615077.3750, grad_fn=<MseLossBackward0>)
tensor(547621.6250, grad_fn=<MseLossBackward0>)
tensor(480261.5625, grad_fn=<MseLossBackward0>)
tensor(424269.3125, grad_fn=<MseLossBackward0>)
tensor(367163.0938, grad_fn=<MseLossBackward0>)
tensor(319390.9688, grad_fn=<MseLossBackward0>)
tensor(283337.8750, grad_fn=<MseLossBackward0>)
tensor(254803.1094, grad_fn=<MseLossBackward0>)
tensor(227973.5312, grad_fn=<MseLossBackward0>)
tensor(205236.1562, grad_fn=<MseLossBackward0>)
tensor(188791.6719, grad_fn=<MseLossBackward0>)
tensor(174752., grad_fn=<MseLossBackward0>)
tensor(163428.7344, grad_fn=<MseLossBackward0>)
tensor(153638.7812, g

[32m[I 2022-11-25 14:42:03,337][0m Trial 9 finished with value: 1193.8299560546875 and parameters: {'n_layers': 2, 'n_h_0': 90, 'n_h_1': 28, 'lr': 0.05427120975261596, 'n_epochs': 67955}. Best is trial 7 with value: 908.2461547851562.[0m


Final valid loss: 1425230.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [33, 3, 22]
          lr: 0.3739739949663906
          n_epochs: 1072
tensor(9496591., grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:42:03,787][0m Trial 10 finished with value: 905.126953125 and parameters: {'n_layers': 3, 'n_h_0': 33, 'n_h_1': 3, 'n_h_2': 22, 'lr': 0.3739739949663906, 'n_epochs': 1072}. Best is trial 10 with value: 905.126953125.[0m


tensor(565486.3125, grad_fn=<MseLossBackward0>)
Final valid loss: 819254.75
#################
3
Params:
          n_layers: 3
          n_hidden_units: [31, 5, 11]
          lr: 0.4972894309648099
          n_epochs: 5738
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:42:05,987][0m Trial 11 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 31, 'n_h_1': 5, 'n_h_2': 11, 'lr': 0.4972894309648099, 'n_epochs': 5738}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 9348601.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [30, 66, 36]
          lr: 0.22383589460596637
          n_epochs: 48487
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(949714

[32m[I 2022-11-25 14:42:37,362][0m Trial 12 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 30, 'n_h_1': 66, 'n_h_2': 36, 'lr': 0.22383589460596637, 'n_epochs': 48487}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 9348601.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [37, 36, 60]
          lr: 0.018436813724756335
          n_epochs: 55695
tensor(9496651., grad_fn=<MseLossBackward0>)
tensor(751946.1875, grad_fn=<MseLossBackward0>)
tensor(673356., grad_fn=<MseLossBackward0>)
tensor(631153.8750, grad_fn=<MseLossBackward0>)
tensor(603694.5000, grad_fn=<MseLossBackward0>)
tensor(578915.3750, grad_fn=<MseLossBackward0>)
tensor(548667.7500, grad_fn=<MseLossBackward0>)
tensor(509101.4062, grad_fn=<MseLossBackward0>)
tensor(462989.0312, grad_fn=<MseLossBackward0>)
tensor(423797.0938, grad_fn=<MseLossBackward0>)
tensor(385250.6875, grad_fn=<MseLossBackward0>)
tensor(353181.9375, grad_fn=<MseLossBackward0>)
tensor(324254.4062, grad_fn=<MseLossBackward0>)
tensor(302170.9375, grad_fn=<MseLossBackward0>)
tensor(286339.6875, grad_fn=<MseLossBackward0>)
tensor(273803.3438, grad_fn=<MseLossBackward0>)
tensor(262067.4062, grad_fn=<MseLossBackward0>)
tensor(250537

[32m[I 2022-11-25 14:43:13,658][0m Trial 13 finished with value: 1069.722900390625 and parameters: {'n_layers': 3, 'n_h_0': 37, 'n_h_1': 36, 'n_h_2': 60, 'lr': 0.018436813724756335, 'n_epochs': 55695}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 1144307.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [60, 1, 33]
          lr: 0.0007205399924720446
          n_epochs: 33812
tensor(9494927., grad_fn=<MseLossBackward0>)
tensor(9485486., grad_fn=<MseLossBackward0>)
tensor(9472533., grad_fn=<MseLossBackward0>)
tensor(9455856., grad_fn=<MseLossBackward0>)
tensor(9435522., grad_fn=<MseLossBackward0>)
tensor(9411632., grad_fn=<MseLossBackward0>)
tensor(9384274., grad_fn=<MseLossBackward0>)
tensor(9353431., grad_fn=<MseLossBackward0>)
tensor(9319039., grad_fn=<MseLossBackward0>)
tensor(9281191., grad_fn=<MseLossBackward0>)
tensor(9240011., grad_fn=<MseLossBackward0>)
tensor(9195618., grad_fn=<MseLossBackward0>)
tensor(9148114., grad_fn=<MseLossBackward0>)
tensor(9097604., grad_fn=<MseLossBackward0>)
tensor(9044191., grad_fn=<MseLossBackward0>)
tensor(8987971., grad_fn=<MseLossBackward0>)
tensor(8929060., grad_fn=<MseLossBackward0>)
tensor(8867567., grad_fn=<MseLossBackward0>)
tensor(88035

[32m[I 2022-11-25 14:43:30,429][0m Trial 14 finished with value: 2713.61181640625 and parameters: {'n_layers': 3, 'n_h_0': 60, 'n_h_1': 1, 'n_h_2': 33, 'lr': 0.0007205399924720446, 'n_epochs': 33812}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 7363689.0
#################
0
Params:
          n_layers: 0
          n_hidden_units: []
          lr: 0.10556182540566317
          n_epochs: 89761
tensor(9495638., grad_fn=<MseLossBackward0>)
tensor(9326009., grad_fn=<MseLossBackward0>)
tensor(9256459., grad_fn=<MseLossBackward0>)
tensor(9203862., grad_fn=<MseLossBackward0>)
tensor(9160037., grad_fn=<MseLossBackward0>)
tensor(9121816., grad_fn=<MseLossBackward0>)
tensor(9087573., grad_fn=<MseLossBackward0>)
tensor(9056342., grad_fn=<MseLossBackward0>)
tensor(9027497., grad_fn=<MseLossBackward0>)
tensor(9000599., grad_fn=<MseLossBackward0>)
tensor(8975330., grad_fn=<MseLossBackward0>)
tensor(8951451., grad_fn=<MseLossBackward0>)
tensor(8928775., grad_fn=<MseLossBackward0>)
tensor(8907156., grad_fn=<MseLossBackward0>)
tensor(8886470., grad_fn=<MseLossBackward0>)
tensor(8866620., grad_fn=<MseLossBackward0>)
tensor(8847521., grad_fn=<MseLossBackward0>)
tensor(8829102., grad_fn=<MseLossBackward0>)
tensor(8811306., grad_f

[32m[I 2022-11-25 14:43:37,644][0m Trial 15 finished with value: 2814.10498046875 and parameters: {'n_layers': 0, 'lr': 0.10556182540566317, 'n_epochs': 89761}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 7919187.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [44, 99, 99]
          lr: 0.0009991450988346797
          n_epochs: 5537
tensor(9496680., grad_fn=<MseLossBackward0>)
tensor(7977422.5000, grad_fn=<MseLossBackward0>)
tensor(5146032., grad_fn=<MseLossBackward0>)
tensor(3135366.7500, grad_fn=<MseLossBackward0>)
tensor(2262713.7500, grad_fn=<MseLossBackward0>)
tensor(1910363.1250, grad_fn=<MseLossBackward0>)


[32m[I 2022-11-25 14:43:42,636][0m Trial 16 finished with value: 1426.623046875 and parameters: {'n_layers': 3, 'n_h_0': 44, 'n_h_1': 99, 'n_h_2': 99, 'lr': 0.0009991450988346797, 'n_epochs': 5537}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 2035253.375
#################
1
Params:
          n_layers: 1
          n_hidden_units: [21]
          lr: 0.010241839671891299
          n_epochs: 58430
tensor(9495751., grad_fn=<MseLossBackward0>)
tensor(9125443., grad_fn=<MseLossBackward0>)
tensor(8762312., grad_fn=<MseLossBackward0>)
tensor(8416325., grad_fn=<MseLossBackward0>)
tensor(8086725.5000, grad_fn=<MseLossBackward0>)
tensor(7773101.5000, grad_fn=<MseLossBackward0>)
tensor(7474946.5000, grad_fn=<MseLossBackward0>)
tensor(7191674.5000, grad_fn=<MseLossBackward0>)
tensor(6922742., grad_fn=<MseLossBackward0>)
tensor(6667507., grad_fn=<MseLossBackward0>)
tensor(6425351.5000, grad_fn=<MseLossBackward0>)
tensor(6195697., grad_fn=<MseLossBackward0>)
tensor(5977945., grad_fn=<MseLossBackward0>)
tensor(5771539.5000, grad_fn=<MseLossBackward0>)
tensor(5575950.5000, grad_fn=<MseLossBackward0>)
tensor(5390628.5000, grad_fn=<MseLossBackward0>)
tensor(5215158.5000, grad_fn=<MseLossBackward0>)
tensor(5049065.5000, grad_f

[32m[I 2022-11-25 14:43:52,472][0m Trial 17 finished with value: 1591.6282958984375 and parameters: {'n_layers': 1, 'n_h_0': 21, 'lr': 0.010241839671891299, 'n_epochs': 58430}. Best is trial 10 with value: 905.126953125.[0m


tensor(2336030.7500, grad_fn=<MseLossBackward0>)
Final valid loss: 2533280.5
#################
3
Params:
          n_layers: 3
          n_hidden_units: [60, 40, 7]
          lr: 0.48947991033166127
          n_epochs: 23584
tensor(9495950., grad_fn=<MseLossBackward0>)
tensor(536932.7500, grad_fn=<MseLossBackward0>)
tensor(399711.6250, grad_fn=<MseLossBackward0>)
tensor(316271.5312, grad_fn=<MseLossBackward0>)
tensor(257168.5938, grad_fn=<MseLossBackward0>)
tensor(205870.2969, grad_fn=<MseLossBackward0>)
tensor(171267.1875, grad_fn=<MseLossBackward0>)
tensor(160489.0156, grad_fn=<MseLossBackward0>)
tensor(131510.7344, grad_fn=<MseLossBackward0>)
tensor(115400.0547, grad_fn=<MseLossBackward0>)
tensor(104529.3594, grad_fn=<MseLossBackward0>)
tensor(92541.9375, grad_fn=<MseLossBackward0>)
tensor(90239.1094, grad_fn=<MseLossBackward0>)
tensor(92923.5312, grad_fn=<MseLossBackward0>)
tensor(81395.1172, grad_fn=<MseLossBackward0>)
tensor(79000.8594, grad_fn=<MseLossBackward0>)
tensor(71310.79

[32m[I 2022-11-25 14:44:06,266][0m Trial 18 finished with value: 1208.7030029296875 and parameters: {'n_layers': 3, 'n_h_0': 60, 'n_h_1': 40, 'n_h_2': 7, 'lr': 0.48947991033166127, 'n_epochs': 23584}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 1460963.0
#################
2
Params:
          n_layers: 2
          n_hidden_units: [49, 72]
          lr: 0.0008116970767319741
          n_epochs: 36619
tensor(9496134., grad_fn=<MseLossBackward0>)
tensor(9437412., grad_fn=<MseLossBackward0>)
tensor(9349835., grad_fn=<MseLossBackward0>)
tensor(9241966., grad_fn=<MseLossBackward0>)
tensor(9118231., grad_fn=<MseLossBackward0>)
tensor(8981553., grad_fn=<MseLossBackward0>)
tensor(8834094., grad_fn=<MseLossBackward0>)
tensor(8677694., grad_fn=<MseLossBackward0>)
tensor(8513885., grad_fn=<MseLossBackward0>)
tensor(8344116., grad_fn=<MseLossBackward0>)
tensor(8169557., grad_fn=<MseLossBackward0>)
tensor(7991293.5000, grad_fn=<MseLossBackward0>)
tensor(7810351.5000, grad_fn=<MseLossBackward0>)
tensor(7627633.5000, grad_fn=<MseLossBackward0>)
tensor(7443964.5000, grad_fn=<MseLossBackward0>)
tensor(7260119., grad_fn=<MseLossBackward0>)
tensor(7076811., grad_fn=<MseLossBackward0>)
tensor(6894650., grad_fn=<MseLossBackward0>)

[32m[I 2022-11-25 14:44:27,776][0m Trial 19 finished with value: 1997.413330078125 and parameters: {'n_layers': 2, 'n_h_0': 49, 'n_h_1': 72, 'lr': 0.0008116970767319741, 'n_epochs': 36619}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 3989660.25
#################
3
Params:
          n_layers: 3
          n_hidden_units: [72, 16, 54]
          lr: 0.07447066715529675
          n_epochs: 61864
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(94971

[32m[I 2022-11-25 14:45:03,703][0m Trial 20 finished with value: 3057.548095703125 and parameters: {'n_layers': 3, 'n_h_0': 72, 'n_h_1': 16, 'n_h_2': 54, 'lr': 0.07447066715529675, 'n_epochs': 61864}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 9348601.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [36, 39, 63]
          lr: 0.015514618594816615
          n_epochs: 58057
tensor(9496923., grad_fn=<MseLossBackward0>)
tensor(792114.6250, grad_fn=<MseLossBackward0>)
tensor(718880.5625, grad_fn=<MseLossBackward0>)
tensor(683376., grad_fn=<MseLossBackward0>)
tensor(655851.7500, grad_fn=<MseLossBackward0>)
tensor(628703.7500, grad_fn=<MseLossBackward0>)
tensor(592357.8750, grad_fn=<MseLossBackward0>)
tensor(557649.5000, grad_fn=<MseLossBackward0>)
tensor(521993.7812, grad_fn=<MseLossBackward0>)
tensor(488734.0625, grad_fn=<MseLossBackward0>)
tensor(459335.4375, grad_fn=<MseLossBackward0>)
tensor(432830.1562, grad_fn=<MseLossBackward0>)
tensor(410095.2500, grad_fn=<MseLossBackward0>)
tensor(389537.3750, grad_fn=<MseLossBackward0>)
tensor(370808.6562, grad_fn=<MseLossBackward0>)
tensor(352800.4688, grad_fn=<MseLossBackward0>)
tensor(335893.5938, grad_fn=<MseLossBackward0>)
tensor(319521

[32m[I 2022-11-25 14:45:42,826][0m Trial 21 finished with value: 1014.9299926757812 and parameters: {'n_layers': 3, 'n_h_0': 36, 'n_h_1': 39, 'n_h_2': 63, 'lr': 0.015514618594816615, 'n_epochs': 58057}. Best is trial 10 with value: 905.126953125.[0m


tensor(143820.5000, grad_fn=<MseLossBackward0>)
Final valid loss: 1030082.9375
#################
3
Params:
          n_layers: 3
          n_hidden_units: [24, 43, 74]
          lr: 0.010775276784238239
          n_epochs: 45392
tensor(9497143., grad_fn=<MseLossBackward0>)
tensor(925930.1875, grad_fn=<MseLossBackward0>)
tensor(812126.1250, grad_fn=<MseLossBackward0>)
tensor(762751.6875, grad_fn=<MseLossBackward0>)
tensor(729000.3125, grad_fn=<MseLossBackward0>)
tensor(702865.1875, grad_fn=<MseLossBackward0>)
tensor(681068.6250, grad_fn=<MseLossBackward0>)
tensor(666342.0625, grad_fn=<MseLossBackward0>)
tensor(653377.7500, grad_fn=<MseLossBackward0>)
tensor(641374.4375, grad_fn=<MseLossBackward0>)
tensor(631179.3125, grad_fn=<MseLossBackward0>)
tensor(622816.5000, grad_fn=<MseLossBackward0>)
tensor(615635.7500, grad_fn=<MseLossBackward0>)
tensor(608658.1875, grad_fn=<MseLossBackward0>)
tensor(602365.9375, grad_fn=<MseLossBackward0>)
tensor(596465.1875, grad_fn=<MseLossBackward0>)
tensor

[32m[I 2022-11-25 14:46:11,892][0m Trial 22 finished with value: 933.3225708007812 and parameters: {'n_layers': 3, 'n_h_0': 24, 'n_h_1': 43, 'n_h_2': 74, 'lr': 0.010775276784238239, 'n_epochs': 45392}. Best is trial 10 with value: 905.126953125.[0m


Final valid loss: 871091.0625
#################
3
Params:
          n_layers: 3
          n_hidden_units: [22, 64, 86]
          lr: 0.006016377752319457
          n_epochs: 83140
tensor(9497017., grad_fn=<MseLossBackward0>)
tensor(1171613.5000, grad_fn=<MseLossBackward0>)
tensor(1000120.0625, grad_fn=<MseLossBackward0>)
tensor(923314.9375, grad_fn=<MseLossBackward0>)
tensor(875468., grad_fn=<MseLossBackward0>)
tensor(839266.5000, grad_fn=<MseLossBackward0>)
tensor(810418.8125, grad_fn=<MseLossBackward0>)
tensor(788705.1250, grad_fn=<MseLossBackward0>)
tensor(772341.3750, grad_fn=<MseLossBackward0>)
tensor(758108.3125, grad_fn=<MseLossBackward0>)
tensor(743727.0625, grad_fn=<MseLossBackward0>)
tensor(730416.2500, grad_fn=<MseLossBackward0>)
tensor(718577.6875, grad_fn=<MseLossBackward0>)
tensor(707724.8125, grad_fn=<MseLossBackward0>)
tensor(697632.2500, grad_fn=<MseLossBackward0>)
tensor(688632.8125, grad_fn=<MseLossBackward0>)
tensor(680260.8125, grad_fn=<MseLossBackward0>)
tensor(67

[32m[I 2022-11-25 14:47:05,590][0m Trial 23 finished with value: 871.9281616210938 and parameters: {'n_layers': 3, 'n_h_0': 22, 'n_h_1': 64, 'n_h_2': 86, 'lr': 0.006016377752319457, 'n_epochs': 83140}. Best is trial 23 with value: 871.9281616210938.[0m


tensor(348284.6875, grad_fn=<MseLossBackward0>)
Final valid loss: 760258.6875
#################
2
Params:
          n_layers: 2
          n_hidden_units: [42, 66]
          lr: 0.0027540249910313072
          n_epochs: 82047
tensor(9497126., grad_fn=<MseLossBackward0>)
tensor(8272772., grad_fn=<MseLossBackward0>)
tensor(6479374.5000, grad_fn=<MseLossBackward0>)
tensor(4908023., grad_fn=<MseLossBackward0>)
tensor(3739771.5000, grad_fn=<MseLossBackward0>)
tensor(2944042.2500, grad_fn=<MseLossBackward0>)
tensor(2437111.7500, grad_fn=<MseLossBackward0>)
tensor(2121111.7500, grad_fn=<MseLossBackward0>)
tensor(1919596.2500, grad_fn=<MseLossBackward0>)
tensor(1785034.7500, grad_fn=<MseLossBackward0>)
tensor(1689094.3750, grad_fn=<MseLossBackward0>)
tensor(1617001.5000, grad_fn=<MseLossBackward0>)
tensor(1560425.7500, grad_fn=<MseLossBackward0>)
tensor(1513443., grad_fn=<MseLossBackward0>)
tensor(1472967.2500, grad_fn=<MseLossBackward0>)
tensor(1437644.2500, grad_fn=<MseLossBackward0>)
tensor(

[32m[I 2022-11-25 14:47:46,296][0m Trial 24 finished with value: 999.8745727539062 and parameters: {'n_layers': 2, 'n_h_0': 42, 'n_h_1': 66, 'lr': 0.0027540249910313072, 'n_epochs': 82047}. Best is trial 23 with value: 871.9281616210938.[0m


tensor(878657.1250, grad_fn=<MseLossBackward0>)
Final valid loss: 999749.125
#################
3
Params:
          n_layers: 3
          n_hidden_units: [15, 76, 100]
          lr: 0.0003171445838498449
          n_epochs: 99502
tensor(9497111., grad_fn=<MseLossBackward0>)
tensor(9487158., grad_fn=<MseLossBackward0>)
tensor(9472587., grad_fn=<MseLossBackward0>)
tensor(9451269., grad_fn=<MseLossBackward0>)
tensor(9422735., grad_fn=<MseLossBackward0>)
tensor(9386789., grad_fn=<MseLossBackward0>)
tensor(9343450., grad_fn=<MseLossBackward0>)
tensor(9292766., grad_fn=<MseLossBackward0>)
tensor(9235120., grad_fn=<MseLossBackward0>)
tensor(9170699., grad_fn=<MseLossBackward0>)
tensor(9099786., grad_fn=<MseLossBackward0>)
tensor(9022599., grad_fn=<MseLossBackward0>)
tensor(8939342., grad_fn=<MseLossBackward0>)
tensor(8850464., grad_fn=<MseLossBackward0>)
tensor(8756260., grad_fn=<MseLossBackward0>)
tensor(8657042., grad_fn=<MseLossBackward0>)
tensor(8553176., grad_fn=<MseLossBackward0>)
tensor

[32m[I 2022-11-25 14:48:59,420][0m Trial 25 finished with value: 1588.843017578125 and parameters: {'n_layers': 3, 'n_h_0': 15, 'n_h_1': 76, 'n_h_2': 100, 'lr': 0.0003171445838498449, 'n_epochs': 99502}. Best is trial 23 with value: 871.9281616210938.[0m


Final valid loss: 2524422.0
#################
2
Params:
          n_layers: 2
          n_hidden_units: [1, 52]
          lr: 0.11194790219030716
          n_epochs: 71526
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., grad_fn=<MseLossBackward0>)
tensor(9497148., g

[32m[I 2022-11-25 14:49:22,822][0m Trial 26 finished with value: 3057.548095703125 and parameters: {'n_layers': 2, 'n_h_0': 1, 'n_h_1': 52, 'lr': 0.11194790219030716, 'n_epochs': 71526}. Best is trial 23 with value: 871.9281616210938.[0m


tensor(9497148., grad_fn=<MseLossBackward0>)
Final valid loss: 9348601.0
#################
3
Params:
          n_layers: 3
          n_hidden_units: [23, 61, 34]
          lr: 0.00586695277364326
          n_epochs: 88143
tensor(9496862., grad_fn=<MseLossBackward0>)
tensor(1358856.3750, grad_fn=<MseLossBackward0>)
tensor(1091305.7500, grad_fn=<MseLossBackward0>)
tensor(994731.7500, grad_fn=<MseLossBackward0>)
tensor(940896., grad_fn=<MseLossBackward0>)
tensor(902833.3125, grad_fn=<MseLossBackward0>)
tensor(873032.3750, grad_fn=<MseLossBackward0>)
tensor(850200.6875, grad_fn=<MseLossBackward0>)
tensor(831840.3125, grad_fn=<MseLossBackward0>)
tensor(816738.9375, grad_fn=<MseLossBackward0>)
tensor(804435.4375, grad_fn=<MseLossBackward0>)
tensor(793761.8125, grad_fn=<MseLossBackward0>)
tensor(784314.8750, grad_fn=<MseLossBackward0>)
tensor(775704.8125, grad_fn=<MseLossBackward0>)
tensor(767322.3125, grad_fn=<MseLossBackward0>)
tensor(759490.7500, grad_fn=<MseLossBackward0>)
tensor(752236.3

[32m[I 2022-11-25 14:50:17,766][0m Trial 27 finished with value: 889.8110961914062 and parameters: {'n_layers': 3, 'n_h_0': 23, 'n_h_1': 61, 'n_h_2': 34, 'lr': 0.00586695277364326, 'n_epochs': 88143}. Best is trial 23 with value: 871.9281616210938.[0m


tensor(581514.4375, grad_fn=<MseLossBackward0>)
Final valid loss: 791763.75
#################
1
Params:
          n_layers: 1
          n_hidden_units: [26]
          lr: 0.0017864622819866562
          n_epochs: 90657
tensor(9496664., grad_fn=<MseLossBackward0>)
tensor(9481816., grad_fn=<MseLossBackward0>)
tensor(9469656., grad_fn=<MseLossBackward0>)
tensor(9457017., grad_fn=<MseLossBackward0>)
tensor(9443874., grad_fn=<MseLossBackward0>)
tensor(9430663., grad_fn=<MseLossBackward0>)
tensor(9417455., grad_fn=<MseLossBackward0>)
tensor(9404260., grad_fn=<MseLossBackward0>)
tensor(9391080., grad_fn=<MseLossBackward0>)
tensor(9377916., grad_fn=<MseLossBackward0>)
tensor(9364779., grad_fn=<MseLossBackward0>)
tensor(9351667., grad_fn=<MseLossBackward0>)
tensor(9338570., grad_fn=<MseLossBackward0>)
tensor(9325498., grad_fn=<MseLossBackward0>)
tensor(9312448., grad_fn=<MseLossBackward0>)
tensor(9299418., grad_fn=<MseLossBackward0>)
tensor(9286407., grad_fn=<MseLossBackward0>)
tensor(9273422.,

[32m[I 2022-11-25 14:50:34,158][0m Trial 28 finished with value: 2861.944580078125 and parameters: {'n_layers': 1, 'n_h_0': 26, 'lr': 0.0017864622819866562, 'n_epochs': 90657}. Best is trial 23 with value: 871.9281616210938.[0m


Final valid loss: 8190726.5
#################
3
Params:
          n_layers: 3
          n_hidden_units: [17, 47, 26]
          lr: 0.037183220524202844
          n_epochs: 81475
tensor(9497147., grad_fn=<MseLossBackward0>)
tensor(704139.6875, grad_fn=<MseLossBackward0>)
tensor(630032.6875, grad_fn=<MseLossBackward0>)
tensor(557363.2500, grad_fn=<MseLossBackward0>)
tensor(460855.5000, grad_fn=<MseLossBackward0>)
tensor(397785.8750, grad_fn=<MseLossBackward0>)
tensor(367869.2188, grad_fn=<MseLossBackward0>)
tensor(347284.3750, grad_fn=<MseLossBackward0>)
tensor(333226.2812, grad_fn=<MseLossBackward0>)
tensor(320805.0938, grad_fn=<MseLossBackward0>)
tensor(311670.6875, grad_fn=<MseLossBackward0>)
tensor(304426.3438, grad_fn=<MseLossBackward0>)
tensor(299748.8125, grad_fn=<MseLossBackward0>)
tensor(295938.1250, grad_fn=<MseLossBackward0>)
tensor(292913.2500, grad_fn=<MseLossBackward0>)
tensor(290486., grad_fn=<MseLossBackward0>)
tensor(288549.6875, grad_fn=<MseLossBackward0>)
tensor(286286

In [16]:
study.best_trial

FrozenTrial(number=5, values=[894.4140014648438], datetime_start=datetime.datetime(2022, 11, 25, 14, 25, 29, 6298), datetime_complete=datetime.datetime(2022, 11, 25, 14, 25, 31, 569139), params={'n_layers': 1, 'n_h_0': 55, 'lr': 0.4052954218938756, 'n_epochs': 9100}, distributions={'n_layers': IntDistribution(high=5, log=False, low=0, step=1), 'n_h_0': IntDistribution(high=100, log=False, low=1, step=1), 'lr': FloatDistribution(high=0.5, log=False, low=0.001, step=None), 'n_epochs': IntDistribution(high=10000, log=False, low=1000, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=5, state=TrialState.COMPLETE, value=None)