In [18]:
import torch
from torch_geometric.loader import DataLoader
from pathlib import Path
from gnn_example.graphdataset import GraphDataset
import numpy as np
import torch.nn.functional as F
from torch_geometric.nn.models import EdgeCNN
from bayes_opt import BayesianOptimization
import bayes_opt.acquisition as acq
import json
import os

In [19]:
DATA_DIR = Path("gnn_example") / "data"
TRAIN_PARQUET_FILE = DATA_DIR / "train_data.parquet"
VAL_PARQUET_FILE = DATA_DIR /  "val_data.parquet"
TEST_PARQUET_FILE = DATA_DIR /  "test_sequences.parquet"

In [20]:
def loss_fn(output, target):
    clipped_target = torch.clip(target, min=0, max=1)
    mses = F.mse_loss(output, clipped_target, reduction='mean')
    return mses

def mae_fn(output, target):
    clipped_target = torch.clip(target, min=0, max=1)
    maes = F.l1_loss(output, clipped_target, reduction='mean')
    return maes

In [21]:
def black_box_func(EDGE_DISTANCE, N_DATA, LR, WEIGHT_DECAY, DROPOUT_RATE, ALLOW_LOOPS, save_loc:Path=None) -> float:
    EDGE_DISTANCE = int(EDGE_DISTANCE)
    N_DATA = int(N_DATA)
    ALLOW_LOOPS = bool(ALLOW_LOOPS)
    
    train_dataset = GraphDataset(TRAIN_PARQUET_FILE, edge_distance=EDGE_DISTANCE, allow_loops=ALLOW_LOOPS)
    val_dataset = GraphDataset(VAL_PARQUET_FILE, edge_distance=EDGE_DISTANCE, allow_loops=ALLOW_LOOPS)

    train_dataloader = DataLoader(train_dataset[:N_DATA], batch_size=128, shuffle=True)
    val_dataloader = DataLoader(val_dataset[:2048], batch_size=128, shuffle=False)

    model = EdgeCNN(in_channels=train_dataset.num_features, 
                    hidden_channels=128,
                    num_layers=4, 
                    out_channels=1,
                    dropout=DROPOUT_RATE)
    
    if save_loc is not None:
        torch.save(model.state_dict(), save_loc / "temp.pt" )
    
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    
    N_EPOCHS = 10

    for epoch in range(N_EPOCHS):
        train_losses = []
        train_maes = []
        model.train()
        for batch in train_dataloader:
            optimizer.zero_grad()
            out = model(batch.x, batch.edge_index)
            out = torch.squeeze(out)
            loss = loss_fn(out, batch.y)
            mae = mae_fn(out, batch.y)
            loss.backward()
            train_losses.append(loss.detach().cpu().numpy())
            train_maes.append(mae.detach().cpu().numpy())
            optimizer.step()
        
        val_losses = []
        val_maes = []
        model.eval()
        for batch in val_dataloader:
            optimizer.zero_grad()
            out = model(batch.x, batch.edge_index)
            out = torch.squeeze(out)
            loss = loss_fn(out, batch.y)
            mae = mae_fn(out, batch.y)
            val_losses.append(loss.detach().cpu().numpy())
            val_maes.append(mae.detach().cpu().numpy())
        
        print(f"Epoch {epoch+1}/{N_EPOCHS} | MSE: {np.mean(train_losses):.3f} | MAE: {np.mean(train_maes):.3f} | Val MSE: {np.mean(val_losses):.3f} | Val MAE: {np.mean(val_maes):.3f}")
        
        if save_loc is not None:
            print("Saving model from epoch ", epoch)
            torch.save(model.state_dict(), save_loc / f"model_epoch_{epoch}.pt")
    

    return -np.min(val_maes) # want to minimize the MAE

In [22]:
pbounds = {"EDGE_DISTANCE": (1, 7, int),
           "N_DATA": (5000, 20000, int),
           "LR": (1e-5, 1e-2),
           "WEIGHT_DECAY": (1e-5, 1e-2),
           "DROPOUT_RATE": (0.0, 0.8),
           "ALLOW_LOOPS": (0, 1, int)  # This will be handled in the dataset initialization
           }

optimizer = BayesianOptimization(
    f=black_box_func,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=1,
    acquisition_function=acq.GPHedge([
        acq.UpperConfidenceBound(kappa=2.576, random_state=1), 
        acq.ExpectedImprovement(xi=0.05, random_state=1),
        ]),
)

  optimizer = BayesianOptimization(


In [23]:
TOT_ITERATIONS = 25
N_INIT = 4
output_dir = Path("gnn_example/outputs/02")

if not (output_dir/"results.json").exists():
    optimizer.maximize(
        init_points=N_INIT,
        n_iter=TOT_ITERATIONS - N_INIT,
    )

    with open(output_dir/"results.json", "a+") as f:
        json.dump(optimizer.res, f, indent=4)

    optimizer.save_state(output_dir/"optimizer_state.json")

else:
    optimizer.load_state(output_dir/"optimizer_state.json")

| [39m1        [39m | [39m-0.276353[39m | [39m6        [39m | [39m5235     [39m | [39m0.0072060[39m | [39m-.114e-05[39m | [39m0.2418660[39m | [39m1        [39m |
| [35m2        [39m | [35m-0.275258[39m | [35m1        [39m | [35m5144     [39m | [35m0.0039718[39m | [35m0.0038852[39m | [35m0.5357968[39m | [35m1        [39m |
| [35m3        [39m | [35m-0.239257[39m | [35m3        [39m | [35m10396    [39m | [35m0.0041977[39m | [35m0.0068553[39m | [35m0.1635617[39m | [35m0        [39m |
| [35m4        [39m | [35m-0.193271[39m | [35m5        [39m | [35m13093    [39m | [35m0.0053487[39m | [35m0.0091404[39m | [35m0.3657638[39m | [35m1        [39m |
| [39m5        [39m | [39m-0.193436[39m | [39m4        [39m | [39m16443    [39m | [39m0.0006108[39m | [39m0.0071743[39m | [39m0.5956617[39m | [39m1        [39m |
| [39m6        [39m | [39m-0.232513[39m | [39m1        [39m | [39m20000    [39m | [39m0.0030558[39m 

In [30]:
# save model from iteration 4
if not os.listdir(output_dir/"Iteration_4"): # check if the directory is empty
    key_dict = optimizer.res[3]['params']
    black_box_func(**key_dict, save_loc=output_dir/"Iteration_4")

Epoch 1/10 | MSE: 0.106 | MAE: 0.271 | Val MSE: 0.105 | Val MAE: 0.252
Saving model from epoch  0
Epoch 2/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.252
Saving model from epoch  1
Epoch 3/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  2
Epoch 4/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.104 | Val MAE: 0.255
Saving model from epoch  3
Epoch 5/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  4
Epoch 6/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.252
Saving model from epoch  5
Epoch 7/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.252
Saving model from epoch  6
Epoch 8/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.104 | Val MAE: 0.254
Saving model from epoch  7
Epoch 9/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.250
Saving model from epoch  8
Epoch 10/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.104 | Val MAE: 0.254
Saving model from epoch  9


In [28]:
# save model from iteration 4
if not os.listdir(output_dir/"Iteration_4b"): # check if the directory is empty
    key_dict = optimizer.res[3]['params']
    black_box_func(**key_dict, save_loc=output_dir/"Iteration_4b")

Epoch 1/10 | MSE: 1.083 | MAE: 0.978 | Val MSE: 0.671 | Val MAE: 0.753
Saving model from epoch  0
Epoch 2/10 | MSE: 0.413 | MAE: 0.544 | Val MSE: 0.260 | Val MAE: 0.395
Saving model from epoch  1
Epoch 3/10 | MSE: 0.177 | MAE: 0.285 | Val MSE: 0.137 | Val MAE: 0.238
Saving model from epoch  2
Epoch 4/10 | MSE: 0.118 | MAE: 0.252 | Val MSE: 0.110 | Val MAE: 0.239
Saving model from epoch  3
Epoch 5/10 | MSE: 0.107 | MAE: 0.261 | Val MSE: 0.106 | Val MAE: 0.248
Saving model from epoch  4
Epoch 6/10 | MSE: 0.106 | MAE: 0.267 | Val MSE: 0.105 | Val MAE: 0.252
Saving model from epoch  5
Epoch 7/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  6
Epoch 8/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  7
Epoch 9/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  8
Epoch 10/10 | MSE: 0.106 | MAE: 0.269 | Val MSE: 0.105 | Val MAE: 0.253
Saving model from epoch  9


In [25]:
# save model from iteration 5
if not os.listdir(output_dir/"Iteration_5"): # check if the directory is empty
    key_dict = optimizer.res[4]['params']
    black_box_func(**key_dict, save_loc=output_dir/"Iteration_5")

Epoch 1/10 | MSE: 0.268 | MAE: 0.473 | Val MSE: 0.223 | Val MAE: 0.442
Saving model from epoch  0
Epoch 2/10 | MSE: 0.214 | MAE: 0.424 | Val MSE: 0.180 | Val MAE: 0.398
Saving model from epoch  1
Epoch 3/10 | MSE: 0.175 | MAE: 0.385 | Val MSE: 0.151 | Val MAE: 0.362
Saving model from epoch  2
Epoch 4/10 | MSE: 0.149 | MAE: 0.354 | Val MSE: 0.131 | Val MAE: 0.332
Saving model from epoch  3
Epoch 5/10 | MSE: 0.131 | MAE: 0.329 | Val MSE: 0.118 | Val MAE: 0.309
Saving model from epoch  4
Epoch 6/10 | MSE: 0.120 | MAE: 0.311 | Val MSE: 0.111 | Val MAE: 0.292
Saving model from epoch  5
Epoch 7/10 | MSE: 0.114 | MAE: 0.298 | Val MSE: 0.107 | Val MAE: 0.280
Saving model from epoch  6
Epoch 8/10 | MSE: 0.110 | MAE: 0.289 | Val MSE: 0.105 | Val MAE: 0.271
Saving model from epoch  7
Epoch 9/10 | MSE: 0.108 | MAE: 0.282 | Val MSE: 0.104 | Val MAE: 0.264
Saving model from epoch  8
Epoch 10/10 | MSE: 0.107 | MAE: 0.278 | Val MSE: 0.104 | Val MAE: 0.260
Saving model from epoch  9


In [26]:
# save model from iteration 5
if not os.listdir(output_dir/"Iteration_5b"): # check if the directory is empty
    key_dict = optimizer.res[4]['params']
    black_box_func(**key_dict, save_loc=output_dir/"Iteration_5b")

Epoch 1/10 | MSE: 0.214 | MAE: 0.357 | Val MSE: 0.160 | Val MAE: 0.253
Saving model from epoch  0
Epoch 2/10 | MSE: 0.109 | MAE: 0.263 | Val MSE: 0.151 | Val MAE: 0.243
Saving model from epoch  1
Epoch 3/10 | MSE: 0.107 | MAE: 0.263 | Val MSE: 0.157 | Val MAE: 0.249
Saving model from epoch  2
Epoch 4/10 | MSE: 0.106 | MAE: 0.261 | Val MSE: 0.183 | Val MAE: 0.285
Saving model from epoch  3
Epoch 5/10 | MSE: 0.105 | MAE: 0.261 | Val MSE: 0.204 | Val MAE: 0.317
Saving model from epoch  4
Epoch 6/10 | MSE: 0.105 | MAE: 0.262 | Val MSE: 0.212 | Val MAE: 0.330
Saving model from epoch  5
Epoch 7/10 | MSE: 0.105 | MAE: 0.263 | Val MSE: 0.217 | Val MAE: 0.337
Saving model from epoch  6
Epoch 8/10 | MSE: 0.105 | MAE: 0.263 | Val MSE: 0.229 | Val MAE: 0.356
Saving model from epoch  7
Epoch 9/10 | MSE: 0.105 | MAE: 0.263 | Val MSE: 0.238 | Val MAE: 0.367
Saving model from epoch  8
Epoch 10/10 | MSE: 0.105 | MAE: 0.263 | Val MSE: 0.239 | Val MAE: 0.369
Saving model from epoch  9
