In [1]:
import os

# Step up from the notebook's folder to the directory that holds the
# project-local packages imported in the next cell (e.g. `gnn_library`).
# The guard makes the cell idempotent: a bare `os.chdir('..')` climbs one
# directory further every time the cell is re-run, breaking later imports
# and the relative sqlite path used by the study.
if not os.path.isdir('gnn_library'):
    os.chdir('..')

In [2]:
import torch
import optuna
import numpy as np

import instance_generator as ig
import torch_converter as tc
from torch_geometric.loader import DataLoader
from gnn_library.util import train, save
from util import Dataset
from optuna.trial import TrialState

%load_ext autoreload
%autoreload 2

In [3]:
# Pick the compute device. GPU index 2 is preferred (as originally
# hard-coded), but only when that index actually exists on this machine;
# otherwise fall back to the first GPU, and to the CPU when CUDA is absent.
preferred_gpu_index = 2
if torch.cuda.is_available():
    gpu_index = preferred_gpu_index if torch.cuda.device_count() > preferred_gpu_index else 0
    device = torch.device('cuda:{}'.format(gpu_index))
else:
    device = torch.device('cpu')

print("PyTorch has version {}".format(torch.__version__))
print('Using device:', device)

PyTorch has version 2.0.0+cu117
Using device: cuda:2


In [6]:
def define_model(trial):
    """Build the training-argument dict for one Optuna trial.

    Tunable values are drawn from `trial`; everything else is a fixed
    constant. Several searched parameters are sampled as base-2 exponents
    (`log_batch_size`, `hidden_dim`, `epochs`), so the value recorded in the
    study is the exponent while the dict holds `2 ** exponent`.

    Args:
        trial: object exposing `suggest_int`, `suggest_float` and
            `suggest_categorical` (an Optuna trial in this notebook).

    Returns:
        dict of arguments; it also carries the module-level `device`.
    """
    # Draw the searched values up front, in the same order as the dict
    # entries they feed, so the sequence of suggest calls is unchanged.
    # NOTE(review): the "{}" suffix in the first two names looks like a
    # leftover format placeholder, but it is the name stored in the study,
    # so it is kept verbatim.
    depth = trial.suggest_int("num_layers{}", 1, 6)
    mlp_depth = trial.suggest_int("num_mlp_layers{}", 1, 5)
    batch_exponent = trial.suggest_int("log_batch_size", 1, 6)
    hidden_exponent = trial.suggest_int("hidden_dim", 1, 7)
    dropout_rate = trial.suggest_float("dropout", 0, 0.5)
    epoch_exponent = trial.suggest_int("epochs", 2, 8)
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "adagrad"])
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-1, log=True)

    return dict(
        processor='GENConv',
        head='regression',
        num_layers=depth,
        num_mlp_layers=mlp_depth,
        aggr='max',
        batch_size=2 ** batch_exponent,
        node_feature_dim=5,
        edge_feature_dim=1,
        graph_feature_dim=2,
        hidden_dim=2 ** hidden_exponent,
        output_dim=1,
        dropout=dropout_rate,
        epochs=2 ** epoch_exponent,
        opt=optimizer_name,
        opt_scheduler='none',
        opt_restart=0,
        weight_decay=5e-3,
        lr=learning_rate,
        device=device,
        noise=0,
    )

def objective(trial):
	"""Optuna objective: train one GNN with trial-chosen settings and score it.

	Fresh training and test instances are sampled from three graph families
	(ER, BA, GEOM), converted to samples, and handed to `train`. The last
	entry of the returned test accuracies is the trial's score.

	Side effect: the notebook-level global `best_model` is updated to
	`(model, accuracy)` whenever this trial beats the best one seen so far in
	the current kernel session (or when no best model exists yet).

	Args:
		trial: the Optuna trial; used to draw hyperparameters and also
			forwarded to `train`.

	Returns:
		The final test accuracy reported by `train`.
	"""
	# `best_model` is assigned below. Without this declaration Python treats
	# it as a local and the comparison raises UnboundLocalError.
	global best_model

	args = define_model(trial)
	train_num = 200; test_num = 100

	# One config per graph family; each is expanded as keyword arguments to
	# `ig.sample_instances`. Order matters for the shared `rng` stream.
	graph_configs = [
		{'graph_type': 'ER', 'p': 0.75, 'weighted': True},
		{'graph_type': 'BA', 'ba_param': 4, 'weighted': True},
		{'graph_type': 'GEOM', 'q': 0.25, 'd': 2},
	]

	# NOTE(review): unseeded generator, so every trial sees different data
	# and results are not reproducible run-to-run.
	rng = np.random.default_rng()

	def sample_all_families(count):
		# `count` instances per family, concatenated in config order.
		# NOTE(review): the leading 10 and 6 are passed through unchanged;
		# presumably instance-size parameters — confirm in instance_generator.
		return [
			instance
			for config in graph_configs
			for instance in ig.sample_instances(10, 6, count, rng, args, **config)
		]

	train_instances = sample_all_families(train_num)
	test_instances = sample_all_families(test_num)

	train_data = Dataset(tc._instances_to_train_samples(train_instances, args['head']))
	test_data = Dataset(tc._instances_to_train_samples(test_instances, args['head']))

	train_loader = DataLoader(
		train_data,
		batch_size=args['batch_size'],
		shuffle=True,
		num_workers=4
	)

	test_loader = DataLoader(
		test_data,
		batch_size=args['batch_size'],
		shuffle=True,
		num_workers=4
	)

	### Training

	_, _, test_accuracies, GNN, _ = train(train_loader, test_loader, args, trial)
	model_accuracy = test_accuracies[-1]

	# Look the current best up through globals() so the very first successful
	# trial works even though no cell initialises `best_model`.
	previous_best = globals().get('best_model')
	if previous_best is None or model_accuracy > previous_best[1]:
		print("FOUND A NEW BEST MODEL")
		best_model = (GNN, model_accuracy)

	return model_accuracy

In [7]:
# Create the persistent study, or reopen it if it already exists in the
# sqlite file, and run the search. The objective is maximised.
study = optuna.create_study(
	study_name='hyperparam-study',
	direction='maximize',
	storage='sqlite:///hyperparam.db',
	load_if_exists=True,
)
study.optimize(objective, n_trials=1, timeout=60000)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# `study.best_trial` raises ValueError when the study holds no completed
# trial (e.g. every trial so far was pruned or failed), so only report the
# best trial when there is one.
if complete_trials:
	print("Best trial:")
	trial = study.best_trial

	print("  Value: ", trial.value)

	print("  Params: ")
	for key, value in trial.params.items():
		print("    {}: {}".format(key, value))
else:
	print("No completed trials yet; there is no best trial to report.")

[I 2024-02-01 21:56:48,527] Using an existing study with name 'hyperparam-study' instead of creating a new one.
Training:   0%|          | 0/64 [00:05<?, ?Epochs/s]
[W 2024-02-01 21:57:07,239] Trial 1 failed with parameters: {'num_layers{}': 5, 'num_mlp_layers{}': 3, 'log_batch_size': 5, 'hidden_dim': 5, 'dropout': 0.28214284608944196, 'epochs': 6, 'optimizer': 'adagrad', 'lr': 0.0006449588314032681} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/wikum/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3770604/495614295.py", line 81, in objective
    _, _, test_accuracies, GNN, _ = train(train_loader, test_loader, args, trial)
  File "/home/wikum/GNN-OBM/gnn_library/util.py", line 183, in train
    return _train(
  File "/home/wikum/GNN-OBM/gnn_library/util.py", line 276, in _train
    test_loss, test_accuracy = _test(test_loader, model, l

KeyboardInterrupt: 

In [None]:
# Persist the best model together with the study's best trial under the name
# 'base_learner_GNN2', using `save` imported from `gnn_library.util`.
# NOTE(review): `best_model` is only ever assigned inside `objective`, as a
# (model, accuracy) tuple; no visible cell defines it at notebook level.
# Confirm it exists, and that `save` expects the tuple rather than the bare
# model, before running this cell.
# NOTE(review): `study.best_trial` is the best trial stored in the study,
# which may come from an earlier session than the in-memory `best_model`.
save(best_model, study.best_trial, 'base_learner_GNN2')