In [1]:
import math
import numpy as np
import wandb

import torch
import torch_geometric
from torch_geometric.data import Data

import sys
import os
from tqdm import tqdm
import random
import joblib

# Add the 'scripts' directory to the Python path
scripts_path = os.path.abspath(os.path.join('..'))
if scripts_path not in sys.path:
    sys.path.append(scripts_path)
import gnn_io as gio
import gnn_architectures as garch


## 1. Define model and parameters

In [2]:
# Define parameters 
project_name = "test_different_parameters"
path_to_save_dataloader = "../../data/data_created_during_training_needed_for_testing/"

loss_fct = torch.nn.MSELoss()
early_stopping_patience = 10

base_config={
        "epochs": 1000,
        "batch_size": 16,
        "output_layer": 'gat',
        "in_channels": 6, # dimensions of the x vector + 2 (pos)
        "out_channels": 1,
        "early_stopping_patience": early_stopping_patience,
        "learning_rate": 0.001
    }

# unique_model_description = ''

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
sweep_config = {
    'method': 'random',
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'optimizer': {
            'values': ['adam', 'sgd']
        },
        'hidden_layer_size': {
            'values': [16, 32, 64]
        },
        'gat_layers':{
            'values': [0, 1, 2]
        },
        'gcn_layers': {
            'values': [0, 1]
        }
        # 'learning_rate': {
        #     'distribution': 'uniform',
        #     'min': 0,
        #     'max': 0.1
        # },
    }
}

import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'val_loss'},
 'parameters': {'gat_layers': {'values': [0, 1, 2]},
                'gcn_layers': {'values': [0, 1]},
                'hidden_layer_size': {'values': [16, 32, 64]},
                'optimizer': {'values': ['adam', 'sgd']}}}


## 2. Load data

In [5]:
# Reconstruct the Data objects
data_dict_list = torch.load('../../data/train_data/dataset_1pm_0-3100.pt')
datalist = [Data(x=d['x'], edge_index=d['edge_index'], pos=d['pos'], y=d['y']) for d in data_dict_list]
dataset = gio.normalize_dataset(datalist, y_scalar=None, x_scalar_list=None, pos_scalar=None, directory_path=path_to_save_dataloader)

In [6]:
def train():
    wandb.init(
    project=project_name, config = base_config)
    config = wandb.config
    
    # Initialize model with parameters from config
    gnn_instance = garch.MyGnn(
        in_channels=config.in_channels, 
        out_channels=config.out_channels, 
        hidden_size=config.hidden_layer_size, 
        gat_layers=config.gat_layers, 
        gcn_layers=config.gcn_layers, 
        output_layer=config.output_layer
    )
    model = gnn_instance.to(device)
    wandb.watch(model)

    
    if config.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)

    train_dl, valid_dl, test_dl = gio.create_dataloaders(batch_size = config.batch_size, dataset=dataset, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)
    garch.train(model, config=config, loss_fct=loss_fct, 
                                optimizer=optimizer,
                                train_dl=train_dl, valid_dl=valid_dl,
                                device=device, early_stopping=gio.EarlyStopping(patience=early_stopping_patience, verbose=True))

In [7]:
wandb.login()
sweep_id = wandb.sweep(sweep_config, project="pytorch-sweeps-demo")
wandb.agent(sweep_id=sweep_id, function=train, count=5)

# model.to_onnx(path_to_save_dataloader + 'model_' + unique_model_description + '.onnx', test_dl, device)
# wandb.save(path_to_save_dataloader + 'model_' + unique_model_description + '.onnx')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: dxzzevf1
Sweep URL: https://wandb.ai/tum-traffic-engineering/pytorch-sweeps-demo/sweeps/dxzzevf1


[34m[1mwandb[0m: Agent Starting Run: koqcnv3y with config:
[34m[1mwandb[0m: 	gat_layers: 1
[34m[1mwandb[0m: 	gcn_layers: 0
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	optimizer: adam
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Model initialized
MyGnn(
  (pointLayer): PointNetConv(local_nn=Sequential(
    (0): Linear(in_features=6, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
  ), global_nn=Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
  ))
  (graph_layers): Sequential(
    (0) - GATConv(32, 32, heads=1): x, edge_index -> x
    (1) - ReLU(inplace=True): x -> x
  )
  (output_layer): GATConv(32, 1, heads=1)
)
Total dataset length: 3079
Training subset length: 2155
Validation subset length: 461
Test subset length: 463


135it [10:58,  4.88s/it]


epoch: 0, validation loss: 0.003258088370785117, R^2: -5.005572319030762


135it [10:32,  4.68s/it]


: 

: 