# Implement Graph Neural Network

In [34]:
import os

import numpy as np
import pandas as pd
from  pathlib import Path
import torch
import torch.nn as nn

from rdkit import Chem
from rdkit.Chem import rdmolops, rdDistGeom
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from sklearn.metrics import mean_absolute_error, root_mean_squared_error



In [2]:
# Create class to transform SMILE into graph

class Graph():

    def __init__(self,
                molecule_smiles: str,
                node_vec_len: int,
                max_atoms: int = None,
                ) -> None:
        
        # Store properties
        self.smiles = molecule_smiles
        self.node_vec_len = node_vec_len
        self.max_atoms = max_atoms

        # Convert SMILES to RDKit mol
        self.smiles_to_mol()

        # Check if valid mol was created and generate graph
        if self.mol:
            self.smiles_to_graph()
    
    def smiles_to_mol(self):
        
        mol = Chem.MolFromSmiles(self.smiles)

        if mol is None:
            self.mol = None
            return
        
        self.mol = Chem.AddHs(mol)

    def smiles_to_graph(self):
        
        # Get list of atoms in molecule
        atoms = self.mol.GetAtoms()

        # If max_atoms is not provided, max_atoms = len(atoms)
        if self.max_atoms is None:
            n_atoms = len(list(atoms))
        else:
            n_atoms = self.max_atoms
        
        # Create empty node matrix
        node_mat = np.zeros((n_atoms, self.node_vec_len))

        # Iterate over atoms and add note to matrix
        for atom in atoms:
            # Get atom index and atomic number
            atom_index = atom.GetIdx()
            atom_no = atom.GetAtomicNum()

            # Assign to node matrix
            node_mat[atom_index, atom_no] = 1
        
        # Get adjacency matrix using RDKit
        adj_mat = rdmolops.GetAdjacencyMatrix(self.mol)
        self.std_adj_mat = np.copy(adj_mat)

        # Get ditance matrix using RDKit
        dist_mat = rdDistGeom.GetMoleculeBoundsMatrix(self.mol)
        dist_mat[dist_mat == 0.] = 1 # Avoids division by 0

        # Get modified adjacency matrix with inverse bond lengths
        adj_mat = adj_mat * (1 / dist_mat)

        # Par the adjacency matrix with 0s
        dim_add = n_atoms - adj_mat.shape[0]
        adj_mat = np.pad(
            adj_mat,
            pad_width=((0, dim_add), (0, dim_add)),
            mode='constant',
        )

        # Add an identity matrix to adjacency matrix,
        # this makes an atom its own neighbor
        adj_mat = adj_mat + np.eye(n_atoms)

        # Save adjacency and node matrices
        self.node_mat = node_mat
        self.adj_mat = adj_mat

In [3]:
# Create Pytorch dataset class

class GraphDataset(Dataset):

    def __init__(self,
                 dataset_path: str,
                 node_vec_len: int,
                 max_atoms: int) -> None:
        self.node_vec_len = node_vec_len
        self.max_atoms = max_atoms

        # Open dataset file
        df = pd.read_csv(dataset_path)

        # Create lists
        self.indices = df.index.to_list()
        self.smiles = df['smiles'].to_list()
        self.outputs = df['calc'].to_list()

    def __len__(self):
        return len(self.indices)
    
    def __getitem__(self, i: int):
        # Get SMILE
        smile = self.smiles[i]

        # Create graph using the Graph class
        mol = Graph(smile,
                    self.node_vec_len,
                    self.max_atoms)
        
        # Get the matrices
        node_mat = torch.Tensor(mol.node_mat)
        adj_mat = torch.Tensor(mol.adj_mat)

        # Get output
        output = torch.Tensor([self.outputs[i]])

        return (node_mat, adj_mat), output, smile

In [4]:
# Create custom collate function for DataLoader
def collate_graph_dataset(dataset: Dataset):
    # Create empty lists
    node_mats = []
    adj_mats = []
    outputs = []
    smiles = []

    # iterate over the dataset and assign components to the correct list
    for i in range(len(dataset)):
        (node_mat, adj_mat), output, smile = dataset[i]
        node_mats.append(node_mat)
        adj_mats.append(adj_mat)
        outputs.append(output)
        smiles.append(smile)

    # Create tensors
    node_mats_tensor = torch.cat(node_mats, dim=0)
    adj_mats_tensor = torch.cat(adj_mats, dim=0)
    outputs_tensor = torch.stack(outputs, dim=0)

    # Return tensors
    return (node_mats_tensor, adj_mats_tensor), outputs_tensor, smiles

In [5]:
# Construction of convolution layer

class Convolutionlayer(nn.Module):
    """
    Note:
    The ConvolutionLayer essentially does three things 
    - Calculation of the inverse diagonal degree matrix from the adjacency matrix
    - Multiplication of the four matrices (D⁻¹ANW)
    - Application of a non-linear activation function to the layer output.
    """

    def __init__(self,
                 node_in_len: int,
                 node_out_len: int,
                 ) -> None:
        super().__init__()

        # Create linear layer for node matrix
        self.conv_linear = nn.Linear(node_in_len, node_out_len)

        # Create activation function
        self.conv_activation = nn.LeakyReLU()

    def forward(self,
                node_mat,
                adj_mat,
                ):
        # Calculate number of neighbors
        n_neighbors = adj_mat.sum(dim=-1, keepdims=True)

        # Create identity tensor
        self.idx_mat = torch.eye(
            adj_mat.shape[-2],
            adj_mat.shape[-1],
            device=n_neighbors.device
        )

        # Add new (batch) dimension and expand
        idx_mat = self.idx_mat.unsqueeze(0).expand(*adj_mat.shape)
        # Get inverse degree matrix
        inv_degree_mat = torch.mul(idx_mat, 1 / n_neighbors)

        # Perform matrix multiplication (D⁻¹AN)
        node_fea = torch.bmm(inv_degree_mat, adj_mat)
        node_fea = torch.bmm(node_fea, node_mat)

        # Perfom linear transformation to node features (node_fea * W)
        node_fea = self.conv_linear(node_fea)

        # Apply activation
        node_fea = self.conv_activation(node_fea)

        return node_fea


In [6]:
# Construction of pooling layer

class PoolingLayer(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self,
                node_fea):
        # Pool the node matrix
        pooled_node_fea = node_fea.mean(dim=1)
        return pooled_node_fea

In [7]:
# Create the Graph Neural Network

class ChemGCN(nn.Module):
    def __init__(
            self,
            node_vec_len: int,
            node_fea_len: int,
            hidden_fea_len: int,
            n_conv: int,
            n_hidden: int,
            n_outputs: int,
            p_dropout: float = 0.0,
    ):
        super().__init__()

        # Define layers
        # Initial transformation from node matrix to node features
        self.init_transform = nn.Linear(node_vec_len, node_fea_len)
        
        # Convolution layers
        self.conv_layers = nn.ModuleList(
            [Convolutionlayer(node_in_len=node_fea_len,
                             node_out_len=node_fea_len,
                             )
                             for i in range(n_conv)]
        )

        # Pool convolution outputs
        self.pooling = PoolingLayer()
        pooled_node_fea_len = node_fea_len

        # Pooling activation
        self.pooling_activation = nn.LeakyReLU()

        # From pooling layers to hidden layers
        self.pooled_to_hidden = nn.Linear(pooled_node_fea_len, hidden_fea_len)

        # Hidden layer
        self.hidden_layer = nn.Linear(hidden_fea_len, hidden_fea_len)

        # Hidden layer activation function
        self.hidden_activation = nn.LeakyReLU()

        # Hidden layer dropout
        self.dropout = nn.Dropout(p=p_dropout)

        # If hidden layer > 1, add more hidden layers
        self.n_hidden = n_hidden
        if self.n_hidden > 1:
            self.hidden_layers = nn.ModuleList(
                [self.hidden_layer for _ in range(n_hidden -1)]
            )
            self.hidden_activation_layers = nn.ModuleList(
                [self.hidden_activation for _ in range(n_hidden - 1)]
                )
            self.hidden_dropout_layers = nn.ModuleList(
                [self.dropout for _ in range(n_hidden - 1)]
            )
        
        # Final layer going to output
        self. hidden_to_output = nn.Linear(hidden_fea_len, n_outputs)

    def forward(self, node_mat, adj_mat):
        # Perform initial transform on node_mat
        node_fea = self.init_transform(node_mat)

        # Perform convolutions
        for conv in self.conv_layers:
            node_fea = conv(node_fea, adj_mat)
        
        # Perform pooling
        pooled_node_fea = self.pooling(node_fea)
        pooled_node_fea = self.pooling_activation(pooled_node_fea)

        # First hidden layer
        hidden_node_fea = self.pooled_to_hidden(pooled_node_fea)
        hidden_node_fea = self.hidden_activation(hidden_node_fea)
        hidden_node_fea = self.dropout(hidden_node_fea)

        # Subsequent hidden layer
        if self.n_hidden > 1:
            for i in range(self.n_hidden -1):
                hidden_node_fea = self.hidden_layers[i](hidden_node_fea)
                hidden_node_fea = self.hidden_activation_layers[i](hidden_node_fea)
                hidden_node_fea = self.hidden_dropout_layers[i](hidden_node_fea)
        # Output
        out = self.hidden_to_output(hidden_node_fea)

        return out

In [8]:
# Define standardizer

class Standardizer:
    def __init__(self, X):
        self.mean = torch.mean(X)
        self.std = torch.std(X)
    
    def standardize(self, X):
        Z = (X - self.mean) / self.std
        return Z
    
    def restore(self, Z):
        X = self.mean + Z * self.std
        return X
    
    def state(self):
        return {'mean': self.mean, 'std': self.std}
    
    def load(self, state):
        self.mean = state['mean']
        self.std = state['std']

In [28]:
# Create train and test functions

def train_model(
        epoch,
        model,
        training_dataloader,
        optimizer,
        loss_fn,
        standardizer,
        use_GPU,
        max_atoms,
        node_vec_len,
):
    # Variables to store losses and error
    avg_loss = 0
    avg_mae = 0
    count = 0

    # Switch model to train mode
    model.train()

    # Go over each batch
    for i, dataset in enumerate(training_dataloader):
        # Unpack data
        node_mat = dataset[0][0]
        adj_mat = dataset[0][1]
        output = dataset[1]

        # Reshape inputs
        first_dim = int((torch.numel(node_mat)) / (max_atoms * node_vec_len))
        node_mat = node_mat.reshape(first_dim, max_atoms, node_vec_len)
        adj_mat = adj_mat.reshape(first_dim, max_atoms, max_atoms)

        # Standardize output
        output_std = standardizer.standardize(output)

        # Package inputs, outputs; check GPU
        if use_GPU:
            nn_input = (node_mat.cuda(), adj_mat.cuda())
            nn_output = output_std.cuda()
        else:
            nn_input = (node_mat, adj_mat)
            nn_output = output_std
        
        # Compute output from network
        nn_prediction = model(*nn_input)

        # Calculate loss
        loss = loss_fn(nn_output, nn_prediction)
        avg_loss += loss

        # Calculate MAE
        prediction = standardizer.restore(nn_prediction.detach().cpu())
        mae = mean_absolute_error(output, prediction)
        avg_mae += mae

        # Set zero gradients for all tensors
        optimizer.zero_grad()

        # Do backward propagation
        loss.backward()

        # Update optimizer
        optimizer.step()

        # Increase count
        count += 1
    
    # Calculate avg loss and MAE
    avg_loss = avg_loss / count
    avg_mae = avg_mae / count

    # # Print stats
    # if epoch % 10 == 0:
    #     print(
    #         'Epoch: [{0}]\tTraining Loss: [{1:.2f}]\tTraining MAE: [{2:.2f}]'\
    #         .format(
    #             epoch, avg_loss, avg_mae
    #         )
    #     )

    # Return loss and MAE
    return avg_loss, avg_mae

In [35]:
# Create test function

def test_model(
        model,
        test_dataloader,
        loss_fn,
        standardizer,
        use_GPU,
        max_atoms,
        node_vec_len,):
    
    # Store loss and error
    test_loss = 0
    test_mae = 0
    count = 0
    
    # Store all outputs and predictions for R2
    all_outputs = []
    all_predictions = []

    # Switch to inference mode
    model.eval()

    # Go over batches of test set
    for i, dataset in enumerate(test_dataloader):
        # Unpack data
        node_mat = dataset[0][0]
        adj_mat = dataset[0][1]
        output = dataset[1]

        # Reshape
        first_dim = int((torch.numel(node_mat)) / (max_atoms * node_vec_len))
        node_mat = node_mat.reshape(first_dim, max_atoms, node_vec_len)
        adj_mat = adj_mat.reshape(first_dim, max_atoms, max_atoms)

        # Standardize output
        output_std = standardizer.standardize(output)

        # Package inputs and outputs to GPU
        if use_GPU:
            nn_input = (node_mat.cuda(), adj_mat.cuda())
            nn_output = output_std.cuda()
        else:
            nn_input = (node_mat, adj_mat)
            nn_output = output_std
        
        # Compute output
        nn_prediction = model(*nn_input)

        # Calculate loss
        loss = loss_fn(nn_output, nn_prediction)
        test_loss += loss

        # Calculate MAE
        prediction = standardizer.restore(nn_prediction.detach().cpu())
        mae = mean_absolute_error(output, prediction)
        test_mae += mae

        # Store predictions and actual values for R²
        all_predictions.extend(prediction.numpy().flatten())
        all_outputs.extend(output.numpy().flatten())


        # Increase count
        count += 1

    # Calculate avg loss, MAE, R2
    test_loss = test_loss / count
    test_mae = test_mae / count
    test_rmse = root_mean_squared_error(all_outputs, all_predictions)

    return test_loss, test_mae, test_rmse


In [11]:
# Workflow

## Fix Seeds
np.random.seed(42)
torch.manual_seed(42)
use_GPU = torch.cuda.is_available()

## Inputs
max_atoms = 100
node_vec_len = 60
train_size = 0.7
batch_size = 32
hidden_nodes = 60
n_conv_layers = 4
n_hidden_layers = 2
learning_rate = 0.01
n_epochs = 100

## Create dataset
main_path = Path().resolve().parent
data_path = main_path / 'data' / 'train.csv'
dataset = GraphDataset(dataset_path=data_path,
                       max_atoms=max_atoms,
                       node_vec_len=node_vec_len)

## Split data into train and test
# Get sizes
dataset_indices = np.arange(0, len(dataset), 1)
train_size = int(np.round(train_size * len(dataset)))
test_size = len(dataset) - train_size

# Randomly sample train and test indices
train_indices = np.random.choice(dataset_indices,
                                 size=train_size,
                                 replace=False)
test_indices = np.array(list(set(dataset_indices) - set(train_indices)))

# Create dataloaders
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
train_loader = DataLoader(dataset,
                          batch_size=batch_size,
                          sampler=train_sampler,
                          collate_fn=collate_graph_dataset)
test_loader = DataLoader(dataset,
                         batch_size=batch_size,
                         sampler=test_sampler,
                         collate_fn=collate_graph_dataset)

## Initialize model, standardizer, optimizer and loss functions
# Model
model = ChemGCN(node_vec_len=node_vec_len,
                node_fea_len=hidden_nodes,
                hidden_fea_len=hidden_nodes,
                n_conv=n_conv_layers,
                n_hidden=n_hidden_layers,
                n_outputs=1,
                p_dropout=0.1)
# Transfer to GPU
if use_GPU:
    model.cuda()

# Standardizer
outputs = [dataset[i][1] for i in range(len(dataset))]
standardizer = Standardizer(torch.Tensor(outputs))

# Optimizer
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate)

# Loss function
loss_fn = torch.nn.MSELoss()

## Train model
loss = []
mae = []
epoch = []
for i in range(n_epochs):
    epoch_loss, epoch_mae = train_model(
        epoch=i,
        model=model,
        training_dataloader=train_loader,
        optimizer=optimizer,
        loss_fn=loss_fn,
        standardizer=standardizer,
        use_GPU=use_GPU,
        max_atoms=max_atoms,
        node_vec_len=node_vec_len,
    )
    loss.append(epoch_loss)
    mae.append(epoch_mae)
    epoch.append(i)

Epoch: [0]	Training Loss: [1.03]	Training MAE: [3.24]
Epoch: [10]	Training Loss: [0.32]	Training MAE: [1.76]
Epoch: [20]	Training Loss: [0.14]	Training MAE: [1.15]
Epoch: [30]	Training Loss: [0.14]	Training MAE: [1.23]
Epoch: [40]	Training Loss: [0.18]	Training MAE: [1.33]
Epoch: [50]	Training Loss: [0.11]	Training MAE: [1.04]
Epoch: [60]	Training Loss: [0.18]	Training MAE: [1.34]
Epoch: [70]	Training Loss: [0.16]	Training MAE: [1.21]
Epoch: [80]	Training Loss: [0.11]	Training MAE: [1.02]
Epoch: [90]	Training Loss: [0.09]	Training MAE: [0.97]


In [None]:
## Test model

test_loss, test_mae, test_rmse = test_model(model=model,
                                 test_dataloader=test_loader,
                                 loss_fn=loss_fn,
                                 standardizer=standardizer,
                                 use_GPU=use_GPU,
                                 max_atoms=max_atoms,
                                 node_vec_len=node_vec_len,
                                 )

# Print final results
print(f"Training Loss: {loss[-1]:.2f}")
print(f"Training MAE: {mae[-1]:.2f}")
print(f"Test Loss: {test_loss:.2f}")
print(f"Test MAE: {test_mae:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")

Training Loss: 0.09
Training MAE: 0.95
Test Loss: 0.14
Test MAE: 1.02
Test R²: 0.88


# Optimize using Ax Platform

In [29]:
#Set seeds
def set_seeds(seed=42):
    """Set all random seeds for reproducibility"""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # for multi-GPU
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


def train_test(parametrization):
    ## Inputs
    max_atoms = 100
    node_vec_len = 60
    train_size = 0.8
    batch_size = 32
    hidden_nodes = parametrization['hidden_nodes'] # Default = 60
    n_conv_layers = parametrization['n_conv_layers'] # Default = 4
    n_hidden_layers = parametrization['n_hidden_layers'] # Default = 2
    learning_rate = parametrization['learning_rate'] # Default = 0.01
    n_epochs = 100

    ## Create dataset
    main_path = Path().resolve().parent
    data_path = main_path / 'data' / 'train.csv'
    dataset = GraphDataset(dataset_path=data_path,
                        max_atoms=max_atoms,
                        node_vec_len=node_vec_len)

    ## Split data into train and test
    # Get sizes
    dataset_indices = np.arange(0, len(dataset), 1)
    train_size = int(np.round(train_size * len(dataset)))
    test_size = len(dataset) - train_size

    # Randomly sample train and test indices
    train_indices = np.random.choice(dataset_indices,
                                    size=train_size,
                                    replace=False)
    test_indices = np.array(list(set(dataset_indices) - set(train_indices)))

    # Create dataloaders
    train_sampler = SubsetRandomSampler(train_indices)
    test_sampler = SubsetRandomSampler(test_indices)
    train_loader = DataLoader(dataset,
                            batch_size=batch_size,
                            sampler=train_sampler,
                            collate_fn=collate_graph_dataset)
    test_loader = DataLoader(dataset,
                            batch_size=batch_size,
                            sampler=test_sampler,
                            collate_fn=collate_graph_dataset)

    ## Initialize model, standardizer, optimizer and loss functions
    # Model
    model = ChemGCN(node_vec_len=node_vec_len,
                    node_fea_len=hidden_nodes,
                    hidden_fea_len=hidden_nodes,
                    n_conv=n_conv_layers,
                    n_hidden=n_hidden_layers,
                    n_outputs=1,
                    p_dropout=0.1)
    # Transfer to GPU
    if use_GPU:
        model.cuda()

    # Standardizer
    outputs = [dataset[i][1] for i in range(len(dataset))]
    standardizer = Standardizer(torch.Tensor(outputs))

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                lr=learning_rate)

    # Loss function
    loss_fn = torch.nn.MSELoss()

    ## Train model
    for i in range(n_epochs):
        epoch_loss, epoch_mae = train_model(
            epoch=i,
            model=model,
            training_dataloader=train_loader,
            optimizer=optimizer,
            loss_fn=loss_fn,
            standardizer=standardizer,
            use_GPU=use_GPU,
            max_atoms=max_atoms,
            node_vec_len=node_vec_len,
        )

    _, test_mae, _ = test_model(model=model,
                                    test_dataloader=test_loader,
                                    loss_fn=loss_fn,
                                    standardizer=standardizer,
                                    use_GPU=use_GPU,
                                    max_atoms=max_atoms,
                                    node_vec_len=node_vec_len,
                                    )

    return test_mae

In [None]:
    # max_atoms = 100
    # node_vec_len = 60
    # train_size = 0.7
    # batch_size = 32
    # hidden_nodes = 60
    # n_conv_layers = 4
    # n_hidden_layers = 2
    # learning_rate = 0.01
    # n_epochs = 100

    # hidden_nodes = parametrization['hidden_nodes'] # Default = 60
    # n_conv_layers = parametrization['n_conv_layers'] # Default = 4
    # n_hidden_layers = parametrization['n_hidden_layers'] # Default = 2
    # learning_rate = parametrization['learning_rate'] # Default = 0.01

In [30]:
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.service.utils.report_utils import exp_to_df
from ax.utils.notebook.plotting import init_notebook_plotting, render



In [31]:
ax_client = AxClient()

[INFO 11-01 16:14:10] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.


In [36]:

# Create an experiment with required arguments: name, parameters, and objective_name.
ax_client.create_experiment(
    name="GCN_hyperparameter_search",  # The name of the experiment.
    parameters=[
        {
            "name": "hidden_nodes",  # The name of the parameter.
            "type": "range",  # The type of the parameter ("range", "choice" or "fixed").
            "bounds": [10, 100],  # The bounds for range parameters. 
            "value_type": "int",  # Optional, the value type ("int", "float", "bool" or "str"). Defaults to inference from type of "bounds".
            "log_scale": False,  # Optional, whether to use a log scale for range parameters. Defaults to False.
        },
        {
            "name": "n_conv_layers",  
            "type": "range",  
            "bounds": [1, 10],
            "value_type": "int" 
        },
        {
            "name": "n_hidden_layers",
            "type": "range",
            "bounds": [1, 10],
            "value_type": "int",
        },
        {
            "name": "learning_rate",
            "type": "range",
            "bounds": [1e-5, 0.1],
            "value_type": "float",
            "log_scale": True,
        },
    ],
    objectives={"rmse": ObjectiveProperties(minimize=True)},  # The objective name and minimization setting.
    # parameter_constraints: Optional, a list of strings of form "p1 >= p2" or "p1 + p2 <= some_bound".
    # outcome_constraints: Optional, a list of strings of form "constrained_metric <= some_bound".
    overwrite_existing_experiment=True,
)

[INFO 11-01 16:22:29] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[RangeParameter(name='hidden_nodes', parameter_type=INT, range=[10, 100]), RangeParameter(name='n_conv_layers', parameter_type=INT, range=[1, 10]), RangeParameter(name='n_hidden_layers', parameter_type=INT, range=[1, 10]), RangeParameter(name='learning_rate', parameter_type=FLOAT, range=[1e-05, 0.1], log_scale=True)], parameter_constraints=[]).
[INFO 11-01 16:22:29] ax.modelbridge.dispatch_utils: Using Models.BOTORCH_MODULAR since there is at least one ordered parameter and there are no unordered categorical parameters.
[INFO 11-01 16:22:29] ax.modelbridge.dispatch_utils: Calculating the number of remaining initialization trials based on num_initialization_trials=None max_initialization_trials=None num_tunable_parameters=4 num_trials=None use_batch_trials=False
[INFO 11-01 16:22:29] ax.modelbridge.dispatch_utils: calculated num_initialization_trials=8
[INFO 11-01 16:22:29] ax.modelbridge.

In [37]:

# hidden_nodes = parametrization['hidden_nodes'] # Default = 60
# n_conv_layers = parametrization['n_conv_layers'] # Default = 4
# n_hidden_layers = parametrization['n_hidden_layers'] # Default = 2
# learning_rate = parametrization['learning_rate'] # Default = 0.01

# Create base trial
base_params = {'hidden_nodes': 60,
               'n_conv_layers': 4,
               'n_hidden_layers': 2,
               'learning_rate': 0.01}

ax_client.attach_trial(
    parameters=base_params
)

[INFO 11-01 16:24:10] ax.core.experiment: Attached custom parameterizations [{'hidden_nodes': 60, 'n_conv_layers': 4, 'n_hidden_layers': 2, 'learning_rate': 0.01}] as trial 0.


({'hidden_nodes': 60,
  'n_conv_layers': 4,
  'n_hidden_layers': 2,
  'learning_rate': 0.01},
 0)

In [38]:
baseline_parameters = ax_client.get_trial_parameters(trial_index=0)
ax_client.complete_trial(trial_index=0, raw_data=train_test(baseline_parameters))

[INFO 11-01 16:26:26] ax.service.ax_client: Completed trial 0 with data: {'rmse': (0.710071, None)}.


In [39]:
for i in range(20):
    parameters, trial_index = ax_client.get_next_trial()
    # Local evaluation here can be replaced with deployment to external system.
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_test(parameters))


Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 16:27:29] ax.service.ax_client: Generated new trial 1 with parameters {'hidden_nodes': 30, 'n_conv_layers': 6, 'n_hidden_layers': 4, 'learning_rate': 0.009725} using model Sobol.
[INFO 11-01 16:28:09] ax.service.ax_client: Completed trial 1 with data: {'rmse': (2.122675, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 16:28:09] ax.service.ax_client: Generated new trial 2 with parameters {'hidden_nodes': 84, 'n_conv_layers': 3, 'n_hidden_layers': 8, 'learning_rate': 0.000364} using model Sobol.
[INFO 11-01 16:28:49] ax.service.ax_client: Completed trial 2 with data: {'rmse': (1.406683, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 11-01 16:28:49] ax.service.ax_client: Generated new trial 3 with parameters {'hidden_nodes': 58, 'n_c

In [40]:
ax_client.get_trials_data_frame()



Unnamed: 0,trial_index,arm_name,trial_status,generation_method,rmse,hidden_nodes,n_conv_layers,n_hidden_layers,learning_rate
0,0,0_0,COMPLETED,Manual,0.710071,60,4,2,0.01
1,1,1_0,COMPLETED,Sobol,2.122675,30,6,4,0.009725
2,2,2_0,COMPLETED,Sobol,1.406683,84,3,8,0.000364
3,3,3_0,COMPLETED,Sobol,419.461685,58,8,1,0.013787
4,4,4_0,COMPLETED,Sobol,3.174679,45,1,9,2.1e-05
5,5,5_0,COMPLETED,Sobol,3.95642,33,10,6,6.5e-05
6,6,6_0,COMPLETED,Sobol,8163.855835,70,2,5,0.054562
7,7,7_0,COMPLETED,Sobol,2.751391,96,7,9,0.000116
8,8,8_0,COMPLETED,Sobol,1.136037,18,5,3,0.002457
9,9,9_0,COMPLETED,BoTorch,1.553821,51,4,2,0.000334


In [41]:
best_parameters, values = ax_client.get_best_parameters()
best_parameters

{'hidden_nodes': 100,
 'n_conv_layers': 4,
 'n_hidden_layers': 1,
 'learning_rate': 0.003701669360913142}

In [42]:
ax_client.get_trials_data_frame().to_csv("../data/optimization_results/GCN_optimization.csv")



In [45]:
render(ax_client.get_contour_plot(param_x="learning_rate", param_y="n_conv_layers", metric_name="rmse"))

[INFO 11-01 16:42:06] ax.service.ax_client: Retrieving contour plot with parameter 'learning_rate' on X-axis and 'n_conv_layers' on Y-axis, for metric 'rmse'. Remaining parameters are affixed to the middle of their range.


In [44]:
render(
    ax_client.get_optimization_trace()
)  