In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from itertools import product
from collections import defaultdict

import sys
sys.path.append('../src/')

import logging
tf.get_logger().setLevel(logging.ERROR)

from gcn import datasets as gcn_datasets
from gcn import models as gcn_models
from rgcn import datasets as rgcn_datasets
from rgcn import models as rgcn_models

from utils import metrics

import configuration

from notebook_utils import fit_and_predict_gnn as fit_and_predict

In [2]:

# Hyperparameter search space.
#
# Each entry is a zero-argument callable that draws one value from NumPy's
# global RNG (so `np.random.seed(...)` controls the draws). Integer ranges are
# written as `upper + 1` because `np.random.randint` excludes its upper bound.
parameters = {
    # Integer, inclusive range [3, 5].
    'num_gconv_layers': lambda: np.random.randint(3, 5 + 1),
    # Integer, inclusive range [128, 256].
    'num_gconv_units': lambda: np.random.randint(128, 256 + 1),
    # Uniform on [2e-4, 2e-3).
    'learning_rate': lambda: np.random.uniform(2e-4, 2e-3),
    # Integer, inclusive range [100, 300].
    'num_epochs': lambda: np.random.randint(100, 300 + 1),
    # One of three fixed batch sizes, each equally likely.
    'batch_size': lambda: np.random.choice([32, 64, 128]),
    # Log-uniform: the exponent is uniform on [-10, -5).
    'weight_decay': lambda: 10 ** np.random.uniform(-10, -5),
    # Integer, inclusive range [1, 2].
    'num_dense_layers': lambda: np.random.randint(1, 2 + 1),
    # Integer, inclusive range [256, 1024]. The `+ 1` makes the upper bound
    # reachable, consistent with the other integer ranges above.
    'num_dense_units': lambda: np.random.randint(256, 1024 + 1),
    # Uniform on [0.0, 0.3).
    'dense_dropout': lambda: np.random.uniform(0.0, 0.3),
}


# Registry of the architectures to evaluate. Each entry pairs the dataset
# class with the model class taken from the same package (gcn / rgcn).
models = dict(
    gcn=dict(
        dataset=gcn_datasets.GCNDataset,
        model=gcn_models.GCNModel,
    ),
    rgcn=dict(
        dataset=rgcn_datasets.RGCNDataset,
        model=rgcn_models.RGCNModel,
    ),
)


# Names of all datasets declared in the project configuration.
dataset_names = [name for name in configuration.datasets.keys()]

# Number of final replicate runs and number of random-search trials,
# both taken from the project configuration.
NUM_REPL, NUM_SEARCHES = (
    configuration.NUM_REPLICATES,
    configuration.NUM_SEARCHES,
)

In [4]:

# Random hyperparameter search for every (model, dataset) pair, followed by
# the final replicate runs using the configuration with the lowest
# validation RMSE.
for model_name, model_spec in models.items():

    dataset_cls = model_spec['dataset']
    model_cls = model_spec['model']

    for dataset_name in dataset_names:

        # Start each (model, dataset) search from a clean slate so that a
        # search without any usable trial can never fall back silently on the
        # winner of a previous pair.
        best_error = float('inf')
        best_params = None
        best_weights = None
        best_datasets = None

        for i in range(NUM_SEARCHES):

            # Seed by trial index: trial i draws the same hyperparameters for
            # every model and dataset. TensorFlow's global seed is set too, so
            # TF operations relying on it are tied to the same trial seed.
            np.random.seed(42+i)
            tf.random.set_seed(42+i)

            # NOTE: for a given seed the drawn values depend on the order of
            # these calls; keep the order unchanged.
            num_gconv_layers = parameters['num_gconv_layers']()
            num_gconv_units = parameters['num_gconv_units']()
            learning_rate = parameters['learning_rate']()
            batch_size = parameters['batch_size']()
            num_epochs = parameters['num_epochs']()
            weight_decay = parameters['weight_decay']()
            num_dense_layers = parameters['num_dense_layers']()
            num_dense_units = parameters['num_dense_units']()
            dense_dropout = parameters['dense_dropout']()

            print('Model                     : {}'.format(model_name))
            print('Dataset                   : {}'.format(dataset_name))
            print('Number of gconv layers    : {}'.format(num_gconv_layers))
            print('Number of gconv units     : {}'.format(num_gconv_units))
            print('Learning rate             : {}'.format(learning_rate))
            print('Batch size                : {}'.format(batch_size))
            print('Number of epochs          : {}'.format(num_epochs))
            print('Weight decay              : {}'.format(weight_decay))
            print('Number of dense layers    : {}'.format(num_dense_layers))
            print('Number of dense units     : {}'.format(num_dense_units))
            print('Dropout                   : {}'.format(dense_dropout))

            # Keyword arguments forwarded to the model constructor.
            params = {
                "gconv_units": [num_gconv_units] * num_gconv_layers,
                "gconv_regularizer": tf.keras.regularizers.L2(weight_decay),
                "initial_learning_rate": learning_rate,
                'dense_units': [num_dense_units] * num_dense_layers,
                'dense_dropout': dense_dropout,
            }

            # The third positional argument is True only for the training
            # split (presumably a training/shuffle switch -- confirm against
            # the dataset class, which is not visible in this notebook).
            train_dataset = dataset_cls(
                f'../input/tfrecords/{dataset_name}/train.tfrec', batch_size, True)
            valid_dataset = dataset_cls(
                f'../input/tfrecords/{dataset_name}/valid.tfrec', batch_size, False)
            test_dataset = dataset_cls(
                f'../input/tfrecords/{dataset_name}/test.tfrec', batch_size, False)

            model = model_cls(**params)

            model.fit(
                train_dataset.get_iterator(),
                epochs=num_epochs, verbose=0
            )

            # Model selection uses the validation split only.
            trues, preds = model.predict(valid_dataset.get_iterator(), verbose=0)

            error = metrics.get('rmse')(trues, preds)
            print('RMSE                      : {}\n'.format(error) + '---'*20)

            if error < best_error:
                best_error = error
                best_params = params.copy()
                best_weights = model.get_weights()
                best_params['batch_size'] = batch_size
                best_params['num_epochs'] = num_epochs
                # Keep the datasets built with the winning batch size; the
                # objects from the last trial may use a different one.
                best_datasets = [train_dataset, valid_dataset, test_dataset]

        # A NaN RMSE never compares as smaller than the incumbent, and
        # NUM_SEARCHES may be 0; in both cases there is nothing to evaluate.
        if best_params is None:
            raise RuntimeError(
                'Hyperparameter search produced no valid trial for model '
                '{!r} on dataset {!r}'.format(model_name, dataset_name)
            )

        fit_and_predict(
            model_obj=model_cls,
            model_params=best_params,
            model_weights=best_weights,
            datasets=best_datasets,
            num_repl=NUM_REPL,
            save_path='../output/predictions/{}/{}'.format(
                dataset_name, model_name)
        )
