In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from tqdm import tqdm

import sys

sys.path.append('../src/')

from mlp import datasets as ann_datasets
from mlp import models as ann_models

from utils import metrics

import configuration

from notebook_utils import fit_and_predict_mlp as fit_and_predict

In [2]:
# Random-search space. Every entry is a zero-argument sampler that draws one
# value from NumPy's global random state each time it is called. Integer upper
# bounds are written as `max + 1` because `np.random.randint` excludes `high`.
parameters = {
    'num_layers':    lambda: np.random.randint(1, 3 + 1),
    'num_units':     lambda: np.random.randint(256, 1024 + 1),
    'learning_rate': lambda: np.random.uniform(2e-4, 2e-3),
    'num_epochs':    lambda: np.random.randint(100, 300 + 1),
    'batch_size':    lambda: np.random.choice([32, 64, 128]),
    'dropout_rate':  lambda: np.random.uniform(0.0, 0.3),
    'bits':          lambda: np.random.randint(512, 2048 + 1),
    'radius':        lambda: np.random.randint(1, 3 + 1),
}

# Label of the model family; used as the last component of the prediction
# output path.
model_name = 'mlp'

# One search is run per dataset registered in the project configuration.
dataset_names = [*configuration.datasets.keys()]

# Replicates for the final fit, and number of random-search trials per dataset.
NUM_REPL, NUM_SEARCHES = (
    configuration.NUM_REPLICATES,
    configuration.NUM_SEARCHES,
)

In [1]:
# Random hyper-parameter search, run independently for every dataset.
#
# For each dataset, NUM_SEARCHES configurations are sampled, a model is trained
# on the training split and scored by RMSE on the validation split. The best
# configuration (together with its trained weights AND the datasets that were
# featurised with its `bits`/`radius`) is then handed to `fit_and_predict`.
for dataset_name in dataset_names:

    # Reset the incumbent for every dataset so that a winner from a previous
    # dataset can never leak into this one.
    best_error = float('inf')
    best_params = None
    best_weights = None
    best_datasets = None

    for i in range(NUM_SEARCHES):

        # Re-seed per trial: trial `i` draws the same configuration for every
        # dataset and on every re-run of the notebook.
        np.random.seed(42+i)

        # NOTE: the order of these draws determines which value each seed
        # yields -- keep it fixed to stay comparable with earlier runs.
        num_layers = parameters['num_layers']()
        num_units = parameters['num_units']()
        learning_rate = parameters['learning_rate']()
        batch_size = parameters['batch_size']()
        num_epochs = parameters['num_epochs']()
        dropout_rate = parameters['dropout_rate']()
        bits = parameters['bits']()
        radius = parameters['radius']()

        print('Dataset          : {}'.format(dataset_name))
        print('Number of layers : {}'.format(num_layers))
        print('Number of units  : {}'.format(num_units))
        print('Learning rate    : {}'.format(learning_rate))
        print('Batch size       : {}'.format(batch_size))
        print('Number of epochs : {}'.format(num_epochs))
        print('Dropout rate     : {}'.format(dropout_rate))
        print('Number of bits   : {}'.format(bits))
        print('Radius           : {}'.format(radius))

        # The datasets depend on the sampled `bits` and `radius`, so they are
        # specific to this trial and must be rebuilt every iteration.
        train, valid, test = ann_datasets.get_ecfp_datasets(
            '../input/datasets/{}.csv'.format(dataset_name),
            bits=bits, radius=radius,
        )

        hidden_units = [num_units] * num_layers

        model = ann_models.ANNModel(
            hidden_units=hidden_units,
            dropout_rate=dropout_rate,
            loss_fn=tf.keras.losses.Huber,
            optimizer=tf.keras.optimizers.Adam,
            initial_learning_rate=learning_rate,
        )

        model.fit(train['X'], train['y'],
                  batch_size=batch_size, verbose=0,
                  epochs=num_epochs)

        # Model selection uses the validation split only.
        trues, preds = model.predict(valid['X'], valid['y'])

        error = metrics.get('rmse')(trues, preds)
        print('RMSE             : {}\n'.format(error) + '---'*20)

        # A NaN error compares False here and therefore never becomes the
        # incumbent.
        if error < best_error:
            best_error = error
            best_params = {
                "num_epochs": num_epochs,
                "batch_size": batch_size,
                "hidden_units": hidden_units,
                "initial_learning_rate": learning_rate,
                "dropout_rate": dropout_rate,
                "bits": bits,
                "radius": radius,
            }
            best_weights = model.get_weights()
            # Keep the datasets that belong to this configuration. Later
            # trials rebind `train`/`valid`/`test` to data built with other
            # `bits`/`radius` values, which would no longer match
            # `best_params` / `best_weights`.
            best_datasets = [train, valid, test]

    # No trial produced a usable score (NUM_SEARCHES == 0, or every RMSE was
    # NaN/inf): there is nothing to refit, so report it and move on instead of
    # failing on undefined names or reusing another dataset's result.
    if best_params is None:
        print('No valid configuration found for dataset {}; skipping.'.format(
            dataset_name))
        continue

    fit_and_predict(
        model_obj=ann_models.ANNModel,
        model_params=best_params,
        model_weights=best_weights,
        datasets=best_datasets,
        num_repl=NUM_REPL,
        save_path='../output/predictions/{}/{}'.format(
            dataset_name, model_name)
    )