Hyperparameter optimization for GCNN models

In [None]:
# Loading modules

from __future__ import print_function, division

import os

import numpy as np
import torch
import multiprocessing

from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.bayesopt import BayesOptSearch
from ase import io
from ase.db import connect

from tinnet.feature.voronoi import Voronoi
from tinnet.regression.regression import Regression

In [None]:
# Train the network

class TrainGCNN(tune.Trainable):
    """Ray Tune trainable that fits one TinNet ``Regression`` model per trial.

    ``_setup`` reads the sampled hyperparameters and prepares the data set,
    ``_train`` builds and trains the model and reports the test MSE as
    ``mean_loss``, and ``_save``/``_restore`` (de)serialise the model weights.

    NOTE(review): the underscore-prefixed hook names are the ones this file
    was written against; newer Ray releases may expect ``setup``/``step``/
    ``save_checkpoint``/``load_checkpoint`` instead -- confirm against the
    installed Ray version.
    """

    def _setup(self, config):
        """Store the trial's hyperparameters and featurise the database.

        Args:
            config: Mapping with the hyperparameters of this trial.  Missing
                keys fall back to the defaults given below.  Every value
                except ``lr`` is cast to ``int`` because the search space in
                this file samples them with ``tune.uniform`` (continuous).
        """
        self.lr = config.get('lr', 0.01)
        self.atom_fea_len = int(config.get('atom_fea_len', 64))
        self.n_conv = int(config.get('n_conv', 3))
        self.h_fea_len = int(config.get('h_fea_len', 128))
        self.n_h = int(config.get('n_h', 1))

        descriptor = Voronoi(max_num_nbr=12,
                             radius=8,
                             dmin=0,
                             step=0.2,
                             dict_atom_fea=None)

        db = connect('../Database.db')

        # Query the database a single time so that the structures and both
        # target arrays are guaranteed to come from the same rows, in the
        # same order.
        rows = list(db.select())

        # A plain list is all Pool.map needs.  Wrapping Atoms objects in
        # np.array is avoided on purpose: Atoms are sequence-like, so NumPy
        # may try to expand them into nested arrays instead of keeping one
        # object per structure.
        images = [row.toatoms() for row in rows]

        self.d_cen = np.array([row['data']['d_cen'] for row in rows],
                              dtype=np.float32)
        self.full_width = np.array([row['data']['full_width'] for row in rows],
                                   dtype=np.float32)

        # The context manager shuts the worker processes down once the
        # (blocking) map call has returned, so trials do not leak pools.
        with multiprocessing.Pool() as pool:
            self.features = pool.map(descriptor.feas, images)

    def _train(self):
        """Build a fresh model, train it and report the test MSE.

        The four final metrics returned by ``Regression.train`` are also
        written to text files in the current working directory; the file
        names encode the hyperparameters of the trial.

        Returns:
            dict: ``{'mean_loss': <final test MSE>}``.
        """
        self.model = Regression(self.features,
                                self.d_cen,
                                phys_model='gcnn_multitask',
                                optim_algorithm='AdamW',
                                weight_decay=0.0001,
                                idx_validation=0,
                                idx_test=1,
                                lr=self.lr,
                                atom_fea_len=self.atom_fea_len,
                                n_conv=self.n_conv,
                                h_fea_len=self.h_fea_len,
                                n_h=self.n_h,
                                full_width=self.full_width,
                                batch_size=64)

        final_ans_val_mae, final_ans_val_mse,\
            final_ans_test_mae, final_ans_test_mse\
                = self.model.train(25000)

        # Shared file-name suffix: "<lr>_<atom_fea_len>_<n_conv>_<h_fea_len>_<n_h>"
        suffix = '_'.join(str(value) for value in (self.lr,
                                                   self.atom_fea_len,
                                                   self.n_conv,
                                                   self.h_fea_len,
                                                   self.n_h))

        results = (('val_mae', final_ans_val_mae),
                   ('val_mse', final_ans_val_mse),
                   ('test_mae', final_ans_test_mae),
                   ('test_mse', final_ans_test_mse))

        for tag, value in results:
            np.savetxt('final_ans_' + tag + '_' + suffix + '.txt', [value])

        # NOTE(review): the metric handed to the tuner is the *test* MSE, not
        # the validation MSE -- confirm this is the intended selection metric.
        return {'mean_loss': final_ans_test_mse}

    def _save(self, checkpoint_dir):
        """Write the model's state dict to ``<checkpoint_dir>/model.pth``.

        Args:
            checkpoint_dir: Directory in which the checkpoint is stored.

        Returns:
            str: Path of the checkpoint file that was written.
        """
        checkpoint_path = os.path.join(checkpoint_dir, 'model.pth')
        torch.save(self.model.state_dict(), checkpoint_path)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        """Load a state dict written by ``_save`` into the current model.

        Args:
            checkpoint_path: Path of the checkpoint file to read.
        """
        # NOTE(review): ``self.model`` is only created inside ``_train``, so
        # restoring into an instance that has not trained yet would fail with
        # AttributeError -- confirm whether that situation can occur.
        self.model.load_state_dict(torch.load(checkpoint_path))

In [None]:
if __name__ == '__main__':

    # Hyperparameter ranges explored by the search.  Everything except 'lr'
    # is sampled as a continuous value and cast to int inside TrainGCNN.
    search_space = {
        'lr': tune.loguniform(lower=0.0005, upper=0.004, base=10),
        'atom_fea_len': tune.uniform(lower=100, upper=256),
        'n_conv': tune.uniform(lower=3, upper=7),
        'h_fea_len': tune.uniform(lower=50, upper=156),
        'n_h': tune.uniform(lower=1, upper=5),
    }

    # NOTE(review): Ray Tune's dict-style stop criteria are believed to fire
    # when the reported value is >= the threshold -- confirm that this is the
    # intended behaviour for a loss such as 'mean_loss'.
    stop_criteria = {'mean_loss': 0.001, 'training_iteration': 20}

    # Resources reserved for every single trial.
    trial_resources = {'cpu': 12, 'gpu': 1}

    # Bayesian-optimisation searcher (acquisition settings below), wrapped so
    # that no more than four suggestions are evaluated concurrently.
    acquisition = {'kind': 'ucb', 'kappa': 2.5, 'xi': 0.0}
    algo = ConcurrencyLimiter(BayesOptSearch(utility_kwargs=acquisition),
                              max_concurrent=4)

    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(TrainGCNN,
                        name='TrainGCNN',
                        metric='mean_loss',
                        mode='min',
                        search_alg=algo,
                        scheduler=scheduler,
                        stop=stop_criteria,
                        resources_per_trial=trial_resources,
                        num_samples=500,
                        checkpoint_at_end=True,
                        checkpoint_freq=20,
                        config=search_space)

    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print('Best config is:', best_config)