In [4]:
import sys
sys.path.append('../src')

import numpy as np
import torch
import time
import sys
import resource
import copy
from torch.utils.data import DataLoader

from data_classes import *
from read_input import *
from read_trainset import *
from network import *
from prepare_batches import *
from traininit import *
from data_set import *
from data_loader import *
from optimization_step import *
from output_nn import *
from py_aeio import *
from bnn import BayesianNeuralNetwork
from bnn import get_batch

# --- Run configuration, seeding and model construction ---------------------
# NOTE: statement order matters here. The RNG seeds are set before the data
# set is read and before the network is built, so reordering these lines may
# change the results of a run.

# Device string handed to NetAtom below.
device = "cpu"
# Name of the input file parsed by read_train_in.
tin_file = "train.in"
tin = read_train_in(tin_file)
# Seed torch's global RNG with a fixed value, and numpy's with the seed
# carried by the parsed input object.
torch.manual_seed(3)
np.random.seed(tin.numpy_seed)
# Override the value read from the input file.
# NOTE(review): presumably this restricts training to energies only — confirm.
tin.train_forces = False

# Override the training-set file before the structures are read.
tin.train_file = 'Cu.active_learning'
# read_list_structures returns five values; the second one is discarded here,
# and `tin` is rebound to the object it returns.
list_structures_energy, _, list_removed, max_nnb, tin = read_list_structures(tin)

# Build the network from the architecture settings stored in tin.networks_param.
net = NetAtom(tin.networks_param["input_size"], tin.networks_param["hidden_size"],
			    tin.sys_species, tin.networks_param["activations"], tin.alpha, device)

# Wrap the network in the Bayesian model that is trained and sampled below.
bnn = BayesianNeuralNetwork(net)

# --- Train / test / validation split ----------------------------------------
# Split sizes live in named constants so they can be tuned in one place.
# The first N_TRAIN shuffled structures form the training set, the next N_TEST
# the test (candidate) pool, and whatever remains is the validation set.
N_TRAIN = 5000
N_TEST = 1000

# Fixed seed so the shuffle (and therefore the split) is the same on every run.
np.random.seed(42)
dataset_size = len(list_structures_energy)

# Without this guard a too-small data set silently produces an empty
# validation (or test) set, which only fails much later in a confusing way.
if dataset_size <= N_TRAIN + N_TEST:
    raise ValueError(
        'Data set has {} structures; need more than {} to build the '
        'train/test/validation split'.format(dataset_size, N_TRAIN + N_TEST)
    )

indices = list(range(dataset_size))
np.random.shuffle(indices)

training_indices = indices[:N_TRAIN]
test_indices = indices[N_TRAIN:N_TRAIN + N_TEST]
valid_indices = indices[N_TRAIN + N_TEST:]

training_structures_energy = [list_structures_energy[x] for x in training_indices]
test_structures_energy     = [list_structures_energy[x] for x in test_indices]
valid_structure_energy     = [list_structures_energy[x] for x in valid_indices]

# One batch object per subset, built by the project's get_batch helper.
training_batch = get_batch(tin, training_structures_energy, max_nnb)
test_batch     = get_batch(tin, test_structures_energy, max_nnb)
valid_batch    = get_batch(tin, valid_structure_energy, max_nnb)

# --- Hyper-parameters --------------------------------------------------------
EPOCHS = 100001       # epoch count passed to bnn.train
NUM_SAMPLES = 10000   # num_samples passed to every bnn.predict call
LR = 0.01             # initial learning rate passed to bnn.train

# --- Initial fit on the training subset --------------------------------------
bnn.train(training_batch, EPOCHS, initial_lr=LR, verbose=True)

# --- Predictive spread on the validation and test subsets --------------------
# The validation prediction is deliberately issued before the test one so the
# sequence of calls into the model is unchanged.
# NOTE(review): axis 0 of the 'obs' entry is presumably the sample axis, making
# these per-structure standard deviations — confirm against bnn.predict.
valid_pred = bnn.predict(valid_batch, num_samples=NUM_SAMPLES)
std_valid_batch = valid_pred['obs'].std(dim=0)

test_pred = bnn.predict(test_batch, num_samples=NUM_SAMPLES)
std_test_batch = test_pred['obs'].std(dim=0)

# Positions of the test structures ordered by increasing predictive std.
idx_test_sorted = np.argsort(std_test_batch)

[EPOCH LOSS 0001] loss: 193.0858
[EPOCH RMSD 0001] loss: 215186.9110
[EPOCH LOSS 0101] loss: 3.2539
[EPOCH LOSS 0201] loss: 3.2216
[EPOCH LOSS 0301] loss: 3.1180
[EPOCH LOSS 0401] loss: 3.3538
[EPOCH LOSS 0501] loss: 3.3414
[EPOCH LOSS 0601] loss: 3.5835
[EPOCH LOSS 0701] loss: 2.7345
[EPOCH LOSS 0801] loss: 3.6482
[EPOCH LOSS 0901] loss: 5.2787
[EPOCH LOSS 1001] loss: 2.8550
[EPOCH RMSD 1001] loss: 1410.6767
[EPOCH LOSS 1101] loss: 3.8542
[EPOCH LOSS 1201] loss: 3.4709
[EPOCH LOSS 1301] loss: 3.9652
[EPOCH LOSS 1401] loss: 3.5799
[EPOCH LOSS 1501] loss: 4.6143
[EPOCH LOSS 1601] loss: 3.3072
[EPOCH LOSS 1701] loss: 2.9674
[EPOCH LOSS 1801] loss: 2.6206
[EPOCH LOSS 1901] loss: 2.5001
[EPOCH LOSS 2001] loss: 2.2847
[EPOCH RMSD 2001] loss: 384.8911
[EPOCH LOSS 2101] loss: 2.8950
[EPOCH LOSS 2201] loss: 2.2632
[EPOCH LOSS 2301] loss: 2.1656
[EPOCH LOSS 2401] loss: 4.1017
[EPOCH LOSS 2501] loss: 2.5047
[EPOCH LOSS 2601] loss: 3.2506
[EPOCH LOSS 2701] loss: 3.2261
[EPOCH LOSS 2801] loss: 3.3

In [6]:
# --- Active-learning experiment ----------------------------------------------
# The test pool is walked in chunks of CHUNK_SIZE structures, ordered by
# increasing predictive std. For every chunk a fresh copy of the trained model
# is retrained on (training set + chunk) and re-evaluated on the validation set.
# Each message is printed and also written to 'std_test_multi.txt'.
#
# Loop results use their own names: the original loop overwrote
# `std_valid_batch`, `valid_pred` and `indices` from the previous cell, so
# re-running this cell reported the last retrained model's std as the
# "before training" value.
N_CHUNKS = 10                   # number of chunks taken from the sorted test pool
CHUNK_SIZE = 100                # structures added to the training set per chunk
RETRAIN_EPOCHS = EPOCHS // 10   # same value as int(EPOCHS/10) for positive EPOCHS


def _report(message, out):
    """Print ``message`` and write it, followed by a newline, to the open file ``out``."""
    print(message)
    out.write(message + '\n')


# Fail early with a clear message instead of an IndexError in the middle of the loop.
if N_CHUNKS * CHUNK_SIZE > len(idx_test_sorted):
    raise ValueError(
        'Need {} ranked test structures but only {} are available'.format(
            N_CHUNKS * CHUNK_SIZE, len(idx_test_sorted))
    )

# Baseline metrics of the model trained in the previous cell.
l2 = bnn.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
rmsd_valid_before = l2[0].item()
std_valid_before = torch.mean(std_valid_batch).item()

with open('std_test_multi.txt', 'w') as out:
    _report('RMSD valid set pre train {}'.format(rmsd_valid_before), out)
    _report('STD valid set before training: {}'.format(std_valid_before), out)

    for i in range(N_CHUNKS):
        # Positions (into the test subset) of the i-th chunk in std order.
        chunk_positions = idx_test_sorted[list(range(i * CHUNK_SIZE, (i + 1) * CHUNK_SIZE))]

        # Map test-subset positions back to indices into the full data set.
        chunk_indices = [test_indices[x] for x in chunk_positions]
        chunk_std = np.array([std_test_batch[x] for x in chunk_positions]).mean()
        _report('\nTraining with {} datapoints of std {}'.format(len(chunk_indices), chunk_std), out)

        new_structure_energy = [list_structures_energy[x] for x in chunk_indices]
        new_training_structures_energy = training_structures_energy + new_structure_energy
        new_training_batch = get_batch(tin, new_training_structures_energy, max_nnb)

        # Always start from the same trained model so chunks are compared fairly
        # and `bnn` itself is never modified.
        bnn1 = copy.deepcopy(bnn)
        bnn1.train(new_training_batch, RETRAIN_EPOCHS, initial_lr=LR, verbose=False)

        # Call order (predict, then get_loss_RMSE) is kept as in the original.
        valid_pred_retrained = bnn1.predict(valid_batch, num_samples=NUM_SAMPLES)
        std_valid_retrained = torch.mean(torch.std(valid_pred_retrained['obs'], 0))
        l2_retrained = bnn1.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)

        _report('RMSD after training: {}'.format(l2_retrained[0].item()), out)
        _report('STD valid set after training: {}'.format(std_valid_retrained.item()), out)

RMSD valid set pre train 11.920079591814018
STD valid set before training: 1.0670581566495563

Training with 100 datapoints of std 1.0605366717638558
RMSD after training: 14.316173594033275
STD valid set after training: 1.28906418444484

Training with 100 datapoints of std 1.083576337591988
RMSD after training: 16.65581754959018
STD valid set after training: 1.3399556927129934

Training with 100 datapoints of std 1.0985997600319233
RMSD after training: 19.81772685823072
STD valid set after training: 1.383811709440195

Training with 100 datapoints of std 1.1116332767906814
RMSD after training: 9.758071655667099
STD valid set after training: 1.3327547978555618

Training with 100 datapoints of std 1.1266501952941137
RMSD after training: 17.86366938695825
STD valid set after training: 1.2334755221431193

Training with 100 datapoints of std 1.1456185786436555
RMSD after training: 16.823528940605318
STD valid set after training: 1.2366481392482491

Training with 100 datapoints of std 1.17845