In [5]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# so changes to the project sources are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [6]:
import sys
# Make the modules under ../src importable. The path is relative, so it is
# resolved against the working directory the kernel was started in.
sys.path.append('../src')

In [7]:
import numpy as np
import torch
import time
import sys
import resource
from torch.utils.data import DataLoader

from data_classes import *
from read_input import *
from read_trainset import *
from network import *
from prepare_batches import *
from traininit import *
from data_set import *
from data_loader import *
from optimization_step import *
from output_nn import *
from py_aeio import *

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Run configuration: device, input file, RNG seeds.
device = "cpu"
tin_file = "train.in"
# Load the settings object from the input file; later cells read and
# override attributes on it (numpy_seed, train_forces, train_file, ...).
tin = read_train_in(tin_file)
# Seed both RNGs: torch with a fixed value, NumPy with the seed stored in
# the loaded settings.
torch.manual_seed(3)
np.random.seed(tin.numpy_seed)
# Override the flag from the input file.
# NOTE(review): presumably restricts training to energies only -- confirm.
tin.train_forces = False

In [5]:
# Point the settings at the data file, overriding whatever train.in specified.
tin.train_file = 'Cu.active_learning'
# read_list_structures returns five values; the second is discarded here and
# `tin` is rebound to the (possibly updated) settings object it returns.
# NOTE(review): the meaning of list_removed / max_nnb is defined in
# read_trainset -- confirm there before relying on them.
list_structures_energy, _, list_removed, max_nnb, tin = read_list_structures(tin)

In [6]:
from bnn import BayesianNeuralNetwork, get_batch

# Construct the atomic network from the loaded settings, then wrap it in the
# Bayesian model. Note that `bnn` is rebound from the imported module name's
# namespace to the model instance on the last line.
net = NetAtom(
    tin.networks_param["input_size"],
    tin.networks_param["hidden_size"],
    tin.sys_species,
    tin.networks_param["activations"],
    tin.alpha,
    device,
)

bnn = BayesianNeuralNetwork(net)

In [7]:
# Re-seed NumPy with a fixed value (replacing the seed taken from the
# settings earlier) so the shuffled ordering is the same on every run.
np.random.seed(42)
dataset_size = len(list_structures_energy)
indices = [*range(dataset_size)]
np.random.shuffle(indices)

In [8]:
# Partition the shuffled indices: first 5000 for training, the next 1000 for
# the test pool, and everything after position 6000 for validation.
training_indices, test_indices, valid_indices = (
    indices[:5000],
    indices[5000:6000],
    indices[6000:],
)

In [9]:
# Materialise each subset by looking up its indices in the full structure list.
training_structures_energy = [
    list_structures_energy[i] for i in training_indices
]
test_structures_energy = [
    list_structures_energy[i] for i in test_indices
]
valid_structure_energy = [
    list_structures_energy[i] for i in valid_indices
]

In [10]:
# Build one batch per subset, in the order training -> test -> validation.
training_batch, test_batch, valid_batch = (
    get_batch(tin, structures, max_nnb)
    for structures in (
        training_structures_energy,
        test_structures_energy,
        valid_structure_energy,
    )
)

In [4]:
# Epoch budget passed to the training call (one million).
EPOCHS = 1_000_000

In [24]:
# Train the Bayesian model on the training batch with a budget of EPOCHS.
# NOTE(review): the captured output below ends in a KeyboardInterrupt shortly
# after epoch 2001, so the recorded run never used the full budget and this
# cell will not finish in reasonable time under "Run All".
bnn.train(training_batch, EPOCHS, initial_lr=0.01, verbose=True)

[EPOCH LOSS 0001] loss: 10.5788
[EPOCH RMSD 0001] loss: 183.7037
[EPOCH LOSS 0101] loss: 1.8492
[EPOCH LOSS 0201] loss: 4.8630
[EPOCH LOSS 0301] loss: 2.0905
[EPOCH LOSS 0401] loss: 1.5916
[EPOCH LOSS 0501] loss: 4.4344
[EPOCH LOSS 0601] loss: 1.5649
[EPOCH LOSS 0701] loss: 1.6004
[EPOCH LOSS 0801] loss: 1.5291
[EPOCH LOSS 0901] loss: 15.7572
[EPOCH LOSS 1001] loss: 2.0339
[EPOCH RMSD 1001] loss: 296.1186
[EPOCH LOSS 1101] loss: 1.6032
[EPOCH LOSS 1201] loss: 2.0730
[EPOCH LOSS 1301] loss: 2.0113
[EPOCH LOSS 1401] loss: 1.7114
[EPOCH LOSS 1501] loss: 1.6081
[EPOCH LOSS 1601] loss: 1.6360
[EPOCH LOSS 1701] loss: 3.0816
[EPOCH LOSS 1801] loss: 5.3890
[EPOCH LOSS 1901] loss: 1.7784
[EPOCH LOSS 2001] loss: 1.3785
[EPOCH RMSD 2001] loss: 112.7052


KeyboardInterrupt: 

In [30]:
# Draw 1000 predictive samples per set and take the standard deviation along
# dimension 0 of the 'obs' entry as the uncertainty estimate.
valid_pred = bnn.predict(valid_batch, num_samples=1000)
std_valid_batch = valid_pred['obs'].std(0)

test_pred = bnn.predict(test_batch, num_samples=1000)
std_test_batch = test_pred['obs'].std(0)
# Positions of the test entries ordered from smallest to largest std.
idx_test_sorted = np.argsort(std_test_batch)

In [31]:
# Take the test entry with the largest predictive std (last in the ascending
# ranking), append it to the training structures and rebuild the batch.
idx = test_indices[idx_test_sorted[-1]]
new_structure_energy = [list_structures_energy[idx]]
new_training_structures_energy = [
    *training_structures_energy,
    *new_structure_energy,
]
new_training_batch = get_batch(
    tin, new_training_structures_energy, max_nnb
)

In [33]:
# Number of predictive samples drawn in the retraining experiment.
NUM_SAMPLES = 10_000

In [34]:
import copy

# Step between the ranked test candidates that are tried one at a time.
RETRAIN_STRIDE = 100
# NOTE(review): the original expression was `int(EPOCHS/ )` -- the divisor was
# missing, which is a SyntaxError. 1000 is a placeholder so the cell runs;
# confirm the value that produced the recorded output.
RETRAIN_EPOCH_DIVISOR = 1000
retrain_epochs = EPOCHS // RETRAIN_EPOCH_DIVISOR

# Baseline error of the current model on the validation batch.
l2 = bnn.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
print('RMSD valid set pre train {}'.format(l2))

# For each selected candidate: add it to the training set, retrain a copy of
# the baseline model, and record (candidate std, mean validation std, error).
with open('std_test.txt', 'w') as out:
    # Bounded by the actual ranking length instead of a hard-coded 1000, so a
    # smaller test pool no longer raises IndexError.
    for x in range(0, len(idx_test_sorted), RETRAIN_STRIDE):

        # idx_test_sorted is ascending in predictive std, so larger x means a
        # more uncertain candidate.
        idx = test_indices[idx_test_sorted[x]]
        std_new_point = std_test_batch[idx_test_sorted[x]]
        new_structure_energy = [list_structures_energy[idx]]
        new_training_structures_energy = training_structures_energy + new_structure_energy
        new_training_batch = get_batch(tin, new_training_structures_energy, max_nnb)

        # Deep-copy so every candidate starts from the same baseline model and
        # `bnn` itself is left untouched.
        bnn1 = copy.deepcopy(bnn)
        bnn1.train(new_training_batch, retrain_epochs, initial_lr=0.01, verbose=False)

        valid_pred = bnn1.predict(valid_batch, num_samples=NUM_SAMPLES)
        std_valid_batch = torch.mean(torch.std(valid_pred['obs'], 0))
        l2 = bnn1.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
        out.write('{} {} {}\n'.format(std_new_point, std_valid_batch, l2))
        print(l2, std_new_point, std_valid_batch)


RMSD valid set pre train (tensor(93.3421, dtype=torch.float64), tensor(365.9643, dtype=torch.float64))
(tensor(44.0775, dtype=torch.float64), tensor(351.6858, dtype=torch.float64)) tensor(2.3626, dtype=torch.float64) tensor(1.9920, dtype=torch.float64)
(tensor(32.2377, dtype=torch.float64), tensor(228.9130, dtype=torch.float64)) tensor(2.5639, dtype=torch.float64) tensor(1.8053, dtype=torch.float64)
(tensor(21.5564, dtype=torch.float64), tensor(185.1047, dtype=torch.float64)) tensor(2.6573, dtype=torch.float64) tensor(1.5650, dtype=torch.float64)
(tensor(30.2174, dtype=torch.float64), tensor(235.5293, dtype=torch.float64)) tensor(2.7478, dtype=torch.float64) tensor(1.6847, dtype=torch.float64)
(tensor(32.5660, dtype=torch.float64), tensor(232.7162, dtype=torch.float64)) tensor(2.7985, dtype=torch.float64) tensor(1.8011, dtype=torch.float64)
(tensor(39.1014, dtype=torch.float64), tensor(227.1222, dtype=torch.float64)) tensor(2.8514, dtype=torch.float64) tensor(1.9808, dtype=torch.float6

In [None]:
# Scratch cell: write the integers 0..9 to x.txt, one per line.
with open('x.txt', 'w') as out:
    for x in range(10):
        print(x, file=out)