In [1]:
import sys
sys.path.append('../src')

import numpy as np
import torch
import time
import sys
import resource
import copy
from torch.utils.data import DataLoader

from data_classes import *
from read_input import *
from read_trainset import *
from network import *
from prepare_batches import *
from traininit import *
from data_set import *
from data_loader import *
from optimization_step import *
from output_nn import *
from py_aeio import *
from bnn import BayesianNeuralNetwork
from bnn import get_batch

device = "cpu"
tin_file = "train.in"
tin = read_train_in(tin_file)
torch.manual_seed(3)
np.random.seed(tin.numpy_seed)
tin.train_forces = False

tin.train_file = 'Cu.active_learning'
list_structures_energy, _, list_removed, max_nnb, tin = read_list_structures(tin)

net = NetAtom(tin.networks_param["input_size"], tin.networks_param["hidden_size"],
			    tin.sys_species, tin.networks_param["activations"], tin.alpha, device)

bnn = BayesianNeuralNetwork(net)

np.random.seed(42)
dataset_size = len(list_structures_energy)
indices = list(range(dataset_size))
np.random.shuffle(indices)

training_indices = indices[:5000]
test_indices = indices[5000:6000]
valid_indices = indices[6000:]

training_structures_energy = [list_structures_energy[x] for x in training_indices]
test_structures_energy     = [list_structures_energy[x] for x in test_indices]
valid_structure_energy     = [list_structures_energy[x] for x in valid_indices]

training_batch = get_batch(tin, training_structures_energy, max_nnb)
test_batch     = get_batch(tin, test_structures_energy, max_nnb)
valid_batch    = get_batch(tin, valid_structure_energy, max_nnb)

EPOCHS = 10000
NUM_SAMPLES = 10000
LR = 0.01

bnn.train(training_batch, EPOCHS, initial_lr=LR, verbose=True)

valid_pred = bnn.predict(valid_batch,num_samples=NUM_SAMPLES)
std_valid_batch = torch.std(valid_pred['obs'],0)

test_pred = bnn.predict(test_batch,num_samples=NUM_SAMPLES)
std_test_batch = torch.std(test_pred['obs'],0)
idx_test_sorted = np.argsort(std_test_batch)

  from .autonotebook import tqdm as notebook_tqdm


[EPOCH LOSS 0001] loss: 193.0858
[EPOCH RMSD 0001] loss: 215191.6212
[EPOCH LOSS 0101] loss: 3.3805
[EPOCH LOSS 0201] loss: 3.2439
[EPOCH LOSS 0301] loss: 3.1360
[EPOCH LOSS 0401] loss: 3.7334
[EPOCH LOSS 0501] loss: 4.1230
[EPOCH LOSS 0601] loss: 4.9404
[EPOCH LOSS 0701] loss: 2.6198
[EPOCH LOSS 0801] loss: 2.9549
[EPOCH LOSS 0901] loss: 2.6255
[EPOCH LOSS 1001] loss: 2.5830
[EPOCH RMSD 1001] loss: 839.4378
[EPOCH LOSS 1101] loss: 2.8438
[EPOCH LOSS 1201] loss: 2.9262
[EPOCH LOSS 1301] loss: 2.7728
[EPOCH LOSS 1401] loss: 9.3064
[EPOCH LOSS 1501] loss: 4.4778
[EPOCH LOSS 1601] loss: 2.5190
[EPOCH LOSS 1701] loss: 2.5377
[EPOCH LOSS 1801] loss: 3.1116
[EPOCH LOSS 1901] loss: 3.2372
[EPOCH LOSS 2001] loss: 3.0098
[EPOCH RMSD 2001] loss: 711.3995
[EPOCH LOSS 2101] loss: 4.8444
[EPOCH LOSS 2201] loss: 3.1050
[EPOCH LOSS 2301] loss: 2.5494
[EPOCH LOSS 2401] loss: 3.3365
[EPOCH LOSS 2501] loss: 3.0020
[EPOCH LOSS 2601] loss: 2.5169
[EPOCH LOSS 2701] loss: 5.5828
[EPOCH LOSS 2801] loss: 5.04

In [38]:
l2 = bnn.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)

print('RMSD valid set pre train {}'.format(l2[0].item()))
print('STD valid set before training: {}'.format(torch.mean(std_valid_batch).item()))
print('STD test set before training: {}'.format(torch.mean(std_test_batch).item()))

with open('std_test_multi_testalso.txt', 'w') as out:
    out.write('RMSD valid set pre train {}\n'.format(l2[0].item()))
    out.write('STD valid set before training: {}\n'.format(torch.mean(std_valid_batch).item()))
    out.write('STD test set before training: {}\n'.format(torch.mean(std_test_batch).item()))

    for i in range(0, 10): 
        
        l = list(range(i*100,i*100+10))

        indices = [test_indices[x] for x in idx_test_sorted[l]]
        std_new_points = [std_test_batch[x] for x in idx_test_sorted[l]]
        print('\nTraining with {} datapoints of std {}'.format(len(indices), np.array(std_new_points).mean()))
        out.write('\nTraining with {} datapoints of std {}\n'.format(len(indices), np.array(std_new_points).mean()))

        new_structure_energy = [list_structures_energy[x] for x in indices]
        new_training_structures_energy = training_structures_energy + new_structure_energy
        new_training_batch = get_batch(tin, new_training_structures_energy, max_nnb)

        bnn1 = copy.deepcopy(bnn)
        bnn1.train(new_training_batch, EPOCHS, initial_lr=LR, verbose=False)

        valid_pred = bnn1.predict(valid_batch,num_samples=NUM_SAMPLES)
        std_valid_batch = torch.mean(torch.std(valid_pred['obs'],0))
        l2 = bnn1.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
    
        print('RMSD after training: {}'.format(l2[0].item()))
        out.write('RMSD after training: {}\n'.format(l2[0].item()))

        print('STD valid set after training: {}'.format(std_valid_batch.item()))
        out.write('STD valid set after training: {}\n'.format(std_valid_batch.item()))

        test_pred = bnn1.predict(test_batch,num_samples=NUM_SAMPLES)
        std_test_batch = torch.mean(torch.std(test_pred['obs'],0))

        print('STD test set after training: {}'.format(std_test_batch.item()))
        out.write('STD test set after training: {}\n'.format(std_test_batch.item()))


RMSD valid set pre train 73.70998508396451
STD valid set before training: 2.9060710028649224

Training with 10 datapoints of std 2.272781467712229
RMSD after training: 55.88661375316724
STD valid set after training: 2.512414081468009

Training with 10 datapoints of std 2.434894878076799
RMSD after training: 56.19159301982575
STD valid set after training: 2.4968948035325145

Training with 10 datapoints of std 2.6256434976881464
RMSD after training: 66.42992809616236
STD valid set after training: 2.79984864442066

Training with 10 datapoints of std 2.757753295772476
RMSD after training: 33.702417532399984
STD valid set after training: 1.9198488944367949

Training with 10 datapoints of std 2.820945679456239
RMSD after training: 65.49171845910185
STD valid set after training: 2.7846856093749803

Training with 10 datapoints of std 2.8764563980774716
RMSD after training: 52.364929478276636
STD valid set after training: 2.199613750124623

Training with 10 datapoints of std 2.9604755382437045


In [2]:
l2_valid = bnn.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
l2_test = bnn.get_loss_RMSE(test_batch, num_samples=NUM_SAMPLES)

print('RMSD valid set pre train {}'.format(l2[0].item()))
print('RMSD test set pre train {}'.format(l2[0].item()))

print('STD valid set before training: {}'.format(torch.mean(std_valid_batch).item()))
print('STD test set before training: {}'.format(torch.mean(std_test_batch).item()))

with open('std_test_multi_testalso.txt', 'w') as out:
    out.write('RMSD valid set pre train {}\n'.format(l2_valid[0].item()))
    out.write('RMSD test set pre train {}\n'.format(l2_test[0].item()))
    out.write('STD valid set before training: {}\n'.format(torch.mean(std_valid_batch).item()))
    out.write('STD test set before training: {}\n'.format(torch.mean(std_test_batch).item()))

    for i in range(0, 10): 
        
        l = list(range(i*100,i*100+10))

        indices = [test_indices[x] for x in idx_test_sorted[l]]
        std_new_points = [std_test_batch[x] for x in idx_test_sorted[l]]
        print('\nTraining with {} datapoints of std {}'.format(len(indices), np.array(std_new_points).mean()))
        out.write('\nTraining with {} datapoints of std {}\n'.format(len(indices), np.array(std_new_points).mean()))

        new_structure_energy = [list_structures_energy[x] for x in indices]
        new_training_structures_energy = training_structures_energy + new_structure_energy
        new_training_batch = get_batch(tin, new_training_structures_energy, max_nnb)

        bnn1 = copy.deepcopy(bnn)
        bnn1.train(new_training_batch, EPOCHS, initial_lr=LR, verbose=False)

        valid_pred = bnn1.predict(valid_batch,num_samples=NUM_SAMPLES)
        std_valid_batch = torch.mean(torch.std(valid_pred['obs'],0))
        l2 = bnn1.get_loss_RMSE(valid_batch, num_samples=NUM_SAMPLES)
    
        print('RMSD valid after training: {}'.format(l2[0].item()))
        out.write('RMSD valid after training: {}\n'.format(l2[0].item()))

        print('STD valid set after training: {}'.format(std_valid_batch.item()))
        out.write('STD valid set after training: {}\n'.format(std_valid_batch.item()))

        test_pred = bnn1.predict(test_batch,num_samples=NUM_SAMPLES)
        new_std_test_batch = torch.mean(torch.std(test_pred['obs'],0))
        l2_test = bnn1.get_loss_RMSE(test_batch, num_samples=NUM_SAMPLES)

        print('RMSD test after training: {}'.format(l2_test[0].item()))
        out.write('RMSD test after training: {}\n'.format(l2_test[0].item()))

        print('STD test set after training: {}'.format(new_std_test_batch.item()))
        out.write('STD test set after training: {}\n'.format(new_std_test_batch.item()))


NameError: name 'l2' is not defined