In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Overlap matrices (S) and matching density matrices (P); the saved array
# unpacks along its first axis into exactly these two parts.
S, P = np.load("butadien/dataset.npy")

# The molecule entries are Python objects (a later cell calls
# `.get_pyscf_molecule()` on them), so the file is a pickled object array.
# NumPy >= 1.16.3 refuses to load those unless allow_pickle is enabled.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
molecules = np.load("butadien/molecules.npy", allow_pickle=True)

In [None]:
from SCFInitialGuess.utilities.dataset import Dataset

# Edge length of the square overlap / density matrices.
dim = 26

# Number of samples drawn from the data set, and how many of them are used
# for training; the remainder becomes the held-out test set.
n_samples = 200
ind_cut = 150

# Shuffle with a dedicated, seeded generator so that the split is identical
# after every kernel restart and NumPy's global random state is left alone.
split_seed = 42
index = np.arange(n_samples)
np.random.RandomState(split_seed).shuffle(index)

train_index = index[:ind_cut]
test_index = index[ind_cut:]

S_test = np.array(S)[test_index]
P_test = np.array(P)[test_index]
molecules_test = [molecules[i] for i in test_index]

S_train = np.array(S)[train_index]
P_train = np.array(P)[train_index]
molecules_train = [molecules[i] for i in train_index]

# split_test=0.0: the test part is attached manually below instead of being
# carved out of the training arrays by Dataset itself.
dataset = Dataset(np.array(S_train), np.array(P_train), split_test=0.0)

# Transform the test inputs with the statistics of the training set
# (presumably element 0 of the returned value is the normalised array -
# TODO confirm against Dataset.normalize).
dataset.testing = (Dataset.normalize(S_test, mean=dataset.x_mean, std=dataset.x_std)[0], P_test)

[-] 2018-03-23 18:01:24: Data set normalized. Mean value std: 0.00869629472968855


In [None]:
from SCFInitialGuess.nn.networks import EluTrNNN
from SCFInitialGuess.nn.training import Trainer, RegularizedMSE

# Layer widths of the network: the input and output layers both have
# dim**2 units, matching the flattened dim x dim matrices.
network_structure = [dim**2, 1000, 600, 400, 200, 100, dim**2]

trainer = Trainer(
    EluTrNNN(network_structure),
    cost_function=RegularizedMSE(alpha=1e-7),
    optimizer=tf.train.AdamOptimizer(learning_rate=1e-5),
)

trainer.setup()

network_orig, sess_orig = trainer.train(dataset, convergence_threshold=1e-7)

# Keep a handle on the trainer's graph; later cells enter it before running
# the trained network.
graph_orig = trainer.graph

[-] 2018-03-23 18:01:25: No target graph specified for Trainer setup. Creating new graph ...
[-] 2018-03-23 18:01:25: Setting up the training in the target graph ...
[-] 2018-03-23 18:01:25: network ...
[-] 2018-03-23 18:01:25: error function ...
[-] 2018-03-23 18:01:25: cost function ...
[-] 2018-03-23 18:01:34: training step
[-] 2018-03-23 18:01:34: Starting network training ...
[ ] 2018-03-23 18:01:34: Val. Cost: 5.775E-02. Error: 5.774E-02. Diff: 1.0E+10
[ ] 2018-03-23 18:01:38: Val. Cost: 5.082E-02. Error: 5.082E-02. Diff: 6.9E-03
[ ] 2018-03-23 18:01:43: Val. Cost: 5.050E-03. Error: 5.042E-03. Diff: 4.6E-02
[ ] 2018-03-23 18:01:47: Val. Cost: 6.250E-04. Error: 6.174E-04. Diff: 4.4E-03
[ ] 2018-03-23 18:01:51: Val. Cost: 4.969E-04. Error: 4.892E-04. Diff: 1.3E-04
[ ] 2018-03-23 18:01:55: Val. Cost: 4.658E-04. Error: 4.581E-04. Diff: 3.1E-05
[ ] 2018-03-23 18:01:59: Val. Cost: 4.354E-04. Error: 4.277E-04. Diff: 3.0E-05
[ ] 2018-03-23 18:02:04: Val. Cost: 4.344E-04. Error: 4.268E-04

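McWeeny purification step for a density matrix normalised such that $PSP = 2P$: $P' = \dfrac{3}{2} PSP - \dfrac{2}{2 \cdot 2} PSPSP = \dfrac{3}{2} PSP - \dfrac{1}{2} PSPSP$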

In [None]:
def mc_wheeny_purification(p, s):
    """Apply one McWeeny purification step, P' = (3 PSP - PSPSP) / 2.

    The matrix size is inferred from `s`, so the function no longer relies
    on a notebook-global dimension and works for any square size.

    Args:
        p: density matrix, either square (n, n) or flattened with n*n entries.
        s: overlap matrix, either square (n, n) or flattened with n*n entries.

    Returns:
        The purified density matrix as an (n, n) array.

    Raises:
        ValueError: if `p` or `s` cannot be reshaped to (n, n).
    """
    s = np.asarray(s)
    n = int(round(np.sqrt(s.size)))
    p = np.reshape(p, (n, n))
    s = s.reshape(n, n)

    # PSP appears in both terms; compute it once. The multiplication order
    # is the same as in the fully written-out expression.
    psp = np.dot(np.dot(p, s), p)
    pspsp = np.dot(np.dot(psp, s), p)
    return (3 * psp - pspsp) / 2

def multi_mc_wheeny(p, s, n_max=4):
    """Apply `mc_wheeny_purification` to `p` repeatedly, `n_max` times.

    Args:
        p: density matrix to purify.
        s: overlap matrix handed to every purification step.
        n_max: number of purification steps to perform.

    Returns:
        The result of the last purification step; `p` itself is returned
        unchanged when `n_max` is 0.
    """
    purified = p
    for _ in range(n_max):
        purified = mc_wheeny_purification(purified, s)
    return purified

def idemp_error(p, s):
    """Return the mean absolute deviation of PSP from 2P.

    The value is zero exactly when `p` satisfies the idempotency condition
    PSP = 2P. The matrix size is inferred from `s`, so the function no
    longer relies on a notebook-global dimension.

    Args:
        p: density matrix, either square (n, n) or flattened with n*n entries.
        s: overlap matrix, either square (n, n) or flattened with n*n entries.

    Returns:
        Mean over all matrix elements of |PSP - 2P|.

    Raises:
        ValueError: if `p` or `s` cannot be reshaped to (n, n).
    """
    s = np.asarray(s)
    n = int(round(np.sqrt(s.size)))
    p = np.reshape(p, (n, n))
    s = s.reshape(n, n)
    return np.mean(np.abs(np.dot(np.dot(p, s), p) - 2 * p))


In [None]:
# For every held-out sample: report the idempotency error of the reference
# density matrix and of the network prediction, each before and after
# McWeeny purification, plus the element-wise deviation from the reference.
for (s, p) in zip(S_test, P_test):

    # Network input: the flattened overlap matrix passed through the
    # dataset's input transformation.
    s_norm = dataset.input_transformation(s.reshape(1, dim**2))

    err_ref = idemp_error(p, s)
    print("Orip:         {:0.3E}".format(err_ref))
    err_ref_purified = idemp_error(mc_wheeny_purification(p, s), s)
    print("Orig prurif:  {:0.3E}".format(err_ref_purified))

    with graph_orig.as_default():
        p_nn = network_orig.run(sess_orig, s_norm).reshape(dim, dim)

    err_nn = idemp_error(p_nn, s)
    print("NN:           {:0.3E}".format(err_nn))
    err_nn_purified = idemp_error(mc_wheeny_purification(p_nn, s), s)
    print("NN pruified:  {:0.3E}".format(err_nn_purified))

    # Five consecutive purification steps on the raw prediction.
    p_nn_multi = multi_mc_wheeny(p_nn, s, n_max=5)
    print("NN multified: {:0.3E}".format(idemp_error(p_nn_multi, s)))

    # Mean absolute deviation from the reference before / after purification.
    p_ref = p.reshape(dim, dim)
    print("Value before: {:0.3E}".format(np.mean(np.abs(p_ref - p_nn))))
    print("Value after:  {:0.3E}".format(np.mean(np.abs(p_ref - p_nn_multi))))

    # Sanity check: count NaN, infinite and finite entries after purification.
    for label, mask in (
        ("Is nan: ", np.isnan(p_nn_multi)),
        ("Is inf: ", np.isinf(p_nn_multi)),
        ("Is fin: ", np.isfinite(p_nn_multi)),
    ):
        print(label + str(np.sum(mask)))
    print("--------------------")

In [None]:
from pyscf.scf import hf
from SCFInitialGuess.utilities.plotutils import prediction_scatter

dim = 26

# For every held-out molecule: predict a density matrix from its overlap
# matrix, derive a symmetrised and a purified variant, report accuracy and
# idempotency of all three, then record the iteration count of an SCF run
# started from each guess.
iterations = []
n_molecules = len(molecules_test)
for i, (molecule, p) in enumerate(zip(molecules_test, P_test)):

    mol = molecule.get_pyscf_molecule()

    print("Calculating: " + str(i + 1) + "/" + str(n_molecules))

    s_raw = hf.get_ovlp(mol)
    s_norm = dataset.input_transformation(s_raw.reshape(1, dim**2))

    # Only the network evaluation needs the TensorFlow graph.
    with graph_orig.as_default():
        P_orig = network_orig.run(sess_orig, s_norm).reshape(dim, dim).astype('float64')

    P_orig_sym = (P_orig + P_orig.T) / 2
    # Purification starts from the raw (not the symmetrised) prediction.
    P_orig_idem = multi_mc_wheeny(P_orig, s_raw, n_max=5)

    candidates = (
        ("Orig: ", P_orig),
        ("Sym:  ", P_orig_sym),
        ("Idem: ", P_orig_idem),
    )

    # check errors
    print("Accuracy (MSE):")
    for label, candidate in candidates:
        print(label + "{:0.3E}".format(np.mean(np.abs(p.reshape(dim, dim) - candidate)**2)))

    print("Idempotency:")
    for label, candidate in candidates:
        print(label + "{:0.3E}".format(idemp_error(candidate, s_raw)))

    # Reference density matrix with small Gaussian noise, used as the
    # baseline guess.
    p_noise = p.reshape(dim, dim) + np.random.randn(dim, dim) * 1e-4

    # One SCF run per initial guess, with DIIS switched off.
    iterations_molecule = []
    for guess in (p_noise, P_orig, P_orig_sym, P_orig_idem):
        mf = hf.RHF(mol)
        mf.diis = None
        mf.verbose = 1
        mf.kernel(dm0=guess)
        # NOTE(review): `iterations` is read from the solver object after
        # kernel(); confirm that the installed pyscf provides this attribute.
        iterations_molecule.append(mf.iterations)

    iterations.append(iterations_molecule)

iterations = np.array(iterations)

In [None]:
# Average over the molecules (axis 0); one value per initial guess, in the
# order noisy reference, raw prediction, symmetrised, purified.
print(np.mean(iterations, axis=0))