In [4]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import seaborn as sns

from os.path import join

plt.style.use(["seaborn", "thesis"])

In [5]:
plt.rc("figure", figsize=(8,4))

# Fetch Dataset 

In [6]:
from SCFInitialGuess.utilities.dataset import extract_triu_batch, AbstractDataset
from sklearn.model_selection import train_test_split

#data_path = "../../dataset/ButadieneT"
#postfix = "ButadieneT"
#n_electrons = 30
#dim = 130

data_path = "../../dataset/EthenT/3"
postfix = "EthenT3"
dim = 72
basis = "6-311++g**"
n_electrons = 16



def split(x, y, ind):
    return x[:ind], y[:ind], x[ind:], y[ind:]

S = np.load(join(data_path, "S" + postfix + ".npy")).reshape(-1, dim, dim)
P = np.load(join(data_path, "P" + postfix + ".npy")).reshape(-1, dim, dim)

molecules = np.load(join(data_path, "molecules" + postfix + ".npy"))


ind = int(0.8 * len(molecules))
ind_val = int(0.8 * ind)


molecules = (
    molecules[:ind_val], 
    molecules[ind_val:ind], 
    molecules[ind:]
)

#s_triu_norm, mu, std = AbstractDataset.normalize(S)


s_train, p_train, s_test, p_test = split(S, P, ind)
s_train, p_train, s_val, p_val = split(s_train, p_train, ind_val)



FileNotFoundError: [Errno 2] No such file or directory: '../../dataset/EthenT/3/SEthenT3.npy'

# SetUp descriptor And Normalisation

In [7]:
import pickle

model_path = "../../models/Descriptor"
target = "F"

descriptor_C = pickle.load(open(model_path + "C/" + target + "/descriptor.dump", "rb"))
descriptor_H = pickle.load(open(model_path + "H/" + target + "/descriptor.dump", "rb"))

descriptors = {
    "C": descriptor_C,
    "H": descriptor_H
}

FileNotFoundError: [Errno 2] No such file or directory: '../../models/DescriptorC/F/descriptor.dump'

In [5]:
descriptor_H

<SCFInitialGuess.descriptors.high_level.AtomicNumberWeighted at 0x7fd75601bba8>

In [6]:
normalisations = {
    "C": np.load(model_path + "C/" + target + "/normalisation.npy"),
    "H": np.load(model_path + "H/" + target + "/normalisation.npy")
}

# Fetch Networks 

In [7]:
path = model_path

name_C = model_path + "C/" + target + "/C_" + target + ".h5"
name_H = model_path + "H/" + target + "/H_" + target + ".h5"


model_C = keras.models.load_model(name_C)
model_H = keras.models.load_model(name_H)

networks = {
    "C": model_C,
    "H": model_H
}

# Guess Method 

In [19]:
from pyscf.scf import hf
h_test = [hf.get_hcore(mol.get_pyscf_molecule()) for mol in molecules[2]]

In [28]:
#f_gwh_test = []

k = np.ones((dim, dim)) * 1.75 - \
            np.diag(np.ones(dim)) * 0.75  

#for h,s in zip(h_test, s_test):
#    h_diag = np.diag(h)   
#    f_gwh_test.append(k * np.add.outer(h_diag, h_diag) * s / 2)
#f_gwh_test = np.array(f_gwh_test)

def gwh_fock(mol):
    h = hf.get_hcore(mol.get_pyscf_molecule())
    s = hf.get_ovlp(mol.get_pyscf_molecule())
    h_diag = np.diag(h)   
    
    return k * np.add.outer(h_diag, h_diag) * s / 2


In [3]:
from SCFInitialGuess.utilities.dataset import reconstruct_from_triu, AbstractDataset, density_from_fock
from SCFInitialGuess.utilities.constants import number_of_basis_functions as N_BASIS
from pyscf.scf.hf import init_guess_by_wolfsberg_helmholtz, get_ovlp


def init_guess_by_ann(mol):
    
    f_out = gwh_fock(mol)
    s = get_ovlp(mol)
    
    for atom_index, atom in enumerate(mol.species):
        
        mask = mol.make_atom_mask(atom_index)
        
        
        G = descriptors[atom].calculate_atom_descriptor(
            atom_index, 
            mol,
            descriptors[atom].number_of_descriptors
        )
        
        G_norm = AbstractDataset.normalize(G, *normalisations[atom])
        
        #f(G)
        f = networks[atom].predict(G.reshape(1, -1))
        
        f_batch = reconstruct_from_triu(
            f,
            N_BASIS[mol.basis][atom]
        )
        
        f_out[mask] = f_batch.flatten()
        
    
    p_out = density_from_fock(f_out, s, mol)
    
        
    return p_out
        
    

# Analysis 

## Utils 

In [22]:
from SCFInitialGuess.utilities.dataset import StaticDataset

dataset = StaticDataset(
    #train=(s_train, p_train),
    train=(None, None),
    validation=(None, None),
    test=(s_test, p_test),
    mu=0,
    std=1
)

 ## Execution 

In [32]:
f_nn = np.array([
    init_guess_by_ann(mol) for mol in molecules[2]
])

In [33]:
f_nn.shape

(201, 72, 72)

In [34]:
from SCFInitialGuess.utilities.analysis import \
    make_results_str, measure_all_quantities, mf_initializer
from  SCFInitialGuess.utilities.usermessages import Messenger as msg

msg.print_level=0

print(make_results_str(measure_all_quantities(
    f_nn,
    dataset,
    molecules[2],
    n_electrons,
    mf_initializer,
    dim=dim,
    is_triu=False,
    is_dataset_triu=False,
    s=S[ind:]
)))

--- Absolute Error ---
6.64424E-01 +- 4.06621E-01
--- Symmetry Error ---
0.00000E+00 +- 0.00000E+00
--- Idempotence Error ---
2.15945E+02 +- 5.00249E+01
--- Occupance Error ---
9.73139E+02 +- 1.09127E+02
--- HF Energy Error ---
1.13599E+05 +- 3.06702E+04
--- Avg. Iterations ---
9.96567E+01 +- 4.85476E+00
--- Avg. Iterations W/O Non Converged ---
3.10000E+01 +- 0.00000E+00
--- Num. Not Convd. ---
200

