In [3]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import seaborn as sns

from os.path import join

plt.style.use(["seaborn", "thesis"])

# Fetch Dataset 

In [31]:
from SCFInitialGuess.utilities.dataset import extract_triu_batch, AbstractDataset
from sklearn.model_selection import train_test_split

data_path = "../../dataset/TSmall_sto3g"
postfix = "TSmall_sto3g"
dim = 26
data_path = "../../dataset/EthenT/"
postfix = "EthenT"
dim = 72
basis = "6-311++g**"
n_electrons = 16
#data_path = "../butadien/data/"
#postfix = ""
#dim = 26


def split(x, y, ind):
    return x[:ind], y[:ind], x[ind:], y[ind:]

S = np.load(join(data_path, "S" + postfix + ".npy"))
P = np.load(join(data_path, "P" + postfix + ".npy"))

index = np.load(join(data_path, "index" + postfix + ".npy"))

molecules = np.load(join(data_path, "molecules" + postfix + ".npy"))


ind = int(0.8 * len(index))
ind_val = int(0.8 * ind)


molecules = (
    molecules[:ind_val], 
    molecules[ind_val:ind], 
    molecules[ind:]
)

s_triu_norm, mu, std = AbstractDataset.normalize(S)


s_train, p_train, s_test, p_test = split(S, P, ind)
s_train, p_train, s_val, p_val = split(s_train, p_train, ind_val)

# Utilities 

## Calculate Descriptors and extract center blocks 

In [50]:
from SCFInitialGuess.utilities.constants import number_of_basis_functions as N_BASIS
from SCFInitialGuess.descriptors.coordinate_descriptors import \
    AtomicNumberWeighted, Gaussians
from SCFInitialGuess.descriptors.coordinate_descriptors import \
    Gaussians, RADIAL_GAUSSIAN_MODELS, AZIMUTHAL_GAUSSIAN_MODELS, POLAR_GAUSSIAN_MODELS
from SCFInitialGuess.utilities.dataset import extract_triu
from SCFInitialGuess.utilities.dataset import StaticDataset


def make_mask(mol, species):

    masks = []
    current_dim = 0
    for atom in mol.species:
        # calculate block range
        index_start = current_dim
        current_dim += N_BASIS[mol.basis][atom] 
        index_end = current_dim

        if atom == species:

            # calculate logical vector
            L = np.arange(dim)
            L = np.logical_and(index_start <= L, L < index_end)

            masks.append(np.logical_and.outer(L, L))
            
    
    return masks




def extract_dataset(molecules, p_batch, species):    
    
    # make mask to extract central blocks
    masks = make_mask(molecules[0], species)
    
    descriptor = AtomicNumberWeighted(
        Gaussians(*RADIAL_GAUSSIAN_MODELS["Equidistant-Broadening_1"]),
        Gaussians(*AZIMUTHAL_GAUSSIAN_MODELS["Equisitant_1"]),
        Gaussians(*POLAR_GAUSSIAN_MODELS["Equisitant_1"])
    )
    
    descriptor_values, blocks = [], []
    for p, mol in zip(p_batch, molecules):
        for mask in masks:
            blocks.append(extract_triu(
                p.copy()[mask], 
                N_BASIS[mol.basis][species]
            ))
        
        for i, atom in enumerate(mol.species):
            if atom == species:
                descriptor_values.append(
                    descriptor.calculate_atom_descriptor(
                        i, 
                        mol,
                        descriptor.number_of_descriptors
                    )
                )
            
    return descriptor_values, blocks


def make_dataset(species):
    
    inputs_test, outputs_test = extract_dataset(
        molecules[2], 
        p_test.reshape(-1, dim, dim),
        species
    )
    
    inputs_validation, outputs_validation = extract_dataset(
        molecules[1], 
        p_val.reshape(-1, dim, dim),
        species
    )

    inputs_train, outputs_train = extract_dataset(
        molecules[0], 
        p_train.reshape(-1, dim, dim),
        species
    )
    
    
    _, mu, std = StaticDataset.normalize(inputs_train + inputs_validation + inputs_test)
    
    dataset = StaticDataset(
        train=(
            StaticDataset.normalize(inputs_train, mu, std)[0], 
            np.asarray(outputs_train)
        ),
        validation=(
            StaticDataset.normalize(inputs_validation, mu, std)[0], 
            np.asarray(outputs_validation)
        ),
        test=(
            StaticDataset.normalize(inputs_test, mu, std)[0], 
            np.asarray(outputs_test)
        ),
        mu=mu,
        std=std
    )
    
    return dataset

## Networks 

In [52]:
#keras.backend.clear_session()

#activation = "elu"
#learning_rate = 1e-5

intializer = keras.initializers.TruncatedNormal(mean=0.0, stddev=0.01)

def make_model(
        structure, 
        input_dim, 
        output_dim,
        activation="elu", 
        learning_rate=1e-5
    ):

    model = keras.Sequential()

    # input layer
    model.add(keras.layers.Dense(
        structure[0], 
        activation=activation, 
        input_dim=input_dim, 
        kernel_initializer=intializer
    ))

    for layer in structure[1:]:
        model.add(keras.layers.Dense(
            layer, 
            activation=activation, 
            kernel_initializer=intializer, 
            #bias_initializer='zeros',
            #kernel_regularizer=keras.regularizers.l2(1e-8)
        ))

    #output
    model.add(keras.layers.Dense(output_dim))

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate), 
        loss='MSE', 
        metrics=['mae', 'mse']
    )
    
    return model



In [29]:
filepath = "../../models/ParticleNumberIndependent/CenterBlocks/model_C_" + postfix + ".h5"

early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_mean_squared_error", 
    min_delta=1e-7, 
    patience=200, 
    verbose=1
)

reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5, 
    patience=50, 
    verbose=1, 
    mode='auto', 
    min_delta=1e-4, 
    cooldown=50, 
    min_lr=1e-10
)

checkpoint = keras.callbacks.ModelCheckpoint(
    filepath, 
    monitor='val_loss', 
    verbose=1, 
    save_best_only=False, 
    save_weights_only=False, 
    mode='auto', 
    period=1
)


def train_model(model, dataset):
    
    error = []
    while True:
        keras.backend.set_value(model.optimizer.lr, learning_rate)
            
        history = model.fit(
            x = dataset.training[0],
            y = dataset.training[1],
            epochs=epochs,
            shuffle=True,
            validation_data=dataset.validation, 
            verbose=0, 
            callbacks=[
                early_stopping, 
                reduce_lr
            ]
        )

        error.append(model.evaluate(
            dataset.testing[0], 
            dataset.testing[1], 
            verbose=0
        )[1])
    
    return error
    

# C  

## Compute inputs 

In [22]:
from SCFInitialGuess.utilities.constants import number_of_basis_functions as N_BASIS

dim_C = N_BASIS[basis][species]
dim_C_triu = dim_C * (dim_C + 1) // 2

In [39]:
species = "C"
dataset_C = make_dataset(species)


In [53]:
structure_C = [dim_C_triu + 200, dim_C_triu + 100, dim_C_triu + 50]

model_C = make_model(
    structure=structure_C,
    input_dim=dataset_C.training[0].shape[1],
    output_dim=dim_C_triu
)

In [54]:
train_model(model_C, dataset_C)

NameError: name 'n_resets' is not defined