In [110]:
#------------------------------------------------------------------ 
# Script to perform a MC simulation of water clusters on the NVT 
# ensemble. The potential energy is predicted by a GNN previously
# trained. 
#------------------------------------------------------------------ 

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow.keras.optimizers import Adam
from spektral.data import Dataset, DisjointLoader, Graph
from spektral.layers import GlobalSumPool, GATConv
import scipy.sparse as sp
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random
import time

from mendeleev import H, O

In [163]:
# System 
N = 237             # number of water molecules

# Simulation parameters
nblocks = 10        # number of blocks for block-averaging
nsteps = 100       # number of steps per block
temperature = 100   # in Kelvin
pmove = 0.3         # percentage of particles to be moved in each MC step
dr_max = 0.003            # length of the movement in atomic units (Bohrs)

# Constants
kb = 3.1668114e-6   # Boltzmann constant in atomic units (Hartree/Kelvin)
beta = 1e0 / (kb * temperature)

In [139]:
# Read initial geometry 

data_dir = "../dataset_WaterClusters_big/"
file_name = data_dir+"N"+str(N)+"-T100/1000.dat"

class MyDataset(Dataset):

    # Define an auxiliar dataset with one single graph

    def __init__(self, r2_cutoff, **kwargs):

        self.r2_cutoff = r2_cutoff
        self.geometry_path = file_name
        super().__init__(**kwargs)

    # Define data matrices of a graph (x,a,e)

    def read(self):
            
        def make_graph():

            # Process the header of the geometry file
            geom_path = self.geometry_path
            gfile = open(geom_path,'r')
            line = gfile.readline()
            n_molecules = int(gfile.readline())
            line = gfile.readline()
            line = gfile.readline()
            line = gfile.readline()
            columns = line.split()
            energy = float(columns[0])
            #energy = float(columns[1]) # electron contribution
            line = gfile.readline()
            line = gfile.readline()            

            # Node features (atom type and position [x,y,z])            
            n_nodes = 3*n_molecules
            num_physical_features = 4
            num_abstract_features = 0
            num_node_features = num_physical_features + num_abstract_features
            x = np.zeros((n_nodes, num_node_features))
            pos = np.zeros((n_nodes, 3))
            for inode in range(n_nodes):
                line = gfile.readline()
                columns = line.split()
                if (inode%3==0):
                    x[inode,0] = O.atomic_number
                else:
                    x[inode,0] = H.atomic_number
                pos[inode,0:] = columns[1:]
                x[inode,1:] = pos[inode,0:]
            gfile.close()                

            # Binary Adjacency matrix (two nodes/atoms are connected if rij2 < r2_cutoff)
            a = np.zeros((n_nodes,n_nodes))
            n_edges = 0
            for iatom in range(3*n_molecules):
                for jatom in range(iatom+1,3*n_molecules):
                    rij = pos[iatom,0:] - pos[jatom,0:]
                    r2 = np.dot(rij,rij)
                    if (r2 <= self.r2_cutoff):
                        a[iatom,jatom] = 1
                        n_edges = n_edges + 1            
            a = np.maximum(a, a.T)#.astype(int) # Adjacency matrix is symetric in this case
            a = sp.csr_matrix(a)
            #a = normalized_adjacency(a, symmetric=True)
            
            # No edge features in this case
                
            # Labels
            num_labels = 1
            self.num_labels = num_labels
            y = np.zeros(num_labels,)
            y[0,] = energy

            print(str(geom_path)+"    Energy "+str(y[0,]))

            return Graph(x=x, a=a, y=y)

        # We must return a list of Graph objects
        return [make_graph()]        

d_cutoff = 6.0
d2_cutoff = d_cutoff * d_cutoff
dataset0 = MyDataset( d2_cutoff ) # dataset with initial graph
dataset_aux = MyDataset( d2_cutoff ) # auxiliar dataset

../dataset_WaterClusters_big/N237-T100/1000.dat    Energy -4.10263458
../dataset_WaterClusters_big/N237-T100/1000.dat    Energy -4.10263458


In [140]:
# Initialize the GNN model

# Config 
learning_rate = 1e-3  # Learning rate
epochs = 1  # Number of training epochs
batch_size = 1  # Batch size
n_out = 1
loader_tr = DisjointLoader(dataset0, batch_size=batch_size, epochs=epochs)

# Build the model
class Net(Model):
    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(8,attn_heads=7,dropout_rate=0,activation="relu")
        self.conv2 = GATConv(8,attn_heads=7,dropout_rate=0,activation="relu")
        self.global_pool = GlobalSumPool()
        self.dense1 = Dense(128,activation="relu")
        self.dense2 = Dense(128,activation="relu")
        self.dense = Dense(n_out)

    def call(self, inputs):
        #x, a, e, i = inputs
        x, a, i = inputs
        x = self.conv1([x, a])
        x = self.conv2([x, a])
        output = self.global_pool([x, i])
        output = self.dense1(output)
        output = self.dense2(output)
        output = self.dense(output)

        return output

model = Net()
optimizer = Adam(learning_rate)
loss_fn = MeanAbsoluteError()

@tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions) + sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

# fit the model one epoch
nepoch = 1
step = loss = 0
loss_train = []
for batch in loader_tr:
    step += 1
    loss += train_step(*batch)
    if step == loader_tr.steps_per_epoch:
        step = 0
        print(str(nepoch)+" Loss: {}".format(loss / loader_tr.steps_per_epoch))
        loss_train.append(loss / loader_tr.steps_per_epoch)

        nepoch = nepoch + 1
        
        loss = 0

# Load pre-trained weights

# load model weights
model.load_weights('./Models/GATConv_weights.hdf5')

# Show the model architecture
model.summary()

1 Loss: 214.49659729003906
Model: "net_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gat_conv_14 (GATConv)       multiple                  392       
                                                                 
 gat_conv_15 (GATConv)       multiple                  3304      
                                                                 
 global_sum_pool_7 (GlobalSu  multiple                 0         
 mPool)                                                          
                                                                 
 dense_21 (Dense)            multiple                  7296      
                                                                 
 dense_22 (Dense)            multiple                  16512     
                                                                 
 dense_23 (Dense)            multiple                  129       
                                  

In [141]:
# Function to evaluate the energy of a new molecular geometry

def energy_GNN(inputs):

    # Inputs are data matrices of the graph
    x, a, e = inputs

    # We save the new graph into the auxiliar dataset and define
    # a disjoint loader to evaluate model
    graph_aux = Graph(x=x,a=a,e=e,y=0)
    dataset_aux[0] = graph_aux
    loader_aux = DisjointLoader(dataset_aux, epochs=1) 
    for batch in loader_aux:
        input, y = batch
        energy_predicted = model(input, training=False)

    return float(energy_predicted)

# Check the implementation
graph0 = dataset0[0]
x = graph0.x
a = graph0.a
e = graph0.e
y = graph0.y
inputs = [x, a, e]
print( "Predicted: "+str(energy_GNN(inputs)) )


Predicted: -4.099071979522705


In [142]:
# Function to decide if a random movement is accepted with Metropolist test

exponent_guard = 75.0

def metropolis(delta):

    # Input: delta is the negative of argument of exponential

    if(delta>exponent_guard): # Too high, reject without evaluationg
        accept = False
    elif(delta<0): # downhill, accept without evaluating
        accept = True
    else: # Metropolis test
        zeta = random.uniform(0,1) # uniform random number in (0,1)
        accept = np.exp(-delta) > zeta

    return accept

In [143]:
# Function to compute adjacency matrix from the atomic positions

def adjacency(position):

    nat,ndim = position.shape
    nat = int(nat)
    nmol = int(nat/3)
    
    a = np.zeros((nat,nat))

    for iatom in range(3*nmol):
        for jatom in range(iatom+1,3*nmol):
            rij = position[iatom,:] - position[jatom,:]
            r2 = np.dot(rij,rij)
            if (r2 <= d2_cutoff):
                a[iatom,jatom] = 1
    a = np.maximum(a, a.T)#.astype(int) # Adjacency matrix is symetric in this case
    a = sp.csr_matrix(a)
    
    return a

In [164]:
# MC loop

natoms, nfeatures = x.shape
atoms_list = list(range(natoms))
natoms_move = int(pmove * natoms)

# averages per block
potential_block = np.zeros(nblocks)

# Initial energy
graph0 = dataset0[0]
x_old = graph0.x
a_old = graph0.a
e = graph0.e
y_old = graph0.y
inputs = [x_old, a_old, e]
potential_old = energy_GNN(inputs)
r_old = x[:,1:4]
#print(potential_old)

n = 0
nmoves = 0
potential_total = 0
start_time = time.time()

for iblock in range(nblocks):

    nmoves_iblock = 0
    potential_iblock = 0

    for istep in range(nsteps):

        n=n+1
        #if(istep%10==0): print(istep)
        #print(istep)

        # multi-particle move (30% of particles)

        atoms_move_list = random.sample(atoms_list, natoms_move)
        
        r_new = r_old.copy()
        #r_new[atoms_move_list,:] = (1+random.uniform(-1,1)*dr_max)*r_old[atoms_move_list,:] 
        r_new[atoms_move_list,:] = r_old[atoms_move_list,:] + random.uniform(-1,1)*dr_max
        ############### Chequear que números aleatorios son distintos
        x_new = x_old.copy()
        x_new[atoms_move_list,1:] = r_new[atoms_move_list,:]
        #x_new[atoms_move_list,1:] = (1+random.uniform(-1,1)*dr_max)*x_old[atoms_move_list,1:]
        a_new = adjacency(r_new)
        #a_new = adjacency(x_new[:,1:])
        inputs = [x_new, a_new, e]
        potential_new = energy_GNN(inputs)
        #print(potential_new)

        # Accept of reject the movement

        delta = (potential_new - potential_old) * beta
        if( metropolis(delta) ):
            potential_total = potential_total + potential_new
            potential_iblock = potential_iblock + potential_new
            nmoves = nmoves + 1    
            nmoves_iblock = nmoves_iblock + 1    
            r_old = r_new.copy()
            x_old = x_new.copy()
            potential_old = potential_new
        #print(accept)

    # accumulate block averages
    potential_block[iblock] = potential_iblock / nmoves_iblock

end_time = time.time()

exe_time = end_time-start_time # in seconds

metro_ratio = float(nmoves)/float(n)*100    # should be around 50%
potential_avrg = potential_total / float(nmoves)
print("Acceptance ratio: "+str(metro_ratio))
print("Potential average: "+str(potential_avrg))
print("Exe time (seconds): "+str(exe_time))

Acceptance ratio: 53.800000000000004
Potential average: -4.113358972684158
Exe time (seconds): 391.7228436470032


In [178]:
# Compute uncertainty with block-averaging
print(potential_block)

sigma = 0
for iblock in range(nblocks):
    sigma = sigma + (potential_block[iblock]-potential_avrg)**2
sigma = sigma / (nblocks-1)
sigma = np.sqrt(sigma)
print(sigma)
print()

print("Avrg potential (au): "+str(potential_avrg))
print("Sigma: "+str(sigma))
print()
au_to_kcalmol = 627.52
print("Avrg potential/N (kcal/mol): "+str(potential_avrg*au_to_kcalmol/N))
print("Sigma: "+str(sigma*au_to_kcalmol/N))

    

[-4.10806401 -4.10981026 -4.11174465 -4.11383096 -4.11418518 -4.1143243
 -4.1153825  -4.11646233 -4.11588108 -4.11659373]
0.002898723178916973

Avrg potential (au): -4.113358972684158
Sigma: 0.002898723178916973

Avrg potential/N (kcal/mol): -10.891202626745834
Sigma: 0.0076751340474007545
