In [1]:
#importing libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyro
import pyro.contrib.gp as gp
import pyro.distributions as dist
import pyro.ops.stats as stats
import qml
import torch
from sklearn.utils import shuffle
from torch.nn import Parameter

#Import from functions.py file
from functions_utils import compounds
from functions_utils import file_splitter
from functions_utils import load_output

In [2]:
# Split xyz_files/c6h6.xyz into the "splitted_data" folder. This must happen
# only once, so the call is skipped whenever that folder already contains
# files; re-running the cell (or the whole notebook) is then safe.
if not os.path.isdir("splitted_data") or not os.listdir("splitted_data"):
    file_splitter("xyz_files/c6h6.xyz", "splitted_data")

# Load the energies file and keep its contents in y_input.
y_input = load_output("xyz_files/energies_benzene.xyz")

# NOTE: the compounds in "splitted_data" are deliberately not loaded here.
# The following cell calls compounds("splitted_data") itself and assigns the
# result to x_input, so loading them in this cell as well only repeated the
# same work and produced a value that was overwritten before being read.

In [3]:
# Build the feature matrix: one row per compound found in "splitted_data",
# taken from each compound's `representation` attribute.
x_input = np.array([mol.representation for mol in compounds("splitted_data")])

# Only the first N_SAMPLES rows are used for the rest of the notebook.
N_SAMPLES = 9000
x = x_input[:N_SAMPLES]

# Shuffle the selected rows. A fixed random_state makes the row order (and
# therefore everything computed from it) identical on every re-run.
SHUFFLE_SEED = 0
x = shuffle(x, random_state=SHUFFLE_SEED)
print('Shape of input dataset:', x.shape)


Shape of input dataset: (9000, 78)


In [4]:
# Turn the NumPy feature matrix into a torch tensor that uses torch's current
# default floating-point dtype.
in_data = torch.tensor(x, dtype=torch.get_default_dtype())

# Swap the two axes: rows become columns, so samples end up along the last
# dimension of y.
y = in_data.transpose(0, 1)
print('Shape of input dataset after transposing:', y.shape)

Shape of input dataset after transposing: torch.Size([78, 9000])


In [5]:
# Clear parameters left in Pyro's global store by a previous run.
pyro.clear_param_store()

# Model sizes. The number of latent points is read from y (features x samples)
# instead of being hard-coded, so it always matches the data actually loaded.
LATENT_DIM = 20
NUM_INDUCING = 800
n_points = y.shape[1]

# Prior mean of the latent inputs: all zeros, one LATENT_DIM-vector per sample.
X_init = torch.zeros(n_points, LATENT_DIM)
X = Parameter(X_init.clone())

# Exponential kernel for the sparse GP regression, one lengthscale per latent
# dimension.
kernel = gp.kernels.Exponential(input_dim=LATENT_DIM, lengthscale=torch.ones(LATENT_DIM))

# Inducing points for the sparse GP regression.
# They were previously resampled from X_init, but X_init is all zeros, so every
# one of the inducing points was the same zero vector and the inducing-point
# kernel matrix was degenerate (identical rows, held up only by the jitter).
# Draw distinct starting locations instead, with the same 0.1 scale as the
# Normal prior that is placed on X in the following cell. A private generator
# keeps the draw reproducible without touching the global torch RNG state.
inducing_rng = torch.Generator().manual_seed(0)
Xu = 0.1 * torch.randn(NUM_INDUCING, LATENT_DIM, generator=inducing_rng)

gplvm = gp.models.SparseGPRegression(X, y, kernel, Xu, jitter=1e-3)

In [6]:
# Treat the latent inputs X as a random variable: put a Normal prior centred
# on X_init with scale 0.1 on them ...
latent_prior = dist.Normal(X_init, 0.1).to_event()
gplvm.X = pyro.nn.PyroSample(latent_prior)

# ... and ask the model to create a Normal guide for X.
gplvm.autoguide("X", dist.Normal)

In [None]:
# Fit the model. No optimizer is passed, so gp.util.train falls back to its
# default (Adam with a learning rate of 0.01) and minimises the ELBO loss.
# The value recorded at every step is collected in `loss`.
NUM_TRAIN_STEPS = 4500
loss = gp.util.train(gplvm, num_steps=NUM_TRAIN_STEPS)

In [None]:
# Training loss on linear axes. `loss` holds one value per optimisation step,
# so the x axis is the step index (it was mislabelled "Training size").
fig, ax = plt.subplots()
ax.plot(loss)
ax.set_xlabel("Training step")
ax.set_ylabel("Loss")
plt.show()

In [None]:
# Same training loss on log-log axes. The x axis is the optimisation step
# index (it was mislabelled "Training size").
# NOTE(review): non-positive loss values cannot be drawn on a log axis;
# confirm the loss stays positive for this model.
fig, ax = plt.subplots()
ax.loglog(loss)
ax.set_xlabel("Training step (log)")
ax.set_ylabel("Loss (log)")
plt.show()

In [None]:
# Put the model in "guide" mode, then read the learned guide parameters of the
# latent inputs X (presumably the variational posterior -- confirm against the
# Pyro docs).
gplvm.mode = "guide"

# Location and scale of the Normal guide for X, detached from the autograd
# graph and converted to NumPy arrays.
mean, std_dev = (
    guide_param.detach().numpy()
    for guide_param in (gplvm.X_loc, gplvm.X_scale)
)

In [None]:
# Draw new points in latent space from a Normal whose per-dimension location
# and scale are the averages, over all training points, of the learned guide
# mean and standard deviation.
# NOTE(review): averaging the guide parameters ignores how the individual
# latent means are spread out -- confirm this is the intended sampling scheme.
N_NEW_POINTS = 8000

# The latent dimension is read from `mean` so it cannot drift from the model.
latent_dim = mean.shape[1]

# Seeded generator: these samples end up in a saved .npy file, so the draw
# should be reproducible across runs.
latent_rng = np.random.default_rng(0)
new_points_latent = latent_rng.normal(
    loc=mean.mean(axis=0),
    scale=std_dev.mean(axis=0),
    size=(N_NEW_POINTS, latent_dim),
)
print("Shape of predicting new_points in latent space", new_points_latent.shape)


# Using the sampled latent points to predict Coulomb matrices with the trained model

In [27]:
# Convert the sampled latent points to a tensor of torch's default float dtype.
new_points_latent_tensor = torch.tensor(new_points_latent, dtype=torch.get_default_dtype())

# Evaluate the trained model ONCE at the new latent points. The forward pass
# returns the predictive mean together with the predictive VARIANCE (not the
# standard deviation). Calling the model a single time also guarantees that
# both outputs come from the same forward pass, and halves the cost compared
# with evaluating it separately for each output. No gradients are needed here.
with torch.no_grad():
    pred_loc, pred_var = gplvm(Xnew=new_points_latent_tensor)

# np.random.normal expects a standard deviation, so take the square root of
# the variance; clamp first so tiny negative values caused by round-off cannot
# produce NaNs.
pred_std = pred_var.clamp(min=0).sqrt()

# Sample one predicted value per entry around the predictive mean.
predicted_coulomb_matrix = np.random.normal(
    pred_loc.detach().numpy(),
    pred_std.detach().numpy(),
)

In [28]:
# Report the dimensions of the sampled prediction array.
predicted_shape = predicted_coulomb_matrix.shape
print("Shape of predicted coulomb matrix", predicted_shape)

Shape of predicted coulomb matrix (78, 8000)


In [29]:
# Persist the sampled prediction array to disk in NumPy's .npy format.
np.save(file="predicted_coulomb_matrix.npy", arr=predicted_coulomb_matrix)