In [1]:
import qml
import numpy as np

import sys
sys.path.insert(0, '/home/misa/APDFT/prototyping/atomic_energies/')
import qml_interface as qi

# Import data

In [2]:
# Gather the paths of the per-molecule atomic_energies.txt result files.
paths=qi.wrapper_alch_data()

In [3]:
paths

['/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000227/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000228/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000272/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000274/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000275/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000293/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000391/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000392/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/slice_ve38/dsgdb9nsd_000393/atomic_energies.txt',
 '/home/misa/APDFT/prototyping/atomic_energies/results/

In [4]:
# Read every atomic-energy file into a list (one array per molecule); the loader
# also returns the number of atoms of each molecule.
alchemy_data, molecule_size = qi.load_alchemy_data(paths)
# Atom count of the largest molecule in the data set.
max_size = np.max(molecule_size)

In [5]:
alchemy_data[0]

array([[  6.        ,  18.096977  ,  21.64438   ,  19.081327  ,
         -6.61849788, -39.71098726,   0.2635706 ],
       [  6.        ,  18.203213  ,  18.749054  ,  19.081635  ,
         -7.57298056, -45.43788334,  -5.46332548],
       [  6.        ,  19.554795  ,  17.743611  ,  21.438864  ,
         -6.61902749, -39.71416497,   0.2603929 ],
       [  8.        ,  15.710188  ,  17.711578  ,  18.906942  ,
         -7.40431151, -59.23449211,  17.95787275],
       [  6.        ,  19.49281   ,  17.776523  ,  16.674988  ,
         -7.10548338, -42.63290029,  -2.65834243],
       [  8.        ,  22.015057  ,  18.688861  ,  16.629396  ,
         -7.24688103, -57.97504821,  19.21731665],
       [  1.        ,  19.996976  ,  22.446056  ,  18.974885  ,
         -5.81697921,  -5.81697921,  -3.18892035],
       [  1.        ,  17.003736  ,  22.314421  ,  17.45944   ,
         -5.71357123,  -5.71357123,  -3.08551237],
       [  1.        ,  17.191433  ,  22.343892  ,  20.80822   ,
         -5.6313

# Generate Representation

In [6]:
# Stack the atomic representations of all molecules into one 2-D array
# (the recorded run gives shape (7584, 210)).
# NOTE(review): presumably one row per atom; the row layout is defined in qml_interface — confirm.
full_matrix = qi.generate_atomic_representations(alchemy_data, molecule_size)

In [7]:
full_matrix.shape

(7584, 210)

# Calculate distance between representations

In [8]:
# Distances between the rows of full_matrix.
# NOTE(review): `dist` is later sampled with the same flat indices as `en_diff`, so it is
# presumably a flattened upper triangle in the same (i, j >= i) order — confirm in qml_interface.
dist = qi.calculate_distances(full_matrix)

# Generate Label vector

In [9]:
# Label vector; molecule_size.sum() is the total number of atoms over all molecules.
# NOTE(review): the first labels shown below equal the last column of alchemy_data[0],
# so that column is presumably the regression target — confirm in qml_interface.
energies = qi.generate_label_vector(alchemy_data, molecule_size.sum())

In [10]:
energies

array([ 0.2635706 , -5.46332548,  0.2603929 , ...,  3.10684318,
       -5.36082098, -5.31307228])

# Difference between labels

In [11]:
# Flattened upper triangle (diagonal included) of the label differences |e_i - e_j|,
# stored row by row: row i contributes the n - i values for j = i, ..., n - 1.
n_rows = len(full_matrix)
diff_shape = n_rows * (n_rows + 1) // 2
en_diff = np.empty(diff_shape)
offset = 0
for idx in range(n_rows):
    # Row idx is one entry shorter than the previous row.
    row_len = n_rows - idx
    en_diff[offset:offset + row_len] = np.abs(energies[idx] - energies[idx:])
    offset += row_len

# Plots

In [12]:
import matplotlib
# Select the Qt5 backend before pyplot is imported, so figures open in separate
# windows instead of inline.
# NOTE(review): this needs Qt bindings and a display — confirm it is still wanted.
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt
# Larger default font for every figure created below.
plt.rcParams.update({'font.size': 22})
# import bokeh bokeh.figures, hovertool

In [13]:
# Scatter plot of representation distance against label difference for a random
# subsample of pairs (en_diff holds n*(n+1)/2 entries, too many to draw them all).
N_SAMPLES = 100000
# Seeded, local generator so the figure is identical after Restart & Run All
# and the global NumPy random state is left untouched.
rng = np.random.RandomState(42)
# select random indices (drawn with replacement, the default of `choice`)
indices = np.sort(rng.choice(len(dist), N_SAMPLES))

fig, ax = plt.subplots(1,1)

# np.take without `axis` indexes the flattened arrays, so the same flat index must
# refer to the same pair in both arrays.
# NOTE(review): this assumes dist and en_diff share one ordering — confirm.
ax.plot(np.take(dist, indices), np.take(en_diff, indices), 'o')
ax.set_xlabel(r'$d_{ij} $')
ax.set_ylabel('$|e_i - e_j|$')

Text(0, 0.5, '$|e_i - e_j|$')

# Generate Kernel

In [14]:
import qml.kernels

In [15]:
full_matrix

array([[36.8581052 ,  2.90564209,  0.5       , ...,  0.        ,
         0.        ,  0.        ],
       [36.8581052 , 17.73882949, 73.51669472, ...,  0.        ,
         0.        ,  0.        ],
       [36.8581052 ,  2.90563574,  0.5       , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [53.3587074 , 19.54993299, 53.3587074 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.5       ,  2.92143402, 36.8581052 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.5       ,  2.92677017, 36.8581052 , ...,  0.        ,
         0.        ,  0.        ]])

In [16]:
# Gaussian kernel between all pairs of representations; 50 is the kernel width sigma
# (the same value is passed as `sigma` in the manual check of element [0, 5] below).
gaussian_kernel = qml.kernels.gaussian_kernel(full_matrix, full_matrix, 50)

In [17]:
gaussian_kernel.shape

(7584, 7584)

In [18]:
# Check that qml.kernels.gaussian_kernel does what we expect by comparing one of
# its elements with the same element computed by hand.

In [19]:
def kernel_element(rep_matrix, i, j, sigma):
    """Return the Gaussian kernel value between rows ``i`` and ``j`` of ``rep_matrix``.

    The value is ``exp(-||x_i - x_j||**2 / (2 * sigma**2))``, where ``||.||`` is
    the Euclidean norm computed by ``np.linalg.norm``.

    Parameters
    ----------
    rep_matrix : array-like
        Container whose elements ``rep_matrix[i]`` and ``rep_matrix[j]`` can be
        subtracted from each other (e.g. a 2-D ndarray).
    i, j : int
        Indices of the two rows to compare.
    sigma : float
        Kernel width.

    Returns
    -------
    float
        Kernel element for the pair ``(i, j)``.
    """
    separation = np.linalg.norm(rep_matrix[i] - rep_matrix[j])
    exponent = -separation**2 / (2 * sigma**2)
    return np.exp(exponent)

In [20]:
import math
# Spot check: element [0, 5] of the QML kernel must agree with the hand-written formula
# evaluated with the same sigma (50) that was used to build gaussian_kernel.
math.isclose(kernel_element(full_matrix, 0, 5, 50), gaussian_kernel[0,5])

True

# Calculate Regression Coefficients

In [21]:
import qml.math

In [22]:
# Regression coefficients from the linear system gaussian_kernel * coefficients = energies
# (cho_solve is presumably a Cholesky-based solver — see the QML documentation).
# NOTE(review): the kernel is passed as-is, with no regularisation term, whereas the
# qi.test call further down passes lam_val=1e-8 — confirm this is intended.
coefficients = qml.math.cho_solve(gaussian_kernel, energies)

In [23]:
coefficients.shape

(7584,)

# Prediction

In [58]:
# 1. Build the training kernel by selecting the training elements of the full kernel, together with their labels.
# 2. Solve the training kernel against the training labels to obtain the regression coefficients.
# 3. Build the test kernel from the kernel elements that connect each test point to the training points.
# 4. Predict the test labels and compare them with the reference values.


In [59]:
import qml.kernels
import qml.math

In [66]:
# training
# Split the row indices of full_matrix into 5000 training indices and a set of test
# indices (presumably the remainder — confirm in qml_interface).
# NOTE(review): if get_indices draws the split randomly, no seed is set here, so the
# errors reported below will differ from run to run.
tr_ind, test_ind = qi.get_indices(len(full_matrix), tr_size=5000)

In [67]:
# Fit on the training indices and report the errors on the test set and on the training set.
# As used in the cells below, each result is indexed as [0] -> per-point errors (max/min
# are taken over it) and [1] -> a single summary value (presumably the mean absolute error — confirm).
# NOTE(review): sigma=1000 here differs from the sigma=50 used for gaussian_kernel above;
# lam_val is presumably the regularisation strength — confirm in qml_interface.
test_error, tr_error = qi.test(full_matrix, tr_ind, test_ind, energies, sigma=1000, lam_val=1e-8)

In [68]:
test_error[1]

0.05804520125473692

In [70]:
# Largest and smallest entry of the per-point test errors.
print('Max error', np.max(test_error[0]))
print('Min error', np.min(test_error[0]))

Max error 1.197791901755969
Min error 2.8489807171183656e-05


In [71]:
tr_error[1]

8.989050636802887e-08

In [72]:
# Largest entry of the per-point training errors.
print('Max error', np.max(tr_error[0]))

Max error 4.0336330187074054e-07
