In [1]:
import qml
import numpy as np

import sys
sys.path.insert(0, '/home/misa/APDFT/prototyping/atomic_energies/')
import alchemy_tools as alch
import qml_interface as qi

In [2]:
paths=qi.wrapper_alch_data()
# load data into list, count number of atoms per molecule
alchemy_data, molecule_size = qi.load_alchemy_data(paths)

# Generation of representations and labels

In [3]:
# local
local_reps = qi.generate_atomic_representations(alchemy_data, molecule_size)
local_labels = qi.generate_label_vector(alchemy_data, molecule_size.sum(), value='atomisation')
local_labels_shifted = qi.shift_by_mean_energy(local_reps, local_labels)


# global
global_reps = qi.wrapper_global_representations(alchemy_data, molecule_size) # all global representations
global_labels = np.zeros(len(global_reps)) # all global labels
for idx, mol in enumerate(alchemy_data):
    global_labels[idx] = alchemy_data[idx][:,6].sum()

In [5]:

import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt
plt.rcParams.update({'font.size': 22})

nuc_charges = np.power(local_reps*2, 1/2.4)[:,0].astype(int)
# idc_c = np.where(nuc_charges==6)
# plt.plot(local_labels[idc_c])

# idc_h = np.where(nuc_charges==1)
# plt.plot(local_labels[idc_h])

# idc_n = np.where(nuc_charges==7)
# plt.plot(local_labels[idc_n])

# idc_o = np.where(nuc_charges==8)
# plt.plot(local_labels[idc_o])

shifted = qi.shift_by_mean_energy(local_reps, local_labels)

for pl in set(nuc_charges):
    i = np.where(nuc_charges==pl)
    plt.plot(shifted[i])

In [4]:
ref=local_labels[qi.get_local_idx([10], molecule_size)].sum()

In [5]:
nuc_charges = np.power(local_reps*2, 1/2.4)[:,0].astype(int)
set(nuc_charges)
np.where(nuc_charges==1)

(array([   6,    7,    8, ..., 7573, 7582, 7583]),)

# Cross-validation

In [44]:
sizes = [10, 50, 100, 200, 300, 400, 500]
errors = []
for i in sizes:
    errors.append(qi.crossvalidate_local(len(alchemy_data), i, local_reps, local_labels, molecule_size))

In [49]:
errors_atomic = errors

In [80]:
errors_atomic_local = np.zeros((len(errors_atomic),2))
for i in range(0, len(errors_atomic)):
    errors_atomic_local[i] = errors_atomic[i][0]
errors_atomic_local

array([[0.38235913, 0.04973001],
       [0.10633949, 0.01609039],
       [0.05686432, 0.0054599 ],
       [0.03660741, 0.00296098],
       [0.03101996, 0.00582005],
       [0.02781766, 0.00586551],
       [0.02546925, 0.0055068 ]])

In [78]:
errors_atomic_local

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [81]:
errors_atomic_global = np.zeros((len(errors_atomic),2))
for i in range(0, len(errors_atomic)):
    errors_atomic_global[i] = errors_atomic[i][1]

In [46]:
sizes = [10, 50, 100, 200, 300, 400, 500]
errors_shifted = []
for i in sizes:
    errors_shifted.append(qi.crossvalidate_local(len(alchemy_data), i, local_reps, local_labels_shifted, molecule_size))

In [83]:
errors_shifted_local = np.zeros((len(errors_atomic),2))
for i in range(0, len(errors_atomic)):
    errors_shifted_local[i] = errors_shifted[i][0]
errors_shifted_local

array([[0.77517398, 0.04575044],
       [0.29714426, 0.04303408],
       [0.15203697, 0.02420382],
       [0.06685523, 0.00679266],
       [0.04851345, 0.01105046],
       [0.03949337, 0.00738924],
       [0.03339172, 0.0084296 ]])

In [84]:
errors_shifted_global = np.zeros((len(errors_atomic),2))
for i in range(0, len(errors_atomic)):
    errors_shifted_global[i] = errors_shifted[i][1]
errors_shifted_global

array([[5.50027039, 0.65339766],
       [2.45648134, 0.2191541 ],
       [1.35086413, 0.22157257],
       [0.61285078, 0.090267  ],
       [0.41503038, 0.12397733],
       [0.30988661, 0.07485668],
       [0.25529421, 0.07161679]])

In [52]:
sizes = [10, 50, 100, 200, 300, 400, 500]
errors_global = []
for i in sizes:
    
    errors_global.append(qi.crossvalidate(len(alchemy_data), i, global_reps, global_labels, molecule_size, mode='global')[0])


In [129]:
fig, ax = plt.subplots(1,1)
ax.plot(sizes, errors_global, '-o', label='global rep')
ax.plot(sizes, errors_atomic_global[:,0], '-o', label='local rep', color='red')
ax.plot(sizes, errors_atomic_local[:,0], '--o', label='local rep atomic', color='red')
ax.plot(sizes, errors_shifted_global[:,0], '-o', label='local rep shifted', color='green')
ax.plot(sizes, errors_shifted_local[:,0], '--o', label='local rep shifted atomic', color='green')
ax.legend()
ax.set_xlabel('Training set size')
ax.set_xscale('log')
ax.set_ylabel('Mean error per molecule')
ax.set_yscale('log')

In [131]:
fig, ax = plt.subplots(1,1)
ax.errorbar(sizes, errors_global, yerr=None, ls='-', marker='o', label='global rep')
ax.errorbar(sizes, errors_atomic_global[:,0], yerr=errors_atomic_global[:,1], ls='solid', marker='o',label='local rep', color='red')
ax.errorbar(sizes, errors_atomic_local[:,0], ls='--', marker='o', label='local rep atomic', color='red', yerr=errors_atomic_local[:,1])
ax.errorbar(sizes, errors_shifted_global[:,0], ls='-', marker='o', label='local rep shifted', color='green', yerr=errors_shifted_global[:,1])
ax.errorbar(sizes, errors_shifted_local[:,0], ls='--', marker='o', label='local rep shifted atomic', color='green', yerr=errors_shifted_local[:,1])
ax.legend()
ax.set_xlabel('Training set size')
ax.set_xscale('log')
ax.set_ylabel('Mean error per molecule')
ax.set_yscale('log')

In [116]:
fig, ax = plt.subplots(1,1)
ax.errorbar(sizes, errors_atomic_global[:,0], yerr=errors_atomic_global[:,1])
ax.errorbar(sizes, errors_atomic_global[:,0])
ax.set_yscale('log')
ax.set_xscale('log')

# Select data

In [7]:
training_set_size = 100
total_set_size = len(alchemy_data)
global_rep_bool = False

# split molecule indices in training and validation
global_idc = qi.get_indices(total_set_size, training_set_size)
# get the indices of the corresponding atomic representations, labels
local_idc = qi.get_local_idx(global_idc[0], molecule_size), qi.get_local_idx(global_idc[1], molecule_size)

# select the data
if global_rep_bool:
    rep = global_reps[global_idc[0]], global_reps[global_idc[1]]
    labels = global_labels[global_idc[0]], global_labels[global_idc[1]]
else:
    rep = local_reps[local_idc[0]], local_reps[local_idc[1]] # select the representations
    labels = local_labels[local_idc[0]], local_labels[local_idc[1]] # select the labels

# optimize hyperparameters

In [8]:
rep = local_reps[local_idc[0]], local_reps[local_idc[1]] # select the representations
labels = local_labels[local_idc[0]], local_labels[local_idc[1]] # select the labels
sigmas = np.logspace(-1, 4, 12).tolist() #14)
lams = np.logspace(-15, 0, 16).tolist()#16)
out = qi.optimize_hypar(rep, labels, sigmas, lams)

In [14]:
# store output
out[0]

array([[1.00000000e-01, 1.00000000e-15, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-14, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-13, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-12, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-11, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-10, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-09, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-08, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-07, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-06, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-05, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-04, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-03, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-02, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e-01, 4.44204377e+00],
       [1.00000000e-01, 1.00000000e+00, 4.44204377e+00],
       [2.84803587e-01, 1.00000000e-15, 4.44204377e+00],
       [2.84803587e-01, 1.00000

In [16]:
# optimum sigma, lambda and mean error for the optimum hyperparameters
out[0][np.where(out[0]==np.amin(out[0][:,2]))[0]][0]

# coefficient for optimum hyperparameters
coeffs = out[1]

(array([136]), array([2]))

In [26]:
out[0][np.where(out[0]==np.amin(out[0][:,2]))[0]][0,2]


0.04810685008524246

array([4.32876128e+02, 1.00000000e-07, 4.81068501e-02])