In [1]:
import numpy as np
import qml
import sys
sys.path.insert(0, '/home/misa/APDFT/prototyping/atomic_energies/')
import qml_interface as qmi
import glob
import re

import matplotlib
matplotlib.use("Qt5Agg")
from matplotlib import pyplot as plt

# data selection

In [2]:
# Precomputed kernel matrix, read from a plain-text file.
# The kernel width is encoded in the file name (the "sig..." suffix).
kernel_name = 'full_kernel_sig18.73817422860383'
kernel_dir = '/home/misa/APDFT/prototyping/atomic_energies/results/analyse_learning/FCHL/kernels/'
kernel_full = np.loadtxt(kernel_dir + kernel_name)

In [108]:
# Labels: load the data set and build one flat label vector for the chosen property.
prop = 'alch_pot'  # property name handed to qmi.generate_label_vector
alchemy_paths = qmi.wrapper_alch_data()
data, molecule_size = qmi.load_alchemy_data(alchemy_paths)
# The vector length is the sum over molecule_size
# (presumably the total number of atoms -- TODO confirm against qml_interface).
num_labels = molecule_size.sum()
labels = qmi.generate_label_vector(data, num_labels, value=prop)

In [109]:
# Experiment configuration for the learning curve.
num_cross = 3   # number of random train/test splits averaged per training-set size
lam = 5e-11     # regulariser added to the diagonal of the training kernel
delta = False   # True: learn the labels relative to a baseline; False: learn them directly

# Training-set sizes 2**0 ... 2**9 (ten float keys); every error slot starts at 0
# and is overwritten by the learning-curve loop.
tr_sl = {size: 0 for size in np.logspace(0, 9, 10, base=2)}

In [110]:
# Learning curve: for every training-set size in `tr_sl`, fit the kernel model on
# `num_cross` independent train/test splits and store the mean test MAE in `tr_sl`.
for tr_size in tr_sl:
    # cross validation: test MAE of every split for this training-set size
    test_error_cv = []
    for n in range(num_cross):
        # select training and test data; the keys of tr_sl are floats, hence int()
        global_tr, global_test = qmi.get_indices(len(molecule_size), int(tr_size))
        tr_ind = qmi.get_local_idx(global_tr, molecule_size)
        test_ind = qmi.get_local_idx(global_test, molecule_size)
        tr_kernel_raw, test_kernel = qmi.split_kernel(kernel_full, tr_ind, test_ind)

        # Regularise a copy of the training kernel. The unregularised block is
        # still needed for the training error below, and the copy guarantees that
        # kernel_full is never modified should split_kernel hand back a view
        # (NOTE(review): not visible from this notebook -- confirm in qml_interface).
        tr_kernel = np.copy(tr_kernel_raw)
        tr_kernel += np.identity(len(tr_kernel))*lam # add regularizer

        if delta: # get labels relative to a baseline built from the training set
            prop_mean = qmi.get_average_property(tr_ind, data, molecule_size, prop) # baseline for property
            tr_label = labels[tr_ind] - qmi.get_label_delta(prop_mean, tr_ind, data, molecule_size)
            test_label = labels[test_ind] - qmi.get_label_delta(prop_mean, test_ind, data, molecule_size)
        else:
            tr_label = labels[tr_ind]
            test_label = labels[test_ind]

        coeffs = qml.math.cho_solve(tr_kernel, tr_label) # solve (K + lam*I) coeffs = tr_label

        # error calculation
        # (K + lam*I) @ coeffs reproduces tr_label by construction, so the training
        # residual has to be evaluated with the unregularised kernel K.
        pred_tr_label = np.dot(tr_kernel_raw, coeffs) # training error
        tr_error = np.abs(tr_label-pred_tr_label)
        mean_tr_error = tr_error.mean()

        pred_test_label = np.dot(test_kernel, coeffs) # test error
        test_error = np.abs(test_label-pred_test_label)
        test_error_cv.append(test_error.mean())
    # mean test MAE over all splits for this training-set size
    tr_sl[tr_size] = np.array(test_error_cv).mean()

In [112]:
# Write the learning curve (column 0: training-set size, column 1: mean test MAE)
# as a tab-separated text file; delta-learning runs get their own file name.
lcurve = np.array([*tr_sl.items()])
basepath = '/home/misa/APDFT/prototyping/atomic_energies/results/analyse_learning/FCHL/delta_learning/alch_pot/'
lcurve_file = 'delta_lcurve' if delta else 'lcurve'
lcurve_header = '{}-fold cv, sigma = 18, lambda = {}'.format(num_cross, lam)
np.savetxt(basepath+lcurve_file, lcurve, delimiter='\t', header=lcurve_header)
    

In [114]:
# Compare the stored learning curves of direct and delta learning on log-log axes.
# NOTE(review): this reads from .../atomisation/ while the cell that saves the
# curve writes to .../alch_pot/ -- confirm that plotting the atomisation results
# is intended here.
basepath = '/home/misa/APDFT/prototyping/atomic_energies/results/analyse_learning/FCHL/delta_learning/atomisation/'

# column 0: number of training points, column 1: error
lcurve, delta_lcurve = (np.loadtxt(basepath+fname) for fname in ('lcurve', 'delta_lcurve'))

fig, ax = plt.subplots(1,1)

for curve, curve_label in ((lcurve, 'direct'), (delta_lcurve, 'delta')):
    sizes, errors = curve[:,0], curve[:,1]
    ax.plot(sizes, errors, '-o', label=curve_label)

ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel('training points')
ax.set_ylabel('MAE (Ha)')

ax.legend()

<matplotlib.legend.Legend at 0x7f1552f346a0>