In [None]:
import numpy as np
import sys, os  
import quippy as qp

In [None]:
from ase.io import read,write
from ase.visualize import view

In [None]:
# Put the parent directory and ../tools at the front of the module search
# path (presumably where the project-local modules imported below live --
# verify against the repository layout).
sys.path.insert(0,'../')
sys.path.insert(0,'../tools')
# Helpers that build the environmental and global kernels.
from GlobalSimilarity import get_environmentalKernels, get_globalKernel
# Cross-validation driver used in the last section of the notebook.
from CV import CrossValidation
# KRR model, its persistence helpers and the scoring function.
from krr import KRR,dump_json,load_json,dump_data,load_data,score

# Compute a kernel and save it

In [None]:
# Load only the first 100 structures. Passing the slice through `index`
# yields the same list as reading every frame (index=':') and slicing it
# afterwards, without keeping the remaining frames of the file around.
frames = read('small_molecules.xyz', index=':100')

In [None]:
# Keyword arguments handed to get_environmentalKernels; the same dict is also
# stored in the metadata saved with the kernel matrix.
soap_params = {
    'cutoff': 3,
    'nmax': 6,
    'lmax': 6,
    'gaussian_width': 0.4,
    'cutoff_transition_width': 0.5,
    'centerweight': 1.0,
    'nocenters': [],
    'chem_channels': True,
    'is_fast_average': False,
    'islow_memory': False,
    'nthreads': 1,
    'nchunks': 1,
    'nprocess': 1,
}

# Compute the environmental kernels for the loaded frames with `soap_params`.
environmentalKernels = get_environmentalKernels(
    atoms=frames,
    **soap_params
)

In [None]:
# Keyword arguments handed to get_globalKernel; also stored in the saved metadata.
kernel_params = {
    'kernel_type': 'average',
    'zeta': 2,
    'normalize_global_kernel': True,
}
# Combine the environmental kernels into the global kernel using `kernel_params`.
globalKernel = get_globalKernel(
    environmentalKernels,
    **kernel_params
)

In [None]:
# Output location: <prefix><fn>.json is the path given to dump_data, and
# <fn>.npy is recorded in the metadata under the 'fn' key.
prefix = './'
fn = 'my_kernel_matrix'

# Record the settings used to build the kernel alongside it.
metadata = {
    'soap_params': soap_params,
    'fn': fn + '.npy',
    'kernel_params': kernel_params,
}

json_path = prefix + fn + '.json'
dump_data(json_path, metadata, globalKernel)

# Train and save a KRR model

In [None]:
# Reload the saved kernel; load_data returns a (metadata, matrix) pair.
params, Kmat = load_data('./my_kernel_matrix.json')

# Indices of the samples used for fitting.
train = range(100)

# Target values restricted to the training indices.
targets_all = np.load('./small_molecules-dHf_peratom.npy')
y_train = targets_all[train]

# Train-by-train sub-block of the kernel matrix.
train_block = np.ix_(train, train)
Kmat_train = Kmat[train_block]

In [None]:
# Fit a KRR model on the training sub-kernel and the matching targets.
krr_hyperparams = dict(sigma=1e-1, csi=1)
model = KRR(**krr_hyperparams)
model.train(Kmat_train, y_train)

In [None]:
# Serialize the trained model: pack() produces the state passed to dump_json.
model_path = './my_krr_model.json'
state = model.pack()
dump_json(model_path, state)

# Predict with saved model

In [None]:
# Reload the stored kernel; the metadata half of the pair is not used here.
params, Kmat = load_data('./my_kernel_matrix.json')

# NOTE(review): every index in `test` also appears in `train`. If the saved
# model was fitted on these `train` rows, the scores computed from this split
# are in-sample rather than held-out -- confirm this is intended.
train = range(100)
test = range(50, 100)

# Target values for the test indices.
y_test = np.load('./small_molecules-dHf_peratom.npy')[test]

# Kernel sub-block whose rows follow `train` and whose columns follow `test`.
train_vs_test = np.ix_(train, test)
Kmat_test = Kmat[train_vs_test]

# Restore the model from its serialized state.
model_state = load_json('./my_krr_model.json')
model = KRR().unpack(model_state)

In [None]:
# Predict for the test columns of the kernel.
y_pred = model.predict(Kmat_test)

# score() supplies the five values consumed by the format string, in this order.
test_metrics = score(y_pred, y_test)
report_template = 'MAE={:.3e} RMSE={:.3e} SUP={:.3e} R2={:.3e} CORR={:.3e}'
print(report_template.format(*test_metrics))

# K-fold cross-validation

In [None]:
# Load the kernel matrix with mmap_mode=None. The accompanying metadata gets
# its own name so that `params` only ever refers to the KRR hyperparameters
# defined below, instead of being bound to the metadata and then overwritten.
kernel_metadata, Kmat = load_data('./my_kernel_matrix.json', mmap_mode=None)

# Target values for the first 100 samples.
y = np.load('./small_molecules-dHf_peratom.npy')[:100]

# KRR hyperparameters handed to CrossValidation.
params = dict(sigma=1e-1, csi=1)

In [None]:
# Run 4-fold cross-validation with a fixed seed. CrossValidation returns the
# test scores together with a second value, which is kept but not printed.
cv_options = dict(Nfold=4, seed=10)
scoreTest, err_scoreTest = CrossValidation(Kmat, y, params, **cv_options)

cv_report_template = 'MAE={:.3e} RMSE={:.3e} SUP={:.3e} R2={:.3e} CORR={:.3e}'
print(cv_report_template.format(*scoreTest))