#### Load Modules

In [7]:
#from loader import load_data

import sys
from loader.loader import load_data
#sys.path.append("/home/kellner/packages/project_COSMO/")
#sys.path.append("/home/kellner/packages/project_COSMO/loader")

from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.kernel_ridge import KernelRidge
from skcosmo.model_selection import atom_groups_by_frame
from sklearn.model_selection import GroupKFold
from feature_utils.parallel import get_features_in_parallel, get_optimal_radial_basis_hypers_parallel
from rascal.representations import SphericalInvariants as SOAP

from falkon import Falkon, kernels
import falkon
import torch

In [8]:
# Load the train/test structures together with their target properties.
# Both paths are relative to the current working directory. The keyword
# arguments are forwarded to `load_data` unchanged (presumably: sub-sample 200
# entries per set and keep only species 6 -- confirm against the loader).
train_structures, test_structures, train_properties, test_properties = load_data(
    "./make_tensor_data/train_tensor/CSD-3k+S546_shift_tensors.xyz",
    "./make_tensor_data/test_tensor/CSD-500+104-7_shift_tensors.xyz",
    random_subsample_train=200,
    random_subsample_test=200,
    selected_species=6,
)

# Initial hyper-parameters for the SOAP (SphericalInvariants) representation.
hypers = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.643,
    "max_radial": 8,
    "max_angular": 8,
    "gaussian_sigma_constant": 0.179,
    "gaussian_sigma_type": "Constant",
    "radial_basis": "GTO",
    "normalize": True,
    "cutoff_smooth_width": 0.3,
    "cutoff_function_type": "RadialScaling",
    "optimization": {"Spline": {"accuracy": 1.0e-05}},
    "cutoff_function_parameters": {
        "rate": 1.968,
        "scale": 2.601,
        "exponent": 4.698,
    },
    "compute_gradients": False,
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8, 16],
}

# Replace the initial hypers with the ones returned by the radial-basis
# optimisation helper, which is given the training structures.
hypers = get_optimal_radial_basis_hypers_parallel(
    hypers, train_structures, expanded_max_radial=20
)

# Featurize both sets with the same hypers and wrap the results as torch
# tensors; the targets are reshaped into single-column tensors.
Xtrain = torch.from_numpy(get_features_in_parallel(train_structures, SOAP, hypers))
Xtest = torch.from_numpy(get_features_in_parallel(test_structures, SOAP, hypers))
Ytrain = torch.from_numpy(train_properties).reshape(-1, 1)
Ytest = torch.from_numpy(test_properties).reshape(-1, 1)

# Group labels derived from the training structures; passed as `groups=` to
# the grouped cross-validation further down.
structure_groups = atom_groups_by_frame(train_structures)


In [13]:
# Baseline Falkon estimator. kernel, penalty and M are mandatory constructor
# arguments; the values given here are placeholders that will be overridden.
estimator = falkon.Falkon(
    kernel=falkon.kernels.GaussianKernel(sigma=1),
    penalty=1e-3,
    M=1000,
    maxiter=10,
    options=falkon.FalkonOptions(use_cpu=True),
)


In [14]:
# Display the estimator's current parameters as a dict; its keys (e.g.
# 'kernel', 'penalty', 'maxiter') are the names a parameter search can address.
estimator.get_params()

{'M': 1000,
 'center_selection': <falkon.center_selection.UniformSelector at 0x7fc9304cfb80>,
 'error_every': 1,
 'error_fn': None,
 'kernel': GaussianKernel(sigma=Parameter containing:
 tensor([1.], dtype=torch.float64)),
 'maxiter': 10,
 'options': FalkonOptions(keops_acc_dtype='auto', keops_sum_scheme='auto', keops_active='auto', keops_memory_slack=0.7, chol_force_in_core=False, chol_force_ooc=False, chol_par_blk_multiplier=2, pc_epsilon_32=1e-05, pc_epsilon_64=1e-13, cpu_preconditioner=False, cg_epsilon_32=1e-07, cg_epsilon_64=1e-15, cg_tolerance=1e-07, cg_full_gradient_every=10, cg_differential_convergence=False, debug=False, use_cpu=True, max_gpu_mem=inf, max_cpu_mem=inf, compute_arch_speed=False, no_single_kernel=True, min_cuda_pc_size_32=10000, min_cuda_pc_size_64=30000, min_cuda_iter_size_32=300000000, min_cuda_iter_size_64=900000000, never_store_kernel=False, store_kernel_d_threshold=1200, num_fmm_streams=2),
 'penalty': 0.001,
 'seed': None,
 'weight_fn': None}

In [None]:
# Search space for the Falkon estimator: a set of Gaussian kernels with
# different widths, a log-uniform penalty, and a few iteration budgets.
gaussian_parameter_grid = {
    'kernel': Categorical(
        [
            falkon.kernels.GaussianKernel(sigma=width)
            for width in (0.01, 0.1, 1, 5, 10, 15)
        ]
    ),
    'penalty': Real(1e-6, 1e+4, prior='log-uniform'),
    'maxiter': Categorical([5, 10, 20, 40]),
}

# Baseline Falkon estimator. kernel, penalty and M are mandatory constructor
# arguments; the values given here are placeholders that will be overridden.
estimator = falkon.Falkon(
    kernel=falkon.kernels.GaussianKernel(sigma=1),
    penalty=1e-3,
    M=1000,
    maxiter=10,
    options=falkon.FalkonOptions(use_cpu=True),
)


In [None]:


# Bayesian hyper-parameter search for a kernel ridge regression model, using
# group-wise 3-fold cross-validation and negative MSE as the score.
#
# The search-space keys follow the "model__<param>" convention, so the
# estimator handed to BayesSearchCV has to be a Pipeline containing a step
# named "model". `pipe` was previously referenced without ever being defined
# (NameError on a fresh kernel); it is created here.
pipe = Pipeline([("model", KernelRidge())])

general_kernel_space = {
    # Single-candidate entry: the "model" step is always a KernelRidge.
    "model": [KernelRidge(kernel_params={"n_jobs": -1})],
    "model__kernel": Categorical(["linear", "poly", "rbf", "laplacian"]),
    "model__gamma": Real(1e-6, 1e+1, prior='log-uniform'),
    "model__degree": Integer(1, 8),
    "model__alpha": Real(1e-6, 1e+4, prior='log-uniform'),
    "model__coef0": Integer(0, 1),
}

# Xtrain/Xtest are torch tensors at this point in the notebook, whereas
# scikit-learn works on NumPy arrays. Convert explicitly instead of relying on
# implicit coercion; inputs that are already arrays pass through untouched.
Xtrain_np = Xtrain.numpy() if isinstance(Xtrain, torch.Tensor) else Xtrain
Xtest_np = Xtest.numpy() if isinstance(Xtest, torch.Tensor) else Xtest

opt = BayesSearchCV(
    pipe,
    general_kernel_space,
    n_iter=100,
    cv=GroupKFold(n_splits=3),
    n_jobs=-1,
    scoring="neg_mean_squared_error",
    verbose=2,
)

# `groups` is forwarded to GroupKFold when building the CV splits.
opt.fit(Xtrain_np, train_properties, groups=structure_groups)

print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(Xtest_np, test_properties))
print("best params: %s" % str(opt.best_params_))

In [None]:
np.