In [1]:
import torch
from sklearn.datasets import load_boston
from falkon import Falkon, kernels
from loader.loader import load_data
import falkon
import time
from rascal.representations import SphericalInvariants as SOAP
from feature_utils.parallel import get_features_in_parallel
from sklearn.compose import TransformedTargetRegressor
from skcosmo.preprocessing import StandardFlexibleScaler

options = falkon.FalkonOptions(use_cpu=True, keops_active="yes")
train_structures, test_structures, train_properties, test_properties = load_data("./make_tensor_data/train_tensor/CSD-3k+S546_shift_tensors.xyz",\
                                                                                    "./make_tensor_data/test_tensor/CSD-500+104-7_shift_tensors.xyz",selected_species=1)

hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=4.5,
              max_radial=8,
              max_angular=8,
              gaussian_sigma_constant=0.3,
              gaussian_sigma_type="Constant",
              radial_basis="GTO",
              normalize=False,
              cutoff_smooth_width=0.3,
              cutoff_function_type="RadialScaling",
              optimization=
                    dict(
                            Spline=dict(
                               accuracy=1.0e-05
                            )
                        ),
              cutoff_function_parameters= dict(rate=1,
                    scale=2.0,
                    exponent=3.
                        ),
              compute_gradients=False,
              expansion_by_species_method="user defined",
              global_species=[1, 6, 7, 8, 16]
              )
start_time = time.time()
Xtrain = get_features_in_parallel(train_structures,SOAP,hypers,n_cores=8)
Xtest = get_features_in_parallel(test_structures,SOAP,hypers,n_cores=8)
print("--- %s seconds ---" % (time.time() - start_time))

Xtrain = torch.from_numpy(Xtrain)
Ytrain = torch.from_numpy(train_properties).reshape(-1, 1)
Xtest = torch.from_numpy(Xtest)
Ytest = torch.from_numpy(test_properties).reshape(-1, 1)


kernel = kernels.PolynomialKernel(gamma=2.,beta=0.,degree=2.)
model = Falkon(
    kernel=kernel,
    penalty=1e-3,
    M=20000,
    options=options
)


reg = TransformedTargetRegressor(regressor=model, transformer=StandardFlexibleScaler())

--- 11.20531153678894 seconds ---


In [None]:
start_time = time.time()
model.fit(Xtrain, Ytrain)
print("--- %s seconds ---" % (time.time() - start_time))

In [2]:
from sklearn.model_selection import GroupKFold
from skcosmo.model_selection import atom_groups_by_frame

In [None]:
atom_groups = atom_groups_by_frame(train_structures)

In [4]:
splits = list(GroupKFold(n_splits=5).split(Xtrain,Ytrain,groups=atom_groups))

In [10]:
type(splits[1][1])

numpy.ndarray

In [5]:
estimator = falkon.Falkon(
    kernel=kernels.PolynomialKernel(gamma=1.,beta=0.,degree=3.), penalty=1e-3, M=10000,  # Mandatory parameters, will be overridden
    maxiter=10, options=falkon.FalkonOptions(use_cpu=True))

In [3]:
import numpy as np

In [4]:
from sklearn.model_selection import cross_val_score

In [8]:

start_time = time.time()
score = -np.mean(cross_val_score(estimator, Xtrain, Ytrain, cv=splits, n_jobs=1,
                                    scoring="neg_mean_squared_error"))
print(score)
print("--- %s seconds ---" % (time.time() - start_time))

2.0899512074267608
--- 363.4872148036957 seconds ---


In [5]:
import falkon.hopt
from falkon import FalkonOptions
from falkon.hopt.objectives import NystromCompReg
import numpy as np

In [6]:
def rmse(true, pred):
    return torch.sqrt(torch.mean((true.reshape(-1, 1) - pred.reshape(-1, 1))**2))

In [8]:
sigma_init = torch.tensor([1.0] * Xtrain.shape[1]).requires_grad_()
penalty_init = torch.tensor(1e-5)
centers_init = Xtrain[np.random.choice(Xtrain.shape[0], size=(5000, ), replace=False)].clone()
options = falkon.FalkonOptions(use_cpu=True, max_cpu_mem=20*1e09)
kernel = falkon.kernels.GaussianKernel(sigma=sigma_init, opt=options)
#kernels.PolynomialKernel(gamma=2.,beta=0.,degree=2.,opt=options)
model = NystromCompReg(
    kernel=kernel, penalty_init=penalty_init, centers_init=centers_init,  # The initial hp values
    opt_penalty=True, opt_centers=True,  # Whether the various hps are to be optimized
    )

opt_hp = torch.optim.Adam(model.parameters(), lr=0.1)

In [None]:
tr_loss, tr_err = [], []

for epoch in range(50):
    opt_hp.zero_grad()
    loss = model(Xtrain, Ytrain)
    loss.backward()
    opt_hp.step()

    tr_loss.append(loss.item())
    tr_err.append(rmse(Ytrain, model.predict(Xtrain)))
    print(f"Epoch {epoch} Loss {tr_loss[-1]:.3f} Error {tr_err[-1] * 100:.2f}%")

In [9]:
10775360000/1e06

10775.36

In [3]:
falkon.__version__

'0.7.1'

In [3]:
train_pred = model.predict(Xtrain).reshape(-1, )
test_pred = model.predict(Xtest).reshape(-1, )

def rmse(true, pred):
    return torch.sqrt(torch.mean((true.reshape(-1, 1) - pred.reshape(-1, 1))**2))

print("Training RMSE: %.3f" % (rmse(train_pred, Ytrain)))
print("Test RMSE: %.3f" % (rmse(test_pred, Ytest)))



Training RMSE: 1.169
Test RMSE: 1.127


In [12]:
model.get_params()

{'M': 10000,
 'center_selection': <falkon.center_selection.UniformSelector at 0x7f2e4d88a130>,
 'error_every': 1,
 'error_fn': None,
 'kernel': PolynomialKernel(beta=tensor([0.], dtype=torch.float64), gamma=tensor([2.], dtype=torch.float64), degree=tensor([2.], dtype=torch.float64)),
 'maxiter': 20,
 'options': FalkonOptions(keops_acc_dtype='auto', keops_sum_scheme='auto', keops_active='no', keops_memory_slack=0.7, chol_force_in_core=False, chol_force_ooc=False, chol_par_blk_multiplier=2, pc_epsilon_32=1e-05, pc_epsilon_64=1e-13, cpu_preconditioner=False, cg_epsilon_32=1e-07, cg_epsilon_64=1e-15, cg_tolerance=1e-07, cg_full_gradient_every=10, cg_differential_convergence=False, debug=False, use_cpu=True, max_gpu_mem=inf, max_cpu_mem=inf, compute_arch_speed=False, no_single_kernel=True, min_cuda_pc_size_32=10000, min_cuda_pc_size_64=30000, min_cuda_iter_size_32=300000000, min_cuda_iter_size_64=900000000, never_store_kernel=False, store_kernel_d_threshold=1200, num_fmm_streams=2),
 'penal

In [14]:
model.set_params(degree=3.)

ValueError: Invalid parameter degree for estimator Falkon(M=10000, center_selection=<falkon.center_selection.UniformSelector object at 0x7f2e4d88a130>, kernel=PolynomialKernel(beta=tensor([0.], dtype=torch.float64), gamma=tensor([2.], dtype=torch.float64), degree=tensor([2.], dtype=torch.float64)), options=FalkonOptions(keops_acc_dtype='auto', keops_sum_scheme='auto', keops_active='no', keops_memory_slack=0.7, chol_force_in_core=False, chol_force_ooc=False, chol_par_blk_multiplier=2, pc_epsilon_32=1e-05, pc_epsilon_64=1e-13, cpu_preconditioner=False, cg_epsilon_32=1e-07, cg_epsilon_64=1e-15, cg_tolerance=1e-07, cg_full_gradient_every=10, cg_differential_convergence=False, debug=False, use_cpu=True, max_gpu_mem=inf, max_cpu_mem=inf, compute_arch_speed=False, no_single_kernel=True, min_cuda_pc_size_32=10000, min_cuda_pc_size_64=30000, min_cuda_iter_size_32=300000000, min_cuda_iter_size_64=900000000, never_store_kernel=False, store_kernel_d_threshold=1200, num_fmm_streams=2), penalty=0.001). Check the list of available parameters with `estimator.get_params().keys()`.