In [1]:
import torch
from sklearn.datasets import load_boston
from falkon import Falkon, kernels
from loader.loader import load_data
import falkon
import time
from rascal.representations import SphericalInvariants as SOAP
from feature_utils.parallel import get_features_in_parallel
from sklearn.compose import TransformedTargetRegressor
from skcosmo.preprocessing import StandardFlexibleScaler
from feature_utils.parallel import get_optimal_radial_basis_hypers_parallel
import numpy as np
from falkon.center_selection import FixedSelector

options = falkon.FalkonOptions(use_cpu=True, keops_active="no")
train_structures, test_structures, train_properties, test_properties = load_data("./make_tensor_data/train_tensor/CSD-3k+S546_shift_tensors.xyz",\
                                                                                    "./make_tensor_data/test_tensor/CSD-500+104-7_shift_tensors.xyz",selected_species=1)

hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=4.643,
              max_radial=8,
              max_angular=8,
              gaussian_sigma_constant=0.179,
              gaussian_sigma_type="Constant",
              radial_basis="GTO",
              normalize=True,
              cutoff_smooth_width=0.3,
              cutoff_function_type="RadialScaling",
              optimization=
                    dict(
                            Spline=dict(
                               accuracy=1.0e-05
                            )
                        ),
              cutoff_function_parameters= dict(rate=1.968,
                    scale=2.601,
                    exponent=4.698
                        ),
              compute_gradients=False,
              expansion_by_species_method="user defined",
              global_species=[1, 6, 7, 8, 16]
              )


hypers = get_optimal_radial_basis_hypers_parallel(hypers,train_structures)

start_time = time.time()
Xtrain = get_features_in_parallel(train_structures,SOAP,hypers,n_cores=8)
Xtest = get_features_in_parallel(test_structures,SOAP,hypers,n_cores=8)
print("--- %s seconds ---" % (time.time() - start_time))

Xtrain = torch.from_numpy(Xtrain)
Ytrain = torch.from_numpy(train_properties).reshape(-1, 1)
Xtest = torch.from_numpy(Xtest)
Ytest = torch.from_numpy(test_properties).reshape(-1, 1)

indices = np.load("./PCov-FPS-sample-ids/PCOV_FPS_selected_sample_ids_6_selected_20000_n8_l8_PASSING.npy")
indices_torch = torch.from_numpy(indices).reshape(-1,1)
X_centers_init = Xtrain[indices].clone()
Y_centers_init = Ytrain[indices].clone()



--- 17.09061050415039 seconds ---


In [15]:


indices_torch = torch.from_numpy(indices).reshape(-1,1)
X_centers_init = Xtrain[indices].clone()
Y_centers_init = Ytrain[indices].clone()

selector = FixedSelector(X_centers_init,Y_centers_init,indices_torch)

kernel = kernels.GaussianKernel(sigma=1.352)

model = Falkon(
    maxiter=20,
    kernel=kernel,
    penalty=1.07e-06,
    M=20000,
    options=options,
    #center_selection=selector
        
)

In [16]:
start_time = time.time()
model.fit(Xtrain, Ytrain)
print("--- %s seconds ---" % (time.time() - start_time))

--- 157.88375782966614 seconds ---


In [17]:
train_pred = model.predict(Xtrain).reshape(-1, )
test_pred = model.predict(Xtest).reshape(-1, )

def rmse(true, pred):
    return torch.sqrt(torch.mean((true.reshape(-1, 1) - pred.reshape(-1, 1))**2))

print("Training RMSE: {}".format(rmse(train_pred, Ytrain)))
print("Test RMSE: {}".format(rmse(test_pred, Ytest)))



Training RMSE: 5.76531312083172
Test RMSE: 5.178937731049624


In [None]:
Training RMSE: 5.799836323452176
Test RMSE: 5.204630858539724


In [7]:
from sklearn.metrics import mean_squared_error

In [8]:
mean_squared_error(test_pred, Ytest,squared=False)

5.1982427007730605

In [None]:
Training RMSE: 53.89054682767797
Test RMSE: 50.388110001305485




In [None]:
Training RMSE: 53.891
Test RMSE: 50.388

In [8]:
X_centers_init.shape

torch.Size([20000, 8640])

In [10]:
torch.allclose(model.ny_points_,X_centers_init)

True

In [14]:
torch.allclose(model.ny_points_,X_centers_init)

False

In [None]:
Training RMSE: 53.884
Test RMSE: 50.378

In [5]:
from sklearn.model_selection import GroupKFold
from skcosmo.model_selection import atom_groups_by_frame

In [6]:
atom_groups = atom_groups_by_frame(train_structures)

In [26]:
splits[1]

(array([    20,     21,     22, ..., 103909, 103910, 103911]),
 array([     0,      1,      2, ..., 103811, 103812, 103813]))

In [27]:
splits[0]

(array([     0,      1,      2, ..., 103839, 103840, 103841]),
 array([    20,     21,     22, ..., 103909, 103910, 103911]))

In [28]:
splits[3]

(array([     0,      1,      2, ..., 103909, 103910, 103911]),
 array([   130,    131,    132, ..., 103769, 103770, 103771]))

In [23]:
train_structures[0].get_atomic_numbers()

array([7, 7, 7, 7, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])

In [7]:
splits = list(GroupKFold(n_splits=5).split(Xtrain,Ytrain,groups=atom_groups))

In [10]:
type(splits[1][1])

numpy.ndarray

In [14]:
estimator = falkon.Falkon(
    kernel=kernels.PolynomialKernel(gamma=1.,beta=0.,degree=3.), penalty=1e-3, M=20000,  # Mandatory parameters, will be overridden
    maxiter=40, options=falkon.FalkonOptions(use_cpu=True))

In [15]:
import numpy as np

In [16]:
from sklearn.model_selection import cross_val_score

In [17]:

start_time = time.time()
score = -np.mean(cross_val_score(estimator, Xtrain, Ytrain, cv=splits, n_jobs=1,
                                    scoring="neg_mean_squared_error"))
print(score)
print("--- %s seconds ---" % (time.time() - start_time))

6385.565644286591
--- 685.7443106174469 seconds ---


In [5]:
import falkon.hopt
from falkon import FalkonOptions
from falkon.hopt.objectives import NystromCompReg
import numpy as np

In [6]:
def rmse(true, pred):
    return torch.sqrt(torch.mean((true.reshape(-1, 1) - pred.reshape(-1, 1))**2))

In [8]:
sigma_init = torch.tensor([1.0] * Xtrain.shape[1]).requires_grad_()
penalty_init = torch.tensor(1e-5)
centers_init = Xtrain[np.random.choice(Xtrain.shape[0], size=(5000, ), replace=False)].clone()
options = falkon.FalkonOptions(use_cpu=True, max_cpu_mem=20*1e09)
kernel = falkon.kernels.GaussianKernel(sigma=sigma_init, opt=options)
#kernels.PolynomialKernel(gamma=2.,beta=0.,degree=2.,opt=options)
model = NystromCompReg(
    kernel=kernel, penalty_init=penalty_init, centers_init=centers_init,  # The initial hp values
    opt_penalty=True, opt_centers=True,  # Whether the various hps are to be optimized
    )

opt_hp = torch.optim.Adam(model.parameters(), lr=0.1)

In [None]:
tr_loss, tr_err = [], []

for epoch in range(50):
    opt_hp.zero_grad()
    loss = model(Xtrain, Ytrain)
    loss.backward()
    opt_hp.step()

    tr_loss.append(loss.item())
    tr_err.append(rmse(Ytrain, model.predict(Xtrain)))
    print(f"Epoch {epoch} Loss {tr_loss[-1]:.3f} Error {tr_err[-1] * 100:.2f}%")

In [9]:
10775360000/1e06

10775.36

In [3]:
falkon.__version__

'0.7.1'

In [12]:
model.get_params()

{'M': 10000,
 'center_selection': <falkon.center_selection.UniformSelector at 0x7f2e4d88a130>,
 'error_every': 1,
 'error_fn': None,
 'kernel': PolynomialKernel(beta=tensor([0.], dtype=torch.float64), gamma=tensor([2.], dtype=torch.float64), degree=tensor([2.], dtype=torch.float64)),
 'maxiter': 20,
 'options': FalkonOptions(keops_acc_dtype='auto', keops_sum_scheme='auto', keops_active='no', keops_memory_slack=0.7, chol_force_in_core=False, chol_force_ooc=False, chol_par_blk_multiplier=2, pc_epsilon_32=1e-05, pc_epsilon_64=1e-13, cpu_preconditioner=False, cg_epsilon_32=1e-07, cg_epsilon_64=1e-15, cg_tolerance=1e-07, cg_full_gradient_every=10, cg_differential_convergence=False, debug=False, use_cpu=True, max_gpu_mem=inf, max_cpu_mem=inf, compute_arch_speed=False, no_single_kernel=True, min_cuda_pc_size_32=10000, min_cuda_pc_size_64=30000, min_cuda_iter_size_32=300000000, min_cuda_iter_size_64=900000000, never_store_kernel=False, store_kernel_d_threshold=1200, num_fmm_streams=2),
 'penal

In [14]:
model.set_params(degree=3.)

ValueError: Invalid parameter degree for estimator Falkon(M=10000, center_selection=<falkon.center_selection.UniformSelector object at 0x7f2e4d88a130>, kernel=PolynomialKernel(beta=tensor([0.], dtype=torch.float64), gamma=tensor([2.], dtype=torch.float64), degree=tensor([2.], dtype=torch.float64)), options=FalkonOptions(keops_acc_dtype='auto', keops_sum_scheme='auto', keops_active='no', keops_memory_slack=0.7, chol_force_in_core=False, chol_force_ooc=False, chol_par_blk_multiplier=2, pc_epsilon_32=1e-05, pc_epsilon_64=1e-13, cpu_preconditioner=False, cg_epsilon_32=1e-07, cg_epsilon_64=1e-15, cg_tolerance=1e-07, cg_full_gradient_every=10, cg_differential_convergence=False, debug=False, use_cpu=True, max_gpu_mem=inf, max_cpu_mem=inf, compute_arch_speed=False, no_single_kernel=True, min_cuda_pc_size_32=10000, min_cuda_pc_size_64=30000, min_cuda_iter_size_32=300000000, min_cuda_iter_size_64=900000000, never_store_kernel=False, store_kernel_d_threshold=1200, num_fmm_streams=2), penalty=0.001). Check the list of available parameters with `estimator.get_params().keys()`.