In [51]:
%config IPCompleter.use_jedi = False
from loader import load_data
import numpy as np
from sklearn.linear_model import RidgeCV
from rascal.representations import SphericalInvariants as SOAP
from rascal.utils import get_optimal_radial_basis_hypers
from rascal.neighbourlist.structure_manager import mask_center_atoms_by_id
from skcosmo.model_selection import atom_groups_by_frame
from sklearn.linear_model import LinearRegression, Ridge
from copy import deepcopy
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [91]:
train_structures, test_structures, train_properties, test_properties = load_data("./make_tensor_data/train_tensor/CSD-3k+S546_shift_tensors.xyz",\
                                                                                    "./make_tensor_data/test_tensor/CSD-500+104-7_shift_tensors.xyz",selected_species=1,random_subsample_test=200)

In [29]:
class BufferedSOAPFeatures:
    def __init__(self, structures, calculator_params, calculator=SOAP):
        self.X = None
        self.structures = structures
        self.calculator = calculator
        self.calculator_params = calculator_params

    def get_features(self, update_params):
        
        updated_params = self.calculator_params.copy()
        
        for key, value in update_params.items():
            
            if isinstance(value, np.integer):
                value = int(value)
            if isinstance(value, np.floating):
                value = float(value)
            if isinstance(value, np.ndarray):
                value = value.tolist()
                
            updated_params[key] = value

        
        if self.X is None:
            
            #print("Initial calculation")
            self.X = get_features_in_parallel(self.structures,self.calculator,updated_params)
        
        else:
            
            if updated_params == self.calculator_params:
                #print("Stored")
                pass
            else:
                #print("Recalculate")
                self.X = get_features_in_parallel(self.structures,self.calculator,updated_params)
        
        self.calculator_params = updated_params
        
        return self.X

In [3]:
hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=3.,
              max_radial=8,
              max_angular=8,
              gaussian_sigma_constant=0.3,
              gaussian_sigma_type="Constant",
              radial_basis="GTO",
              normalize=True,
              cutoff_smooth_width=0.3,
              optimization=
                    dict(
                            Spline=dict(
                               accuracy=1.0e-05
                            )
                        ),
              compute_gradients=False,
              
              )

In [3]:
SOAP_HYPERS = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.5,
    "max_radial": 9,
    "max_angular": 9,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8],
    "compute_gradients": False,
    "normalize": True,
}

In [51]:
len(list(combinations_with_replacement(SOAP_HYPERS["global_species"],2)))

10

In [31]:
mysoap = SOAP(**SOAP_HYPERS)

In [48]:
combinations_with_replacement(SOAP_HYPERS["global_species"],2)

<itertools.combinations_with_replacement at 0x7fe1158b9db0>

In [35]:
mysoap.transform(train_structures).get_features(mysoap).shape

(7667, 12150)

In [36]:
mysoap.get_num_coefficients()

810

In [37]:
12150/810

15.0

In [33]:
hypers = dict(soap_type="PowerSpectrum",
              interaction_cutoff=3.,
              max_radial=4,
              max_angular=4,
              gaussian_sigma_constant=0.3,
              gaussian_sigma_type="Constant",
              radial_basis="GTO",
              normalize=True,
              cutoff_smooth_width=0.3,
              optimization=
                    dict(
                            Spline=dict(
                               accuracy=1.0e-05
                            )
                        ),
              compute_gradients=False,
              expansion_by_species_method="user defined",
              global_species=[1, 6, 7, 8, 16]
              )

space = [Real(10**-5, 10**2, "log-uniform", name='alpha'),
        Real(0.05,1.5, "uniform", name="gaussian_sigma_constant"),
        Real(2.,4.5, "uniform", name="interaction_cutoff")]

reg = Ridge()
y = train_properties
atom_groups = atom_groups_by_frame(train_structures)
Feature_gen = BufferedSOAPFeatures(train_structures, hypers)


@use_named_args(space)
def soap_objective(**params):
    update_dict = {}
    
    new_params = params.copy()
    
    for key, value in new_params.items():
        if key in Feature_gen.calculator_params:
            #hypers[key] = value
            update_dict[key] = params.pop(key, None)
    
    #print(update_dict)
    reg.set_params(**params)
    
    print("step")
    
    X = Feature_gen.get_features(update_dict)
    #print(X.shape)
    
    #print(Feature_gen.hypers["max_angular"])
    splits = list(GroupKFold(n_splits=5).split(X,y,groups=atom_groups))
    
    return -np.mean(cross_val_score(reg, X, y, cv=splits, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))

res_gp = gp_minimize(soap_objective, space, n_calls=10, random_state=0)

step
step
step
step
step
step
step
step
step
step


In [82]:
len(y)

3662

134692

In [40]:
reg.alpha

2.043192897589359

In [42]:
paramdict = {}
for param in zip(space,res_gp.x):
    paramdict[param[0].name] = param[1]

In [43]:
paramdict

{'alpha': 0.0033970924303804028,
 'gaussian_sigma_constant': 0.05557628804633241,
 'interaction_cutoff': 3.6601119046746073}

In [28]:
import cProfile

In [30]:
cProfile.run("gp_minimize(soap_objective, space, n_calls=10, random_state=0)")

         551440 function calls (545703 primitive calls) in 168.840 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2935    0.003    0.000    0.030    0.000 <__array_function__ internals>:2(all)
      117    0.000    0.000    0.009    0.000 <__array_function__ internals>:2(allclose)
      141    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(alltrue)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(amax)
       19    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(amin)
      422    0.000    0.000    0.005    0.000 <__array_function__ internals>:2(any)
      145    0.000    0.000    0.002    0.000 <__array_function__ internals>:2(append)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(argmax)
     2014    0.001    0.000    0.004    0.000 <__array_function__ internals>:2(argmin)
       13    0.000    0.000    0.002    0.000 

In [None]:
import matplotlib.pyplot as plt

In [None]:
from skopt.plots import plot_convergence


plot_convergence(res_gp)
plt.xlim(3,50)
plt.ylim(0.8,1.)

In [None]:
reg.

In [41]:
res_gp.x

[0.0033970924303804028, 0.05557628804633241, 3.6601119046746073]

In [26]:
np.sqrt(0.8)

0.8944271909999159

In [6]:

def split(a, n):
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))


In [54]:
list(split(range(10),3))

[range(0, 4), range(4, 7), range(7, 10)]

In [48]:
help(divmod)

Help on built-in function divmod in module builtins:

divmod(x, y, /)
    Return the tuple (x//y, x%y).  Invariant: div*y + mod == x.



In [4]:
from helpers import grouper

In [5]:

from joblib import Parallel, delayed

In [101]:
import joblib
from itertools import combinations_with_replacement
from joblib import Parallel, delayed, parallel_backend
from helpers import grouper
from joblib.externals.loky import set_loky_pickler

def split(a, n):
    #splits a list into n chunks, fails if len(a) is 0
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

def get_features(frames,calculator,hypers):
    calculatorinstance = calculator(**hypers)
    #print("worker spawned")
    return calculatorinstance.transform(frames).get_features(calculatorinstance)

def get_features_by_species(frames,calculator,hypers):
    calculatorinstance = calculator(**hypers)
    #print("worker spawned")
    return calculatorinstance.transform(frames).get_features_by_species(calculatorinstance)


def get_features_in_parallel(frames,calculator,hypers,blocks=4):
    """helper function that returns the features of a calculator (from calculator.transform())
       in parallel
    """
    
    #block is necessary to ensure that shape of the chunks is equal
    #replace by get_atomic_species functions
    

    with parallel_backend(backend="threading"):
        results = Parallel(n_jobs=joblib.cpu_count())(delayed(get_features)(frame, calculator, hypers) for frame in grouper(25,frames))
    
    return np.concatenate(results)

def get_features_in_parallel_by_species(frames,calculator,hypers,blocksize=25):
    """helper function that returns the features of a calculator (from calculator.transform())
       in parallel
    """
    
    #block is necessary to ensure that shape of the chunks is equal
    #replace by get_atomic_species functions
    

    with parallel_backend(backend="threading"):
        intermediate_results = Parallel(n_jobs=joblib.cpu_count())(delayed(get_features_by_species)(frame, calculator, hypers) for frame in grouper(blocksize,frames))
    
    results = {}
    
    for key in intermediate_results[0].keys():
        results[key] = np.concatenate([chunk[key] for chunk in intermediate_results])
    
    return results


In [None]:
mydict = {"a","b"}

In [9]:
feat_gen = SOAP(**SOAP_HYPERS)

In [32]:
my_feat = feat_gen.transform(train_structures[0]).get_features_by_species(feat_gen)

In [33]:
my_feat.keys()

dict_keys([(1, 1), (1, 6), (1, 7), (1, 8), (6, 6), (6, 7), (6, 8), (7, 7), (7, 8), (8, 8)])

In [5]:
def _get_features_in_parallel(frames,calculator,blocksize=100,n_jobs=-1):
    """helper function that returns the features of a calculator (from calculator.transform())
       in parallel
    """
    #for np.concatenate. arrays in list should all have same shape
    hypers = calculator.hypers
    hypers["expansion_by_species_method"] = "user defined"
    hypers["global_species"] = get_all_species(frames).tolist()
    calculator.update_hyperparameters(**hypers)
    return np.concatenate(Parallel(n_jobs=2)(delayed(retrieve_features)(calculator, chunk)\
                                              for chunk in grouper(blocksize,frames)))


In [None]:
grouper(25,frames)

In [19]:
joblib.cpu_count()

8

In [8]:
import time

In [None]:
grouper(50,frames)

In [37]:
Parallel(n_jobs=2,verbose=1)(delayed(np.sqrt)(i ** 2) for i in range(10))

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  10 out of  10 | elapsed:    0.5s finished


[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

In [36]:
from rascal.utils import get_optimal_radial_basis_hypers

In [55]:
SOAP_HYPERS = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.5,
    "max_radial": 9,
    "max_angular": 9,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8],
    "compute_gradients": False,
    "normalize": False,
}

SOAP_HYPERS = get_optimal_radial_basis_hypers(SOAP_HYPERS, train_structures, expanded_max_radial=30)

In [58]:
SOAP_HYPERS = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.5,
    "max_radial": 9,
    "max_angular": 0,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8],
    "compute_gradients": False,
    "normalize": False,
}

start_time = time.time()
SOAP_HYPERS = get_optimal_radial_basis_hypers(SOAP_HYPERS, train_structures, expanded_max_radial=30)
print("--- %s seconds ---" % (time.time() - start_time))

--- 5.575486183166504 seconds ---


In [47]:
sys.getsizeof(SOAP_HYPERS["optimization"])

232

In [53]:
sys.getsizeof(SOAP_HYPERS["optimization"]["RadialDimReduction"]["projection_matrices"])

232

In [45]:
import sys

In [46]:
sys.getsizeof(SOAP_HYPERS)

640

In [35]:




start_time = time.time()
get_features_in_parallel(train_structures,calculator=SOAP,hypers=SOAP_HYPERS)
print("--- %s seconds ---" % (time.time() - start_time))

--- 5.222474098205566 seconds ---


In [29]:
mysoap = SOAP(**SOAP_HYPERS)
soap_feats =  mysoap.transform(train_structures).get_features(mysoap)

In [30]:
np.allclose(feats,soap_feats)

True

In [35]:
split([1,2,3,4,5],5)

<generator object split.<locals>.<genexpr> at 0x7f9ac85bdcf0>

In [56]:
feats.shape

(134692, 12150)

In [36]:
type(grouper(10,np.arange(100)))

generator

In [38]:
type(split(np.arange(100),10))

generator

In [57]:
len(train_properties)

134692

In [28]:
import time

In [14]:
start_time = time.time()
_ = get_features_in_parallel(train_structures,calculator=SOAP,hypers=hypers,blocks=2)
print("--- %s seconds ---" % (time.time() - start_time))

NameError: name 'hypers' is not defined

In [99]:
import os
import copy
import numpy as np
import multiprocessing
from multiprocessing import shared_memory
from itertools import combinations_with_replacement
import ase
from rascal.representations import SphericalInvariants, SphericalExpansion

SOAP_HYPERS = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.5,
    "max_radial": 9,
    "max_angular": 9,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8, 16],
    "compute_gradients": False,
    "normalize": True,
}
CALCULATOR = None


def _compute_single(data):
    representation = CALCULATOR.transform(data["frame"])
    return representation.get_features(CALCULATOR)

def _compute_single_by_species(data):
    representation = CALCULATOR.transform(data["frame"])
    return representation.get_features_by_species(CALCULATOR)

def _initializer_spherical(hypers):
    global CALCULATOR
    CALCULATOR = SphericalExpansion(**hypers)
    
def _initializer(hypers):
    global CALCULATOR
    CALCULATOR = SphericalInvariants(**hypers)


def compute_soap(frames, passed_hyper, selected_features=None):
    hypers = copy.deepcopy(passed_hyper)

    if selected_features is not None:
        hypers["coefficient_subselection"] = selected_features

    calculator = SphericalInvariants(**hypers)

    if selected_features is not None:
        n_features = calculator.get_num_coefficients()
    else:
        # get_num_coefficients does not handle global_species, the factor of 10
        # comes from all the species pairs
        n_pairs = len(list(combinations_with_replacement(hypers["global_species"],2)))
        n_features = n_pairs * calculator.get_num_coefficients()

    #shape = (len(frames), n_features)

    with multiprocessing.Pool(
        processes=multiprocessing.cpu_count(),
        initializer=_initializer,
        initargs=(hypers,),
    ) as pool:

        iterator = pool.imap(
            _compute_single,
            map(
                lambda data: {"i": data[0], "frame": data[1]},
                enumerate(frames),
            ),
        )
        
        try:
            lengths = [len(frame.arrays["center_atoms_mask"].nonzero()[0]) for frame in frames]
        except:
            lengths = [len(frame) for frame in frames]
            
        cumulated_sum = np.cumsum([0, *lengths]) 
        result = np.zeros((np.sum(lengths),n_features),dtype=np.float64)
        
        for i,value in enumerate(iterator):
            result[cumulated_sum[i]:cumulated_sum[i+1]] = value

    return result

def compute_spex_by_species(frames, passed_hyper, selected_features=None):
    hypers = copy.deepcopy(passed_hyper)

    if selected_features is not None:
        hypers["coefficient_subselection"] = selected_features

    calculator = SphericalExpansion(**hypers)

    if selected_features is not None:
        n_features = calculator.get_num_coefficients()
    else:
        # get_num_coefficients does not handle global_species, the factor of 10
        # comes from all the species pairs
        n_pairs = len(list(combinations_with_replacement(hypers["global_species"],2)))
        n_features = n_pairs * calculator.get_num_coefficients()

    #shape = (len(frames), n_features)

    with multiprocessing.Pool(
        processes=multiprocessing.cpu_count(),
        initializer=_initializer_spherical,
        initargs=(hypers,),
    ) as pool:

        iterator = pool.imap(
            _compute_single_by_species,
            map(
                lambda data: {"i": data[0], "frame": data[1]},
                enumerate(frames),
            ),
        )
        
        """try:
            lengths = [len(frame.arrays["center_atoms_mask"].nonzero()[0]) for frame in frames]
        except:
            lengths = [len(frame) for frame in frames]
            
        cumulated_sum = np.cumsum([0, *lengths]) 
        result = np.zeros((np.sum(lengths),n_features),dtype=np.float64)
        
        """
        intermediate_results = []
        
        results = {}
        
        for i,value in enumerate(iterator):
            intermediate_results.append(value)
        
        for key in intermediate_results[0].keys():
            results[key] = np.concatenate([chunk[key] for chunk in intermediate_results])
        
        
        
    return results

In [20]:
8/2

4.0

In [None]:
lengths = [len(frame.arrays["center_atoms_mask"].nonzero()[0]) for frame in frames]
cumulated_sum = np.cumsum([0, *lengths]) 

In [101]:
np.sum([len(frame.arrays["center_atoms_mask"].nonzero()[0]) for frame in train_structures])

134692

In [None]:
len(list(combinations_with_replacement(SOAP_HYPERS["global_species"],2)))

In [6]:
import time

In [61]:
#multiprocessing.set_start_method("forkserver")
time = time.time()
features = compute_soap(train_structures)

CPU times: user 4 µs, sys: 3 µs, total: 7 µs
Wall time: 11.9 µs


In [24]:
start_time = time.time()
compute_soap(train_structures,SOAP_HYPERS)
print("--- %s seconds ---" % (time.time() - start_time))

--- 26.577285051345825 seconds ---


In [7]:
start_time = time.time()
mysoap = SOAP(**SOAP_HYPERS)
mysoap.transform(train_structures).get_features(mysoap)
print("--- %s seconds ---" % (time.time() - start_time))

--- 40.99152374267578 seconds ---


In [11]:
multiprocessing.cpu_count()

8

In [86]:
features.shape

(3662, 12150)

In [39]:
SOAP_HYPERS["global_species"]

[1, 6, 7]

In [106]:
from scipy.special import legendre, gamma
from copy import deepcopy
import numpy as np
import time
from rascal.representations.spherical_expansion import SphericalExpansion
from rascal.utils import get_radial_basis_covariance, get_radial_basis_pca, get_radial_basis_projections

def get_optimal_radial_basis_hypers_parallel(hypers, frames, blocksize=25,expanded_max_radial=-1):
    """
    Helper function to compute an optimal radial basis following
    Goscinski et al, arxiv:2105.08717.
    hypers: dictionary
        hyperparameters for the desired representation. "max_radial" indicates
        the desired size of the optimal basis
    frames: ase.Atoms
        a list of structures used to estimate the optimal radial basis. can also
        be given as a list of frames blocks, in which case it computes the covariance
        incrementally (useful for large expanded_max_radial and/or large framesets)
    expanded_max_radial: int
        number of intermediate basis to be used to estimate the optimal basis.
        defaults to -1, in which case it is taken to be 2*max_radial
    Returns:
    -------
    optimal_hypers: dictionary
        hyperparameters including the optimal basis projectors
    """

    spherical_expansion_hypers = deepcopy(hypers)

    # removes parameters that don't make sense for a spherical expansion
    spherical_expansion_hypers.pop("normalize", None)
    spherical_expansion_hypers.pop("soap_type", None)
    spherical_expansion_hypers.pop("compute_gradients", None)
    spherical_expansion_hypers.pop("inversion_symmetry", None)

    if "optimization" in spherical_expansion_hypers:
        spherical_expansion_hypers["optimization"].pop("RadialDimReduction", None)

    if expanded_max_radial == -1:
        expanded_max_radial = 2 * hypers["max_radial"]
    spherical_expansion_hypers["max_radial"] = expanded_max_radial

    spex = SphericalExpansion(**spherical_expansion_hypers)

    # computes density expansion coefficients and covariance (incrementally if needed)
    if not type(frames[0]) is list:
        frames = [frames]
    
    start_time = time.time()
    feats = get_features_in_parallel_by_species(frames[0],SphericalExpansion,spherical_expansion_hypers,blocksize=blocksize)
    
    #get_features_in_parallel_by_species(frames[0], calculator=SphericalExpansion, \hypers=spherical_expansion_hypers)
    #compute_spex(frames[0],spherical_expansion_hypers)
    """get_features_in_parallel(frames[0], calculator=SphericalExpansion, \
                         hypers=spherical_expansion_hypers)"""
    print("parallel: --- %s seconds ---" % (time.time() - start_time))
    
    start_time = time.time()
    feats_spex = spex.transform(frames[0]).get_features_by_species(spex)
    print("sequential: --- %s seconds ---" % (time.time() - start_time))
    
    
    
    cov = get_radial_basis_covariance(spex, feats)
    nframes = len(frames[0])
    
    for fr in frames[1:]:
        feats = spex.transform(fr).get_features_by_species(spex)
        icov = get_radial_basis_covariance(spex, feats)
        # bit perverse: both cov and icov are normalized, so we need to
        # un-normalize before accumulating
        for s in cov.keys():
            cov[s] = (cov[s] * nframes + icov[s] * len(fr)) / (nframes + len(fr))
        nframes += len(fr)

    # principal components from the covariance
    p_val, p_vec = get_radial_basis_pca(cov)

    # converts to the format suitable for hypers
    p_mat = get_radial_basis_projections(p_vec, hypers["max_radial"])

    # assemble the updated hypers
    optimal_hypers = deepcopy(hypers)
    if not "optimization" in optimal_hypers:
        optimal_hypers["optimization"] = {}
    optimal_hypers["optimization"] = {
        "RadialDimReduction": {"projection_matrices": p_mat},
    }

    if not "Spline" in optimal_hypers["optimization"]:
        optimal_hypers["optimization"]["Spline"] = {"accuracy": 1e-8}

    return optimal_hypers

In [107]:
SOAP_HYPERS = {
    "soap_type": "PowerSpectrum",
    "interaction_cutoff": 4.5,
    "max_radial": 9,
    "max_angular": 9,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8, 16],
    "compute_gradients": False,
    "normalize": False,
}

start_time = time.time()
SOAP_HYPERS = get_optimal_radial_basis_hypers_parallel(SOAP_HYPERS, train_structures, expanded_max_radial=20, blocksize=25)
print("--- %s seconds ---" % (time.time() - start_time))

parallel: --- 12.692618608474731 seconds ---
sequential: --- 33.23567724227905 seconds ---
--- 57.15631461143494 seconds ---


In [95]:
start_time = time.time()
SOAP_HYPERS = get_optimal_radial_basis_hypers(SOAP_HYPERS, train_structures, expanded_max_radial=20)
print("--- %s seconds ---" % (time.time() - start_time))

--- 44.46110129356384 seconds ---


In [88]:
HYPERS = {
    "interaction_cutoff": 4.5,
    "max_radial": 18,
    "max_angular": 9,
    "gaussian_sigma_constant": 0.1,
    "gaussian_sigma_type": "Constant",
    "cutoff_smooth_width": 0.5,
    "radial_basis": "GTO",
    "expansion_by_species_method": "user defined",
    "global_species": [1, 6, 7, 8, 16],
    "compute_gradients": False
}

In [108]:
start_time = time.time()
feat = get_features_in_parallel_by_species(train_structures,SphericalExpansion,HYPERS)
print("--- %s seconds ---" % (time.time() - start_time))

--- 11.498345136642456 seconds ---


In [109]:
mysoap = SphericalExpansion(**HYPERS)
my_feat = mysoap.transform(train_structures).get_features_by_species(mysoap)

In [112]:
for key in feat.keys():
    print(np.equal(feat[key],my_feat[key]))

[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  

In [75]:
feat[(1,)][-1000]

array([ 1.61096077e-03,  4.12982012e-06,  7.78346713e-06,  5.31525923e-07,
        3.06907294e-06, -1.36602380e-06, -2.98652990e-06, -3.26303524e-06,
        2.73217042e-06, -5.50975307e-06, -7.05171349e-07,  3.53880289e-06,
        6.74865133e-06,  3.14175617e-07, -5.61144676e-07, -8.72513218e-07,
       -3.11290796e-06, -3.38386856e-06, -9.92018532e-07, -1.19032075e-05,
       -1.09938769e-05,  5.69084903e-06, -2.34788979e-06,  1.77574607e-06,
       -6.13369959e-06, -3.13528632e-06,  2.26332881e-06, -3.59902746e-06,
       -1.22400555e-05,  7.86851329e-06,  4.97600284e-06, -3.07423202e-07,
       -5.06172739e-06, -2.05555550e-06,  1.08991136e-06, -3.38540014e-06,
        4.83562422e-06, -1.99152681e-06,  4.13255998e-06, -2.07195307e-06,
        1.82864928e-06, -6.32409268e-06, -3.34862790e-06,  6.17707270e-06,
        3.93006261e-06,  3.22510080e-06, -5.80384756e-07,  2.06395208e-06,
        1.51530368e-06,  2.94945636e-06,  9.21527590e-07, -3.60663088e-06,
        8.41772686e-06,  

In [68]:
len(train_structures)

200

In [60]:
my_feat.keys()

dict_keys([(1,), (6,), (7,), (8,), (16,)])

In [62]:
len(myfeat[(8,)])

8010

In [61]:
myfeat.keys()

dict_keys([(1,), (6,), (7,), (8,), (16,)])

In [53]:
mysoap = SphericalExpansion(**HYPERS)
my_feat = mysoap.transform(train_structures).get_features_by_species(mysoap)

In [54]:
len(my_feat[(8,)])

8010

In [56]:
len(train_properties)

8010