In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
from matplotlib import pylab as plt

import time
import rascal

import ase
from ase import Atoms
from ase.io import read, write
from ase.build import make_supercell
from ase.visualize import view
import numpy as np

# Descriptor related imports: compare the librascal and pyLODE versions of SOAP
import rascal.representations
import rascaline
from pylode.lib.projection_coeffs import DensityProjectionCalculator

# Introduction

This notebook is used to compare the features obtained from rascaline, librascal and pylode. The goal is to fully understand the details of the three implementations to make sure that the same coefficients from different codes agree.

# Compute the same features using 3 different libraries

### Define structure to be used for the comparison
The main differences between librascal and the pyLODE implementation are the presence of a smooth cutoff function and the potentially different order in which neighbors are stored. We thus wish to generate structures for which these two effects do not alter the coefficients too much. For this task, we use clusters of Oxygen atoms for which all atoms have a mutual distance of less than 3A. Then, even for a relatively large smearing of 1.5A, the atomic densities will be reasonably contained within a ball of cutoff radius 6 that will also be used for the cutoff.

In [3]:
frames = []
cell = np.eye(3) * 16
distances = np.linspace(1., 2.5, 10)
for d in distances:
    positions = [[1,1,1],[1+0.1*d,1+d,d+1]]
    positions = [[1,1,1],[1,1,1+d]]
    frame = Atoms('O2', positions=positions, cell=cell, pbc=True)
    frames.append(frame)

### Define common hyperparameters

In [4]:
nmax = 5
lmax = 2
rcut = 6.
smearing = 0.5

### Get the features from librascal

In [5]:
# define the parameters of the spherical expansion
hypers = dict(interaction_cutoff=rcut, 
              max_radial=nmax, 
              max_angular=lmax, 
              gaussian_sigma_constant=smearing,
              gaussian_sigma_type="Constant",
              cutoff_smooth_width=0.1,
              radial_basis="GTO",
              compute_gradients=False,
              expansion_by_species_method='structure wise',
              )

calculator_librascal = rascal.representations.SphericalExpansion(**hypers)
# compute the representation of all the structures
features_librascal = calculator_librascal.transform(frames).get_features(calculator_librascal)

### Get the features from pyLODE

In [6]:
hypers = {
    'smearing':smearing,
    'max_angular':lmax,
    'max_radial':nmax,
    'cutoff_radius':rcut,
    'potential_exponent':0,
    'radial_basis': 'gto',
    'compute_gradients':False,
    }

calculator_pylode = DensityProjectionCalculator(**hypers)
calculator_pylode.transform(frames)
features_pylode = calculator_pylode.features

### Get the features from Rascaline

In [7]:
HYPER_PARAMETERS = {
    "cutoff": rcut,
    "max_radial": nmax,
    "max_angular": lmax,
    "atomic_gaussian_width": smearing,
    "gradients": False,
    "radial_basis": {
        "Gto": {},
    },
    "cutoff_function": {
        "Step": {"width": 1e-3},
    },
}

calculator_rascaline = rascaline.SphericalExpansion(**HYPER_PARAMETERS)

# run the actual calculation
descriptor_rascaline = calculator_rascaline.compute(frames)

In [8]:
print(descriptor_rascaline.values[0].reshape(((lmax+1)**2, nmax)).T / 10.962374348347298)

[[ 0.05147185  0.          0.02122093  0.          0.          0.
   0.01512606  0.          0.        ]
 [-0.00546906  0.          0.00777222  0.          0.          0.
   0.00809737  0.          0.        ]
 [-0.00079393  0.         -0.00222316  0.          0.          0.
  -0.00188237  0.          0.        ]
 [-0.00095734  0.          0.00110087  0.          0.          0.
   0.00109044  0.          0.        ]
 [ 0.00070372  0.         -0.00020696  0.          0.          0.
  -0.00024995  0.          0.        ]]


In [9]:
print(features_librascal[0].reshape((nmax,(lmax+1)**2)))

[[ 0.05147185  0.          0.02122093  0.          0.          0.
   0.01512605  0.          0.        ]
 [-0.00546906  0.          0.00777222  0.          0.          0.
   0.00809737  0.          0.        ]
 [-0.00079393  0.         -0.00222316  0.          0.          0.
  -0.00188237  0.          0.        ]
 [-0.00095734  0.          0.00110087  0.          0.          0.
   0.00109044  0.          0.        ]
 [ 0.00070372  0.         -0.00020696  0.          0.          0.
  -0.00024995  0.          0.        ]]


In [10]:
print(np.round(features_pylode,13)[0,0])

[[ 0.58296935 -0.         -0.21830123 -0.          0.          0.
   0.14991096  0.          0.        ]
 [ 0.0413589  -0.         -0.1277388  -0.          0.         -0.
   0.11816994  0.         -0.        ]
 [ 0.03932226 -0.         -0.04570556  0.         -0.          0.
   0.04204833 -0.          0.        ]
 [ 0.00961505 -0.         -0.01020576 -0.          0.          0.
   0.00930852  0.         -0.        ]
 [ 0.00081809  0.         -0.00107424  0.         -0.          0.
   0.00102854  0.         -0.        ]]


In [11]:
10.962374348347298 / np.sqrt(2)

7.751569239621834

# Debugging: Compare coeffs with semi-analytical results

## Compute the coefficients for $l=1$ using 4 different approaches

To make sure that we understand every single aspect of the codes including all the prefactors and conventions, we compute the exact coefficients we would expect from an Oxygen molecule oriented along the z-axis. The computations are done using three (four) different methods: a fully analytical approach applicable to all $l$, a semi-analytical approach in which the final radial integral is evaluated numerically also valid for all $l$, and finally an explicit formula that was obtained directly for the special case $l=1$ starting at the very definition. For convenience, we also include the complete analytical formula for the special case $l=1$.

### Generate orthonormalization matrix

In [12]:
from scipy.integrate import quad
from pylode.lib.radial_basis import innerprod
from scipy.special import gamma, hyp1f1, sph_harm, iv

In [13]:
Nradial = 1000
sigma = np.ones(nmax, dtype=float)
for i in range(1, nmax):
    sigma[i] = np.sqrt(i)
sigma *= rcut / nmax

# Define primitive GTO-like radial basis functions
f_gto = lambda n, x: x**n * np.exp(-0.5 * (x / sigma[n])**2)
xx = np.linspace(0, rcut * 2.5, Nradial)
R_n = np.array([f_gto(n, xx) for n in range(nmax)])

# Orthonormalize
innerprods = np.zeros((nmax, nmax))
for i in range(nmax):
    for j in range(nmax):
        innerprods[i, j] = innerprod(xx, R_n[i], R_n[j])
eigvals, eigvecs = np.linalg.eigh(innerprods)
transformation = eigvecs @ np.diag(np.sqrt(1. / eigvals)) @ eigvecs.T

### Fully analytical formula for $l=1$

In [14]:
def coefficients_analytical_l1(nmax, d):
    # Define auxilary quantities and prefactors
    a = 1. / (2 * smearing**2)
    lplus3half = 2.5
    prefac_global = np.pi * np.sqrt(3/4) * a / gamma(lplus3half) * d * np.exp(-a*d**2)
    prefac_global /= (2 * np.pi * smearing**2)**1.5
    
    # Start main loop
    featvec = np.zeros((nmax))
    for n in range(nmax):
        # Compute contribution for primitive GTO function
        nlplus3half = (4 + n) / 2
        b = 1. / (2 * sigma[n]**2)
        prefac_n_dep = gamma(nlplus3half) / (a+b)**nlplus3half
        hyp = hyp1f1(nlplus3half, lplus3half, a**2*d**2/(a+b))
        featvec[n] = prefac_n_dep * hyp

    return prefac_global * (transformation @ featvec)

### Fully analytical formula for general $l$

In [15]:
def coefficients_analytical_general_l(nmax, d, l):
    # Define auxilary quantities and prefactors
    a = 1. / (2 * smearing**2)
    lplus3half = l + 1.5
    prefac_global = np.pi**1.5 * sph_harm(0, l, 0, 0).real * a / gamma(lplus3half)
    prefac_global *= d * np.exp(-a*d**2)
    prefac_global /= (2 * np.pi * smearing**2)**1.5

    # Start main loop
    featvec = np.zeros((nmax))
    for n in range(nmax):
        nlplus3half = (3 + n + l) / 2
        b = 1. / (2 * sigma[n]**2)
        prefac_n_dep = gamma(nlplus3half) / (a+b)**nlplus3half
        hyp = hyp1f1(nlplus3half, lplus3half, a**2*d**2/(a+b))
        featvec[n] = prefac_n_dep * hyp

    return prefac_global * (transformation @ featvec)   

### Semianalytical (final integral performed numerically) formula for general l

In [111]:
def coefficients_semianalytical_general_l(nmax, d, l):
    # Prefactor
    a = 1. / (2 * smearing**2)
    prefac_global = 4*np.pi * sph_harm(0, l, 0, 0).real * np.exp(-a*d**2)
    prefac_global /= (2 * np.pi * smearing**2)**1.5

    # Main loop performing the numerical integration
    featvec = np.zeros((nmax))
    for n in range(nmax):
        # Define the integrand consisting of a power-law, Gaussian and Bessel part
        b = 1. / (2 * sigma[n]**2)
        gaussian = lambda r: np.exp(-(a+b)*r**2)
        power = lambda r: r**(1+n)
        mod_sph_bessel = lambda x: np.sqrt(np.pi/2/x) * iv(l + 0.5, x)
        bessel = lambda r: mod_sph_bessel(2*a*d*r)
        integrand = lambda r: power(r) * gaussian(r) * bessel(r)
        
        # Numerical integration
        eps = 1e-10
        featvec[n] = quad(integrand, eps, 10*rcut)[0]

    return prefac_global * (transformation @ featvec)   

### Semianalytical (final integral performed numerically) formula for $l=1$

In [69]:
def coefficients_semianalytical_l1(nmax, d):
    # Prefactor
    a = 1. / (2 * smearing**2)
    prefac = np.sqrt(1.5) / (2 * np.pi * smearing**3)
    prefac *= np.exp(-a*d**2)
    
    # Start main loop
    featvec = np.zeros((nmax,))
    for n in range(nmax):
        # Start defining functions appearing in integrand
        b = 1. / (2 * sigma[n]**2)
        gaussian = lambda r: np.exp(-(a+b)*r**2)
        power = lambda r: r**(2+n)
        mod_sph_bessel = lambda x: (x*np.cosh(x) - np.sinh(x))/x**2
        bessel = lambda r: mod_sph_bessel(2*a*d*r)

        integrand = lambda r: power(r) * gaussian(r) * bessel(r) * 2
        
        # Numerical Integration
        featvec[n] = quad(integrand, 1e-10, 10 * rcut)[0]
    
    return prefac * (transformation @ featvec)

### Compare the coefficients obtained using different methods

In [112]:
features_analytical_l1 = np.zeros((len(distances), nmax))
features_semianalytical_l1 = np.zeros((len(distances), nmax))
features_analytical_general = np.zeros((len(distances), nmax))
features_semianalytical_general = np.zeros((len(distances), nmax))

for i_dist, d in enumerate(distances):
    features_analytical_l1[i_dist] = coefficients_analytical_l1(nmax, d)
    features_semianalytical_l1[i_dist] = coefficients_semianalytical_l1(nmax, d)
    features_analytical_general[i_dist] = coefficients_analytical_general_l(nmax, d, l=1)
    features_semianalytical_general[i_dist] = coefficients_semianalytical_general_l(nmax, d, l=1)

In [113]:
delta1 = np.linalg.norm(features_analytical_l1 - features_analytical_general)
delta2 = np.linalg.norm(features_analytical_general - features_semianalytical_general)
delta3 = np.linalg.norm(features_semianalytical_l1 - features_semianalytical_general)
assert delta1 < 1e-14
assert delta2 < 1e-14
assert delta3 < 1e-14

AssertionError: 

## Compare the exact values with those obtained from the three codes

The exact coefficients obtained using the analytical formula are now compared to those obtained from the three codes.

### Get the $l=1$ part of the features of the three codes

In [85]:
print('Shapes of feature vectors')
print('rascaline', descriptor_rascaline.values.shape)
print('librascal', features_librascal.shape)

Shapes of feature vectors
rascaline (20, 45)
librascal (20, 45)


In [90]:
features_L1_rascaline = descriptor_rascaline.values[0::2, 2*nmax:3*nmax] / 10.962374348347298 

In [91]:
features_L1_librascal = features_librascal[0::2, 2::(lmax+1)**2]

In [104]:
features_L1_pylode = features_pylode[0::2, 0, :, 2]

In [105]:
for features in [features_L1_rascaline, features_L1_librascal, features_L1_pylode, features_analytical_l1]:
    assert features.shape == (len(distances), nmax)

In [106]:
print(features_analytical_l1 / features_L1_pylode)

[[-1.00000652 -0.99999203 -1.00000484 -0.99997672 -1.00013495]
 [-1.00001176 -0.99999042 -1.00000417 -0.99997917 -1.00010812]
 [-0.99999967 -0.99999679 -1.00001156 -0.99995062 -1.00022285]
 [-0.999988   -0.99999817 -1.0000171  -0.99993273 -1.00027445]
 [-0.99999971 -0.99999182 -1.00001961 -0.9999286  -1.0002671 ]
 [-1.00004083 -0.99998263 -1.00002343 -0.99992237 -1.00026729]
 [-1.0001318  -0.99997251 -1.00003167 -0.99990673 -1.00029551]
 [-0.99987848 -0.99995229 -1.00004472 -0.99988466 -1.00033642]
 [-0.9999175  -0.99989227 -1.0000633  -0.99985851 -1.00037995]
 [-0.99985533 -0.99965541 -1.00009136 -0.99982544 -1.00043086]]


In [107]:
print(features_analytical_l1 / features_L1_rascaline)

[[  10.28713756   16.43517202  -20.558928      9.27042635   -5.19116778]
 [   9.77458243   13.96330621  -30.78519581   10.51760787   -5.49395653]
 [   9.07938946   12.43607865  -63.19524416   13.60294306   -7.24849529]
 [   8.12652275   11.30270389 2037.8391698    20.03520446  -11.47166267]
 [   6.79638072   10.32556364   62.45417019   34.67792844  -23.25304518]
 [   4.89369633    9.37042525   32.69576047   79.14078711  -97.75420123]
 [   2.09881702    8.33439651   22.59529455  496.49517869   90.48743811]
 [  -2.07770659    7.10699822   17.47936955 -302.31193861   43.83847828]
 [  -8.15579123    5.5286498    14.34035059 -245.05753763   36.20664184]
 [ -15.57813624    3.30894059   12.16169389 -835.47080194   35.83297961]]


In [114]:
print(features_semianalytical_general / features_L1_librascal)

[[  13.02502924    8.93481348  -13.17746885    6.13230946   -2.94123088]
 [  11.63657475    8.45317492  -18.85183055    6.4425329    -2.9715553 ]
 [  10.21276809    7.80510645  -36.88175146    7.73664343   -3.72743698]
 [   8.76335324    7.09471808 1131.22722203   10.62070829   -5.59549455]
 [   7.26233527    6.36661279   32.93751937   17.203659    -10.7474639 ]
 [   5.63620659    5.6371188    16.37446942   36.88438309  -42.81131864]
 [   3.74581104    4.90386018   10.7495392   218.13117543   37.5730556 ]
 [   1.36671538    4.14760433    7.90765615 -125.58043314   17.27674341]
 [  -1.76181228    3.32702909    6.17954936  -96.50461231   13.56059421]
 [  -5.49020168    2.3589434     5.00317787 -312.64728256   12.7727958 ]]
