In [None]:
from ase.io import read
import ase
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt

from rascal.representations import SphericalExpansion, SphericalInvariants
from rascal.utils import (get_radial_basis_covariance, get_radial_basis_pca, 
                          get_radial_basis_projections, get_optimal_radial_basis_hypers )
from rascal.utils import radial_basis
from rascal.utils import (WignerDReal, ClebschGordanReal, 
                          spherical_expansion_reshape, spherical_expansion_conjugate,
                          lm_slice, real2complex_matrix, xyz_to_spherical, spherical_to_xyz)

This notebook provides examples of the kind of manipulations that need to be applied to rotate structures, spherical expansion coefficients and higher-order equivariants, which are useful to test equivariance of features and kernels, and in general to manipulate invariant and equivariant properties. 

The basic idea, using notation from [Musil et al. (2021)](http://doi.org/10.1021/acs.chemrev.1c00021), is that if a structure $A$ is acted upon with a rotation $\hat{R}$, _equivariant_ features transform in a prescribed way, i.e.
$$
|\hat{R} A; \lambda \mu\rangle = \sum_{\mu'} [D(\hat{R})^\lambda_{\mu\mu'}]^\star | A; \lambda \mu'\rangle.
$$

$D(\hat{R})^\lambda_{\mu\mu'}$ are the elements of a _Wigner D matrix_ that enact rotations of spherical harmonics $Y^\mu_\lambda$. Given that spherical harmonics are irreducible representations of the rotation group, any property or set of features can be decomposed into terms that transform like one or more sets of $Y^\mu_\lambda$.

Using ML models based on equivariant features is useful because it allows building basic symmetries of the target properties into the model structure, improving the data efficiency of the models. 

# Spherical expansion coefficients

This section demonstrates the manipulations of spherical expansion coefficients, the simplest equivariant way of representing an environment $A_i$ centered on atom $i$,
$$
\langle a n l m|A; \rho_i\rangle = \sum_{j\in A_i} \delta_{aa_j}
\int \mathrm{d}\mathbf{r} \langle n|r\rangle \langle lm|\hat{\mathbf{r}}\rangle
\langle \mathbf{r} | \mathbf{r}_{ji}; g\rangle
$$
Note that $\langle lm|\hat{\mathbf{r}}\rangle$ is the complex conjugate of a spherical harmonic, and so strictly speaking the equivariant is $\langle a n l m|A; \rho_i\rangle^\star$. This section also discusses briefly some of the subtleties that arise when converting expressions based on complex-valued spherical harmonics into their real-valued counterparts, that are used in the librascal implementation.

In [None]:
# imports also some internals to demonstrate manually some CG manipulations
from rascal.utils.cg_utils import _r2c as r2c
from rascal.utils.cg_utils import _c2r as c2r
from rascal.utils.cg_utils import _cg as clebsch_gordan
from rascal.utils.cg_utils import _rotation as rotation
from rascal.utils.cg_utils import _wigner_d as wigner_d

## Loads the structures

In [None]:
import urllib.request

# Distorted ethanol molecules from the ANI-1 dataset
# (see https://github.com/isayev/ANI1_dataset); energies and forces were
# computed using DFTB+ (see https://www.dftbplus.org/)
url = (
    'https://raw.githubusercontent.com/cosmo-epfl/librascal-example-data/833b4336a7daf471e16993158322b3ea807b9d3f/inputs/molecule_conformers_dftb.xyz'
)
# urlretrieve() saves the download to a temporary file and returns a
# (local path, headers) pair; the path (e.g. '/tmp/tmpb48zma.txt') is kept
# in `structures_fn` and displayed as the cell output
structures_fn, headers = urllib.request.urlretrieve(url)
structures_fn

In [None]:
# Total number of structures to load
N = 100

# read the first N frames of the downloaded file (index string ":N")
frames = read(structures_fn, index=':{}'.format(N))

## Demonstrate the equivariance of spherical expansion coefficients

First, we compute the density expansion coefficients on a representative dataset

In [None]:
# Hyperparameters of the density expansion; the same dictionary is reused
# later on to reshape the features and to size the Wigner D / CG helpers
spherical_expansion_hypers = dict(
    interaction_cutoff=3,
    max_radial=8,
    max_angular=6,
    gaussian_sigma_constant=0.3,
    gaussian_sigma_type="Constant",
    cutoff_smooth_width=0.5,
    radial_basis="GTO",
)

spex = SphericalExpansion(**spherical_expansion_hypers)

In [None]:
# frame and l value used for the test
selframe = frames[8]
sel_l = 3
# just a scaling to make coefficients O(1)
feat_scaling = 1e6

feats = spex.transform(selframe).get_features(spex)
ref_feats = feat_scaling * spherical_expansion_reshape(
    feats, **spherical_expansion_hypers
)

Then, we rotate the structure, and compute the features of the rotated configuration. 

In [None]:
# random rotation in terms of Euler angles. A dedicated, seeded generator is
# used so that "Restart & Run All" always reproduces the same rotation without
# touching the global numpy random state; change ROTATION_SEED to try another
# rotation.
ROTATION_SEED = 20210
abc = np.random.RandomState(ROTATION_SEED).uniform(size=3) * np.pi

In [None]:
# this is the Cartesian rotation matrix (helper function, follows ZYZ convention)
mrot = rotation(*abc)

In [None]:
# computes the rotated structure and the associated features
# (works on a copy, so that selframe itself is left untouched)
rotframe = selframe.copy()
# row-vector convention: right-multiplying by mrot.T applies mrot to each
# position vector, since (mrot @ x)^T = x^T @ mrot^T
rotframe.positions = rotframe.positions @ mrot.T
rotframe.cell = rotframe.cell @ mrot.T   # rotate also the cell
rotfeats = spex.transform(rotframe).get_features(spex)
# same reshaping and scaling as for ref_feats, so the two can be compared directly
ref_feats_rot = feat_scaling*spherical_expansion_reshape(rotfeats, **spherical_expansion_hypers)

In [None]:
print(ref_feats[0,0,0,lm_slice(sel_l)])
np.linalg.norm(ref_feats[0,0,0,lm_slice(sel_l)])

the coefficients have the same magnitude, but they differ because of rotation!

In [None]:
print(ref_feats_rot[0,0,0,lm_slice(sel_l)])
np.linalg.norm(ref_feats_rot[0,0,0,lm_slice(sel_l)])

## Rotate the spherical expansion features using Wigner matrices

In [None]:
# computing the wigner matrix takes some time for L>4
rotation_d = wigner_d(sel_l, *abc)

In [None]:
# D^l_{mm'} is unitary: D^dagger @ D should show up as the identity matrix
plt.matshow(np.real(np.conjugate(rotation_d.T)@rotation_d))

Note that given that librascal computes features in terms of real spherical harmonics, we have to convert back to complex-valued coefficients, using the utility functions `c2r` and `r2c` from `cg_utils`.

Note also that strictly speaking calling `r2c` on the coefficients from librascal yields objects that transform as $Y^m_l$, and not their complex-conjugates - so in fact we are computing the equivariants $\langle n| A; \overline{\rho_i^{\otimes 1}; lm} \rangle \equiv \langle nlm|A; \rho_i \rangle^\star$ (see again [Musil et al. (2021)](http://doi.org/10.1021/acs.chemrev.1c00021))

In [None]:
#  back and forth to check transformation from real to complex SPH
c2r(r2c(ref_feats[0,0,0,lm_slice(sel_l)])) - ref_feats[0,0,0,lm_slice(sel_l)]

Rotating the coefficients using the Wigner D matrix formula
$\langle n| \hat{R}A; \overline{\rho_i^{\otimes 1}; lm} \rangle  = 
\sum_{m'} D^l_{mm'}(\hat{R})^\star \langle n| A; \overline{\rho_i^{\otimes 1}; lm'} \rangle 
$ gives features that are equal to those of the rotated structure

In [None]:
ref_feats[0,0,0,lm_slice(sel_l)]

In [None]:
ref_feats_rot[0,0,0,lm_slice(sel_l)]

In [None]:
# some bookkeeping is needed to convert back to real
c2r(np.conjugate(rotation_d)@r2c(ref_feats[0,0,0,lm_slice(sel_l)]))

## Direct real transformations

There's no "real" reason to go through the complex algebra for rotations - we can transform once and for all the coefficients and be done with that!

Key idea is that the complex $\leftrightarrow$ real transformations can be formulated in a matrix form

In [None]:
# real -> complex change of basis for l = sel_l, written as a matrix;
# the complex -> real map is taken as its conjugate transpose
r2c_mat = real2complex_matrix(sel_l)
c2r_mat = r2c_mat.conj().T

.... which can be used to transform features between complex and real

In [None]:
# we can use this to transform features
r2c_mat@ref_feats[0,0,0,lm_slice(sel_l)] - r2c(ref_feats[0,0,0,lm_slice(sel_l)])

... but can also be applied to matrices that act on the features

In [None]:
# and Wigner D matrix as well
real_rotation_d = np.real(c2r_mat @ np.conjugate(rotation_d) @ r2c_mat)

The direct real rotation is equal (modulo noise) to going back and forth from complex sph

In [None]:
real_rotation_d @ ref_feats[0,0,0,lm_slice(sel_l)] - ref_feats_rot[0,0,0,lm_slice(sel_l)]

# Clebsch-Gordan iteration

CG coefficients $\langle l_1 m_1; l_2 m_2 | LM\rangle$ are essential in the quantum theory of angular momentum, describing the combination of two angular momentum kets to form a coupled basis
$$
\sum_{m_1,m_2} |l_1m_1\rangle|l_2m_2\rangle \langle l_1 m_1; l_2 m_2 | LM\rangle = |LM\rangle
$$
They are also useful to combine equivariant objects so that the resulting object is still equivariant. This is the idea behind NICE iteration [Nigam et al., JCP (2020)](http://doi.org/10.1063/5.0021116)
$$
\langle Q; nlk|\overline{\rho^{\otimes \nu+1}_i; \lambda\mu}\rangle = 
\sum_{m q} \langle n | \overline{\rho^{1}_i; lm}\rangle
\langle Q|\overline{\rho^{\otimes \nu}_i; kq}\rangle 
\langle lm; kq | \lambda\mu \rangle
$$
as well as equivariant neural networks.
The relationship to NICE as well as to more established features is demonstrated in more detail in [a dedicated notebook](nice_demo.ipynb), while here we focus on how the result of a CG iteration applied to equivariant features is also equivariant.

In [None]:
# these are the l indices of the features being combined, and that of the result
sl1, sl2, sL = 3, 2, sel_l
# computes the corresponding CG coefficient
cg = clebsch_gordan(sl1, sl2, sL)

This is an example of the application of the CG iteration to two equivariants of structure $A$, and to the corresponding features in the rotated structure $\hat{R}A$. 

In [None]:
cg_feats = np.einsum("abc,a,b->c", cg,
                    r2c(ref_feats[0,0,0,lm_slice(sl1)]), 
                    r2c(ref_feats[0,0,0,lm_slice(sl2)]))

In [None]:
cg_feats_rot = np.einsum("abc,a,b->c", cg,
                    r2c(ref_feats_rot[0,0,0,lm_slice(sl1)]), 
                    r2c(ref_feats_rot[0,0,0,lm_slice(sl2)]))

In [None]:
cg_feats

In [None]:
cg_feats_rot

The result is equivariant, in that the CG features of $\hat{R}A$ can be obtained by applying the usual Wigner D multiplication to the CG features of $A$

In [None]:
np.conjugate(rotation_d)@cg_feats - cg_feats_rot

## Real form of the iteration

Similarly to the Wigner rotation, the CG iteration can also be cast in a way so it acts directly on the real-valued coefficients

In [None]:
r2c_mat_l1 = real2complex_matrix(sl1)
r2c_mat_l2 = real2complex_matrix(sl2)
r2c_mat_L = real2complex_matrix(sL)

computing the real-valued CGs requires converting in the appropriate way inputs AND outputs

In [None]:
# Change of basis of the complex CG tensor cg[a, b, c] (a, b run over the
# m's of the two inputs, c over the M of the output): the input indices are
# contracted with the real->complex matrices of l1 and l2, the output index
# with the conjugate transpose of the one of L. np.real then drops the
# imaginary part of the result.
real_cg = np.real(np.einsum("abc, ax, by, zc -> xyz", cg, r2c_mat_l1, r2c_mat_l2, np.conjugate(r2c_mat_L.T)))

while the "complex" CG have a simple sparsity pattern (m1+m2=M), the real-valued are kind of messy because they need to pick up and combine real and imaginary parts of the expansion coefficients

In [None]:
cg[:,:,2]

In [None]:
real_cg[:,:,2]

... but at the end of the day, they work just fine!

In [None]:
real_cg_feats = np.einsum("abc,a,b->c",real_cg,
                    ref_feats[0,0,0,lm_slice(sl1)],
                    ref_feats[0,0,0,lm_slice(sl2)])

In [None]:
real_cg_feats - c2r(cg_feats)

needless to say, these are also equivariant, and can be acted upon with the real-valued wigner matrix

In [None]:
real_rotation_d @ real_cg_feats - c2r(cg_feats_rot)

# Streamlined WignerD, and CG class

The manipulations discussed above (and more!) can be realized using the utility classes defined in `rascal.utils`. In particular, 

`WignerDReal` is a Wigner D matrix implementation to rotate $Y^m_l$ - like coefficients. `WignerDReal` also allows rotating structures so you won't have to wonder about what Euler angle convention is being used ever again

`ClebschGordanReal` precomputes Clebsch-Gordan coefficients, and applies different kinds of related operations using real-only storage of the spherical expansion coefficients. 


In [None]:
# WignerDReal is initialized with the Euler angles of the rotation
WD = WignerDReal(spherical_expansion_hypers["max_angular"], *abc)
CG = ClebschGordanReal(spherical_expansion_hypers["max_angular"])

In [None]:
# test inputs: the l = 0..4 blocks of the [0, 0, 0] features, collected in a
# list indexed by l, for the original and for the rotated structure
test_feats = [ref_feats[0, 0, 0, lm_slice(ang)] for ang in range(5)]
test_feats_rot = [ref_feats_rot[0, 0, 0, lm_slice(ang)] for ang in range(5)]

## Rotation and CG iteration

`WignerDReal` can be used to rotate a frame (ASE or librascal internal format)

In [None]:
test_frame = WD.rotate_frame(selframe.copy())

In [None]:
test_frame.positions - rotframe.positions

`CG.combine` implements in the most straightforward manner the CG iteration that takes two equivariants, and produces an equivariant of the specified $L$ order. The order $l$ of each input is inferred from its size.

In [None]:
t1 = CG.combine(test_feats[3], test_feats[4], sel_l)
t1_r = CG.combine(test_feats_rot[3], test_feats_rot[4], sel_l)

In [None]:
t1

The result can be rotated with the WignerD helper (and, ça va sans dire, matches the equivariant computed for the rotated structure)

In [None]:
WD.rotate(t1) - t1_r

Lots of fun: we can rinse, repeat as much as we want (the "I" in NICE is for "iterative"!)

In [None]:
t2 = CG.combine(t1, test_feats[3], 2)
t2_r = CG.combine(t1_r, test_feats_rot[3], 2)

In [None]:
WD.rotate(t2) - t2_r

In [None]:
t3 = CG.combine(t2, test_feats[1], 1)
t3_r = CG.combine(t2_r, test_feats_rot[1], 1)

In [None]:
WD.rotate(t3) - t3_r # note: the residual grows only because the features become very large

A small twist: we can also combine two equivariants that are not expansion coefficients!

In [None]:
t4 = CG.combine(t3, t2, 3)
t4_r = CG.combine(t3_r, t2_r, 3)

In [None]:
(WD.rotate(t4) - t4_r)/np.linalg.norm(t4)

Note that the CG iter is built to fail gracefully if called with "impossible" inputs (e.g. with l1,l2,L that do not fulfill the triangle inequality)

In [None]:
CG.combine(t2, t4, 6)  #nb: t2 is L=2 and t4 is L=3

... but not when called outside the precomputed range 

In [None]:
# catch the exception because example notebooks should run without errors
try: 
    CG.combine(t2, t4, 12)
except ValueError:
    print("ValueError: Requested CG entry has not been precomputed")

# Feature products

Another common use of CG coefficients is to expand products of spherical harmonics into objects that transform as individual irreps of $O(3)$. This is also implemented as part of `ClebschGordanReal`. The inputs are basically outer products of the features, $|l_1 m_1; l_2 m_2\rangle = |l_1 m_1\rangle |l_2 m_2\rangle$

In [None]:
# outer products of the l=2 and l=3 feature blocks, for the original and
# for the rotated structure
test_prod = np.outer(test_feats[2], test_feats[3])
test_prod_rot = np.outer(test_feats_rot[2], test_feats_rot[3])

In [None]:
plt.matshow(test_prod); plt.show(); plt.matshow(test_prod_rot);

`ClebschGordanReal.couple()` takes one of these $(2l_1+1)\times (2l_2+1)$ matrices and expands it into a list of terms with $L\in[|l_1-l_2|,(l_1+l_2)]$, i.e.
$$
|(l_1, l_2); L M\rangle = \sum_{m_1 m_2} |l_1 m_1 \rangle  |l_2 m_2 \rangle  \langle l_1 m_1; l_2 m_2 |L M \rangle
$$

In [None]:
test_coupled = CG.couple(test_prod)

Depending on the $l$ values they come from, the $L$ items have a different nature (e.g. in terms of parity), and so the coupling function returns a dictionary in which each entry keeps track of the $l$ values of the terms that have been coupled

In [None]:
test_coupled

The coupled entries transform as $Y^m_l$ and can be rotated accordingly

In [None]:
test_coupled_rot = CG.couple(test_prod_rot)

In [None]:
test_coupled_rot[(2,3)][3]

In [None]:
WD.rotate(test_coupled[(2,3)][3]) - test_coupled_rot[(2,3)][3]

The coupled coefficients can be translated back into the product form

In [None]:
test_decoupled = CG.decouple(test_coupled)

In [None]:
test_decoupled

In [None]:
np.linalg.norm(test_decoupled - test_prod)/np.linalg.norm(test_prod)

This is a consequence of the fact that the real CG are orthogonal, just like their conventional counterparts

In [None]:
# Accumulates sum_{L,M} <l1 m1; l2 m2|L M> <l1 m1'; l2 m2'|L M> into a rank-4
# array. This also gives a view into the internal storage of the CG
# coefficients: CG._cgdict[(l1, l2, L)][M] is iterated as (m1, m2, value)
# triplets, a sparse format chosen because of the non-trivial sparsity pattern
l1, l2 = next(iter(test_coupled))
prod = np.zeros((2*l1+1, 2*l2+1) * 2)
for L in range(abs(l1-l2), abs(l1+l2)+1):
    for M in range(2*L+1):
        entries = CG._cgdict[(l1, l2, L)][M]
        for m1, m2, mcg in entries:
            for m1p, m2p, mcgp in entries:
                prod[m1, m2, m1p, m2p] += mcg*mcgp

In [None]:
pr = prod.reshape((2*l1+1)*(2*l2+1),(2*l1+1)*(2*l2+1))
plt.matshow(pr)

## Higher products

The coupling/decoupling machinery can be applied also to higher products $\prod |l_i m_i\rangle$, in an iterative fashion

In [None]:
# this is a pretty complicated product
test_prod = np.einsum("a,b,c,d->abcd",test_feats[2],test_feats[1],test_feats[1],test_feats[2])

In [None]:
test_coupled = CG.couple(test_prod, iterate=2)

There are MANY terms, but all transform as the associated $Y^M_L$. The dictionary keys are composed according to the sequence of $l$ channels that were coupled.

In [None]:
list(test_coupled.keys())

In [None]:
test_coupled[(2,  1,  1,  1, 1,  2)][3]

In [None]:
test_prod_rot = np.einsum("a,b,c,d->abcd",test_feats_rot[2],test_feats_rot[1],test_feats_rot[1],test_feats_rot[2])
test_coupled_rot = CG.couple(test_prod_rot, iterate=2)

In [None]:
WD.rotate(test_coupled[(2,  1,  1,  1, 1,  2)][3]) - test_coupled_rot[(2,  1,  1,  1, 1,  2)][3]

even if it's really messy behind the scenes, even this nested coupling sequence can be undone exactly (provided there's sufficiently high LMAX)

In [None]:
test_decoupled = CG.decouple(test_coupled, iterate=2)

In [None]:
np.linalg.norm(test_decoupled-test_prod)/np.linalg.norm(test_prod)

## An application: irreducible spherical tensors

Cartesian tensors (products of terms transforming like $x,y,z$) transform as a combination of rotation matrices. They can instead be decomposed into an _irreducible spherical tensor_ form, that transforms like irreps of $SO(3)$

For instance, take the product of atomic vectors

In [None]:
xx = selframe.positions.T @ selframe.positions

this converts xyz arrays to spherical l=1 form, so we can use the CG machinery

In [None]:
xx_spherical = xyz_to_spherical(xx)

the outer product of (xyz) decomposes as L=0+L=2 (L=1 is missing because of symmetry)

In [None]:
xx_coupled = CG.couple(xx_spherical)

In [None]:
xx_coupled

we can build the rotated tensor by rotating the irreducible representations. note that CG.decouple is smart enough to ignore the missing L=1 component (that would be full of zeros!)

In [None]:
# Rotate each irreducible component on its own: the L=0 term is copied
# unchanged, the L=2 term is rotated with the real Wigner D helper.
# The coupled dictionary gets its own name, so that xx_rot_wd only ever
# refers to the final Cartesian tensor (previously the same name was bound
# first to a dict and then to an array).
xx_rot_coupled = {
    (1, 1): {
        0: xx_coupled[(1, 1)][0],
        2: WD.rotate(xx_coupled[(1, 1)][2]),
    }
}
# back to the product form, then from the spherical l=1 basis to xyz
xx_rot_wd = spherical_to_xyz(CG.decouple(xx_rot_coupled))

This matches the corresponding tensor computed for the rotated frame

In [None]:
xx_rot = rotframe.positions.T @ rotframe.positions

In [None]:
print(xx_rot)

In [None]:
# tensor rebuilt from the rotated irreducible components, to be compared with
# xx_rot computed directly from the rotated frame (the previous name,
# xx_rot_manual, was never defined and raised a NameError)
print(xx_rot_wd)