In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

from ase import Atoms

from rascal.representations import SphericalInvariants
from rascal.models import Kernel

### Check equation (9) in [Comparing molecules and solids across structural and alchemical space](https://arxiv.org/abs/1601.04077)

In [2]:
def get_systems_tag(frames):
    """Label every element of every frame with the index of its frame.

    Parameters
    ----------
    frames : sequence of sized objects
        Each item must support ``len()``.

    Returns
    -------
    numpy.ndarray
        1-D array in which frame index ``i`` is repeated ``len(frames[i])``
        times, in frame order.
    """
    tags = [
        frame_idx
        for frame_idx, frame in enumerate(frames)
        for _ in range(len(frame))
    ]
    return np.array(tags)
def get_dist_mat(soaps_vectors, normalized=True):
    """Return the pairwise distance matrix between feature vectors.

    Distances are computed with ``scipy.spatial.distance.pdist`` using its
    default metric and expanded to a square matrix with ``squareform``.

    Parameters
    ----------
    soaps_vectors : array-like
        2-D collection with one feature vector per row.
    normalized : bool, default True
        If True, divide every distance by the largest distance in the
        matrix. If False, return the raw distances.

    Returns
    -------
    pandas.DataFrame
        Square, symmetric distance matrix with a zero diagonal.

    Notes
    -----
    As a side effect this sets two *global* pandas display options (show all
    columns, format floats with 4 decimals).
    """
    distance = squareform(pdist(soaps_vectors))

    if normalized:
        max_val = distance.max() if distance.size else 0.0
        # When all vectors coincide the largest distance is 0; dividing would
        # turn the whole matrix into NaN, so leave the zeros untouched.
        if max_val > 0:
            distance = distance / max_val

    # Show all columns without truncation and print floats with 4 decimals.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.float_format', lambda x: '%.4f' % x)
    return pd.DataFrame(distance)
def avg_soaps(atoms_soaps_features, frames):
    """Average the feature rows belonging to each frame.

    Parameters
    ----------
    atoms_soaps_features : array-like
        2-D feature table whose rows are ordered frame by frame, with
        ``len(frame)`` consecutive rows for each frame.
    frames : sequence of sized objects
        The frames the rows were computed from; only their lengths are used
        (via ``get_systems_tag``).

    Returns
    -------
    numpy.ndarray
        One row per frame holding the column-wise mean of that frame's rows.
    """
    return (
        pd.DataFrame(atoms_soaps_features)
        .assign(molecule=get_systems_tag(frames))
        .groupby("molecule")
        .mean()
        .values
    )

def get_kernel_mat(soaps_vectors, sigma=0.5):
    """Return the Gaussian kernel matrix between feature vectors.

    Each entry is ``exp(-d**2 / (2 * sigma**2))`` where ``d`` is the pairwise
    distance returned by ``scipy.spatial.distance.pdist`` (default metric).

    Parameters
    ----------
    soaps_vectors : array-like
        2-D collection with one feature vector per row.
    sigma : float, default 0.5
        Width of the Gaussian kernel; must be strictly positive.

    Returns
    -------
    pandas.DataFrame
        Square, symmetric kernel matrix with ones on the diagonal.

    Raises
    ------
    ValueError
        If ``sigma`` is not strictly positive.
    """
    if sigma <= 0:
        # sigma == 0 would give 0/0 on the diagonal; negative widths are meaningless.
        raise ValueError("sigma must be strictly positive")

    distance = squareform(pdist(soaps_vectors))
    kernel_matrix = np.exp(-distance ** 2 / (2 * sigma ** 2))
    return pd.DataFrame(kernel_matrix)

In [3]:
# Reference set: six two-carbon frames whose atoms are separated by
# 1.0, 1.1, 1.21, 1.33, 1.01 and 1.5 along the x or y axis.
samples0 = [
    Atoms(symbols, positions=coords)
    for symbols, coords in (
        ("CC", [[0, 0, 0], [1, 0, 0]]),
        ("CC", [[0, 0, 0], [0, 1.1, 0]]),
        ("CC", [[0, 0, 0], [1.21, 0, 0]]),
        ("CC", [[0, 0, 0], [0, 1.33, 0]]),
        ("CC", [[1, 0, 1], [1, 1.01, 1]]),
        ("CC", [[0, 0, 0], [1.5, 0, 0]]),
    )
]

# Same geometries as samples0, except that frame 4 is C-N instead of C-C.
samples = [
    Atoms(symbols, positions=coords)
    for symbols, coords in (
        ("CC", [[0, 0, 0], [1, 0, 0]]),
        ("CC", [[0, 0, 0], [0, 1.1, 0]]),
        ("CC", [[0, 0, 0], [1.21, 0, 0]]),
        ("CC", [[0, 0, 0], [0, 1.33, 0]]),
        ("CN", [[1, 0, 1], [1, 1.01, 1]]),
        ("CC", [[0, 0, 0], [1.5, 0, 0]]),
    )
]

In [4]:
# Uncomment to display the SphericalInvariants docstring (IPython help):
#SphericalInvariants?

In [5]:
# Hyperparameters passed as keyword arguments to SphericalInvariants.
hypers = dict(
    soap_type="PowerSpectrum",
    interaction_cutoff=5.0,
    max_radial=6,
    max_angular=6,
    gaussian_sigma_constant=0.4,
    gaussian_sigma_type="Constant",
    # NOTE(review): the smoothing width 0.5 is given both here and in
    # cutoff_function_parameters below; confirm which one the library uses.
    cutoff_smooth_width=0.5,
    radial_basis="GTO",
    cutoff_function_type="ShiftedCosine",
    cutoff_function_parameters={"width": 0.5},
    # NOTE(review): presumably the atomic numbers of C and N, the only
    # elements in the sample frames; verify against the library docs.
    global_species=[6, 7],
)
soap = SphericalInvariants(**hypers)

In [6]:
# Give every frame of both sets the same 10 x 10 x 10 cell with periodic
# boundaries on all three axes, then call wrap() on it. samples0 is
# processed first, then samples, exactly as two separate loops would.
for sample in [*samples0, *samples]:
    sample.cell = [10, 10, 10]
    sample.pbc = (1, 1, 1)
    sample.wrap()

In [7]:
# Run the SphericalInvariants calculator on each set of frames.
soap_rep0 = soap.transform(samples0)
soap_rep = soap.transform(samples)

In [8]:
# Extract the feature matrices for the all-carbon set (X0) and the set
# containing the C-N frame (X).
# NOTE(review): rows appear to be per atom (X0 has 12 rows for 6 two-atom frames).
X0=soap_rep0.get_features(soap)
X=soap_rep.get_features(soap)

In [14]:
# Sanity check on the feature matrix: 6 frames x 2 atoms should give 12 rows.
X0.shape

(12, 252)

In [9]:
# Collapse the per-atom feature rows into one averaged vector per frame.
avg_soap_samples0=avg_soaps(X0, samples0)
avg_soap_samples=avg_soaps(X, samples)

In [10]:
# Max-normalised pairwise distances between the averaged features of the
# all-carbon frames.
get_dist_mat(avg_soap_samples0)

Unnamed: 0,0,1,2,3,4,5
0,0.0,0.1949,0.4064,0.645,0.0198,1.0
1,0.1949,0.0,0.2237,0.4873,0.1755,0.8859
2,0.4064,0.2237,0.0,0.2767,0.3888,0.7068
3,0.645,0.4873,0.2767,0.0,0.6305,0.4472
4,0.0198,0.1755,0.3888,0.6305,0.0,0.9905
5,1.0,0.8859,0.7068,0.4472,0.9905,0.0


In [11]:
# Same matrix for the set whose frame 4 is C-N. In the output that frame is
# ~0.99-1.0 away from every other frame, so it sets the normalisation scale.
get_dist_mat(avg_soap_samples)

Unnamed: 0,0,1,2,3,4,5
0,0.0,0.0565,0.1178,0.187,0.9889,0.29
1,0.0565,0.0,0.0649,0.1413,0.9917,0.2569
2,0.1178,0.0649,0.0,0.0802,0.9942,0.2049
3,0.187,0.1413,0.0802,0.0,0.9965,0.1297
4,0.9889,0.9917,0.9942,0.9965,0.0,1.0
5,0.29,0.2569,0.2049,0.1297,1.0,0.0
