In [1]:
%load_ext autoreload
%autoreload 2

import time

import numpy as np
from ase.io import read
from ase import Atoms

# Imports specific to this "library"
from pylode import DensityProjectionCalculator
from pylode.lib.kvec_generator import KvectorGenerator

# K-vector Example

In [2]:
# Real-space width of the smeared target density. It fixes the cutoff in
# reciprocal space; pi/smearing was shown to yield good LODE convergence.
smearing_realspace = 1.5
kspace_cutoff = np.pi / smearing_realspace

# Cubic cell with edge length L
L = 15.6
cell = L * np.identity(3)

# Set up the generator and compute the k-vectors within the cutoff.
# NOTE(review): need_origin=False presumably leaves out the k=0 vector -- confirm.
kvecgen = KvectorGenerator(cell, kspace_cutoff, need_origin=False)
kvecgen.compute()

# Report the number of k-vectors and the shapes of the stored arrays
for label, value in (
    ('Number of k-vectors = ', kvecgen.kvector_number),
    ('Shape of k-vectors array = ', kvecgen.kvectors.shape),
    ('Shape of array for norms = ', kvecgen.kvector_norms.shape),
):
    print(label, value)

Number of k-vectors =  309
Shape of k-vectors array =  (309, 3)
Shape of array for norms =  (309,)


# Example Gaussian

In [3]:
# Toy data set: O2 dimers in a cubic periodic cell. The second atom is
# displaced from the first along z by each value in `distances`.
cell = np.eye(3) * 12
distances = np.linspace(1.5, 2., 5)
frames = [
    Atoms('O2', positions=[[1, 1, 1], [1, 1, d + 1]], cell=cell, pbc=True)
    for d in distances
]

# Define hyperparameters
hypers = {
    'smearing': 1.5,
    'max_angular': 5,
    'max_radial': 8,
    'cutoff_radius': 5.,
    'potential_exponent': 0,
    'radial_basis': 'gto',
    'compute_gradients': False,
}

# Time only the calculator construction and the feature evaluation, using
# a monotonic clock; printing is kept outside the timed region.
tstart = time.perf_counter()
calculator = DensityProjectionCalculator(**hypers)
calculator.transform(frames)
tend = time.perf_counter()
dt = tend - tstart

print('Shapes = ', calculator.features.shape)
print(f'Required time for {len(frames)} frames = {dt}s')

cell= Cell([12.0, 12.0, 12.0])
cell= Cell([12.0, 12.0, 12.0])
cell= Cell([12.0, 12.0, 12.0])
cell= Cell([12.0, 12.0, 12.0])
cell= Cell([12.0, 12.0, 12.0])
Shapes =  (10, 1, 8, 36)
Required time for 5 frames = 1.8305718898773193s


# Example BaTiO3

In [4]:
# Load the BaTiO3 training set once. The first 30 frames are used for the
# timed demonstration; the full list is only needed to extrapolate the cost.
# NOTE(review): the original comment here described a dictionary mapping the
# three elements (O, Ba, Ti) to the indices 0, 1, 2 and warned about errors if
# the indices are not 0, 1, ..., num_species-1. No such dictionary is defined
# or passed to the calculator in this cell -- confirm whether it is still needed.
BATIO3_DATASET = '../datasets/BaTiO3_Training_set.xyz'
frames_all = read(BATIO3_DATASET, ':')
frames = frames_all[:30]

# Define hyperparameters
# NOTE(review): the remark on 'potential_exponent' below may be outdated, since
# the Gaussian example cell passes potential_exponent=0 -- confirm.
hypers = {
    'smearing':2.0, # WARNING: comp. cost scales cubically with 1/smearing
    'max_radial':6,
    'max_angular':6,
    'cutoff_radius':4.5,
    'potential_exponent':1, # currently, only the exponent p=1 is supported
    'compute_gradients':True,
    'radial_basis': 'GTO',
}

# Evaluate the features on all selected frames and record the required time
# (monotonic clock, so the duration is unaffected by system clock changes).
tstart = time.perf_counter()
calculator = DensityProjectionCalculator(**hypers)
calculator.transform(frames)
dt = time.perf_counter() - tstart

# Example for how to get features and gradients
# Check out get_features() and get_feature_gradients() for a detailed
# description of the array format
features = calculator.features
gradients = calculator.feature_gradients
print('BaTiO3 data set')
print('Shape of obtained feature array = ', features.shape)
print('Shape of obtained gradient array = ', gradients.shape)

# Reference values to understand the array shapes
print('\nValues for reference to understand the array shapes:')
print('Number of frames =', len(frames))
print('Total number of environments = ', sum(len(frame) for frame in frames))
print('nmax = ', calculator.max_radial)
print('lmax = ', calculator.max_angular)
print('(lmax+1)^2 = ', (hypers['max_angular']+1)**2)
print('Note: nmax=1 by default for 1/r LODE with monomial basis.')

# Extrapolate the measured time linearly in the number of structures to
# estimate the cost of processing the complete data set.
print('\nComputational cost:')
print(f'Required time for {len(frames)} frames = {dt:4.1f}s')
N_all = len(frames_all)
dt_all = N_all/len(frames)*dt
print(f'Estimated time for {N_all} structures {dt_all:4.1f}s = {dt_all/60.:4.1f}min')

cell= Cell([7.955160132292204, 7.955160132292204, 7.955160132292204])
cell= Cell([7.954639951096393, 7.954639951096393, 7.954639951096393])
cell= Cell([7.979600181660014, 7.979600181660014, 7.979600181660014])
cell= Cell([8.00504990114136, 8.00504990114136, 8.00504990114136])
cell= Cell([7.955129969191328, 7.955129969191328, 7.955129969191328])
cell= Cell([7.954639951096393, 7.954639951096393, 7.954639951096393])
cell= Cell([7.979600181660014, 7.979600181660014, 7.979600181660014])
cell= Cell([7.95197977727, 7.95197977727, 7.95197977727])
cell= Cell([8.00504990114136, 8.00504990114136, 8.00504990114136])
cell= Cell([7.95197977727, 7.95197977727, 7.95197977727])
cell= Cell([8.00504990114136, 8.00504990114136, 8.00504990114136])
cell= Cell([7.979719775709101, 7.979719775709101, 7.979719775709101])
cell= Cell([7.955129969191328, 7.955129969191328, 7.955129969191328])
cell= Cell([7.979719775709101, 7.979719775709101, 7.979719775709101])
cell= Cell([8.00504990114136, 8.00504990114136, 8.005

In [5]:
# Regroup the features by structure: axis 0 indexes the frame, axis 1 the
# atom within that frame, and all remaining feature axes are flattened.
# Relies on `frames` and `features` from the BaTiO3 cell above.
# The frame count is derived from `frames` rather than hard-coded, so it
# stays correct if the number of loaded frames changes.
n_frames = len(frames)
n_atoms = len(frames[0])
# The reshape only yields a per-structure layout when every frame holds the
# same number of atoms; fail loudly otherwise.
assert all(len(frame) == n_atoms for frame in frames), \
    'all frames must contain the same number of atoms'
X = features.reshape(n_frames, n_atoms, np.prod(features.shape[1:]))

# Example shiftML

In [9]:
# Load every structure of the ShiftML data set
frames = read('../datasets/shiftml.xyz', ':')

# Hyperparameters passed to the calculator
hypers = dict(
    smearing=1.5,
    max_radial=1,
    max_angular=2,
    cutoff_radius=3.5,
    potential_exponent=1,
    compute_gradients=False,
    radial_basis='monomial',
)

# Data set statistics, collected before the frame list is truncated
num_strucs = len(frames)
num_atoms = np.array([len(structure) for structure in frames])
num_env = num_atoms.sum()
print('\nShiftML data set')
print('Number of structures         = ', num_strucs)
print('Total # of environments      = ', num_env)
print('Average # of atoms per frame = ', np.round(num_env / num_strucs, 1))

# Keep only the first 20 structures for the timing run
frames = frames[:20]
print(f'Number of atoms of first {len(frames)} structures = ', [len(structure) for structure in frames])

# Time calculator construction, feature evaluation and feature retrieval
tstart = time.time()
calculator = DensityProjectionCalculator(**hypers)
calculator.transform(frames)
features = calculator.features
tend = time.time()
dt = tend - tstart

# Measured time, plus an extrapolation to all structures that scales
# linearly with the number of structures
print(f'Required time for first {len(frames)} frames = {np.round(dt,2)}s')
print(f'Estimated time for all frames = {np.round(dt/len(frames)*num_strucs/60,2)}min')


ShiftML data set
Number of structures         =  3546
Total # of environments      =  340941
Average # of atoms per frame =  96.1
Number of atoms of first 20 structures =  [156, 44, 120, 76, 18, 48, 48, 200, 52, 20, 120, 50, 40, 64, 32, 28, 120, 152, 14, 44]
cell= Cell([[10.360074368, 0.0, 0.0], [0.0, 9.5756716973, 0.0], [-2.47333307439, 0.0, 14.5245238218]])
cell= Cell([[6.660418234, 0.0, 0.0], [-1.9016559725, 6.42812948767, 0.0], [-1.25141266115, -2.27504567912, 12.6523569315]])
cell= Cell([[11.820081988, 0.0, 0.0], [0.0, 6.16204514198, 0.0], [-1.93709867636, 0.0, 18.1259066073]])
cell= Cell([[6.67904537, 0.0, 0.0], [0.0, 7.72205177402, 0.0], [-1.246009309, 0.0, 13.0838958904]])
cell= Cell([[3.814011932, 0.0, 0.0], [0.0, 4.67901458213, 0.0], [-0.186058944079, 0.0, 9.11012983072]])
cell= Cell([8.206047178, 4.74602482773, 10.6000629696])
cell= Cell([[9.690026652, 0.0, 0.0], [0.0, 7.48801654547, 0.0], [-1.97226926464, 0.0, 4.42979568396]])
cell= Cell([[9.98906627, 0.0, 0.0], [0.0, 9.98

Note for computational cost:

version 1: BaTiO3: estimated total 24min, ShiftML: 62s for first 20 frames

version 2: Precompute as much as possible using numpy arrays -> BaTiO3: 18min, ShiftML: 51s
            Some tests show that the main loop is indeed the dominant contribution
            to the computational cost, plus a fixed 1s contribution from the
            splining at the very beginning.

version 3: