In [13]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell execution, so edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
# System
import os
import sys
sys.path.append('/home/helfrech/Tools/Toolbox/utils')
sys.path.append('/home/helfrech/Tools/GCH/GCH')

# Maths
import numpy as np

# Atoms
import ase.io as aseio

# ML
from kernels import build_kernel, linear_kernel, gaussian_kernel
from kernels import center_kernel, center_kernel_fast
from kernels import center_kernel_oos, center_kernel_oos_fast
from gch_init import gch_init
from gch_run import gch_run

# Utilities
import h5py
import json
import subprocess
from project_utils import load_structures_from_hdf5

# SOAP
from soap import quippy_soap, librascal_soap

In [2]:
# Cutoff value; it is interpolated into the ../Processed_Data/... paths used
# by the loading cells below to select the matching data/model directory.
cutoff = 6.0

In [21]:
# Read the first column of FPS_components.idxs as integer indices; they are
# later passed to the SOAP calculation through soap_args['component_idxs'].
# NOTE: the `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `int` is exactly what it aliased, so the resulting dtype is unchanged.
component_idxs = np.loadtxt(f'../Processed_Data/DEEM_10k/Data/{cutoff}/FPS_components.idxs',
                            usecols=0, dtype=int)

In [23]:
# Load SOAP hyperparameters
# TODO: load from hyperparameters file
centers = [14] # Center on Si, take Si and O in environment
n = 12 # Passed as max_radial
l = 9 # Passed as max_angular

# Keyword arguments forwarded to librascal_soap for the shaken structures.
# The interaction cutoff is taken from the notebook-level `cutoff` (instead of
# a second hard-coded 6.0) so it cannot drift from the value used to select
# the precomputed SOAP files.
# NOTE(review): assumes the SOAPs under Data/{cutoff}/ were computed with this
# same cutoff and the remaining hyperparameters below -- confirm.
soap_args = dict(max_radial=n,
                 max_angular=l,
                 interaction_cutoff=cutoff,
                 cutoff_smooth_width=0.3,
                 gaussian_sigma_constant=0.3,
                 component_idxs=component_idxs)

In [4]:
# Load kernel hyperparameters
# The JSON parameter file is selected by the notebook-level cutoff value.
model_file = f'../Processed_Data/DEEM_10k/Models/{cutoff}/volumes_mae_parameters.json'

with open(model_file, 'r') as f:
    model_dict = json.load(f)

# Kernel type and gamma are forwarded to build_kernel in later cells
kernel_type, gamma = model_dict['kernel_type'], model_dict['gamma']

In [8]:
# Load SOAPs
# Locations of the precomputed SOAP files for the DEEM and IZA sets
deem_file = f'../Processed_Data/DEEM_10k/Data/{cutoff}/soaps.hdf5'
iza_file = f'../Processed_Data/IZA_226onDEEM_10k/Data/{cutoff}/soaps.hdf5'

# DEEM: keep every 100th entry (the same stride is applied to the DEEM
# structures when the xyz and energy files are written)
deem_soaps = load_structures_from_hdf5(
    deem_file, datasets=None, concatenate=False
)[0::100]

# IZA: keep all entries
iza_soaps = load_structures_from_hdf5(
    iza_file, datasets=None, concatenate=False
)

In [9]:
# Build kernel
# Kernel of the combined set against itself; IZA entries come first, then
# the subsampled DEEM entries.
K = build_kernel(
    iza_soaps + deem_soaps,
    iza_soaps + deem_soaps,
    kernel=kernel_type,
    gamma=gamma,
)

# Saved as plain text; this file is handed to gch_init later on
np.savetxt('K.dat', K)

100%|██████████| 326/326 [01:11<00:00,  4.55it/s]


In [10]:
# Concatenate IZA and DEEM xyz files
# IZA frames first, then every 100th DEEM frame
deem = aseio.read('../Raw_Data/GULP/DEEM_10k/DEEM_10000_OPT.xyz', index=':')
iza = aseio.read('../Raw_Data/GULP/IZA_226/IZA_OPT.xyz', index=':')
aseio.write('iza+deem.xyz', iza+deem[0::100], format='extxyz')

# Prepare energies
# One 'Energy_per_Si' value per frame, in the same order as the xyz file
energies = [frame.info['Energy_per_Si'] for frame in iza+deem[0::100]]

np.savetxt('energies_per_si.dat', energies)

In [18]:
# Initialize GCH
# Inputs are the kernel, energy and structure files written by the earlier
# cells (K.dat, energies_per_si.dat, iza+deem.xyz). All settings below are
# passed positionally to gch_init, so their order in the call matters.
pk = 'K.dat' # File containing the kernel
pnrg = 'energies_per_si.dat' # File containing the energies
setxyz = 'iza+deem.xyz' # File containing the structures
wdir_local = './rattled' # Directory in which to save the rattled reference structures
s_c = 0.02 # Uncertainty in cell between structures
s_e = 1.0 # Uncertainty in energy
ndim = 2 # GCH dimensions
numref = 10 # Number of reference structures
numshaken = 5 # Number of rattled structures per reference
conv = 0.20 # Convergence threshold: 100/conv hulls are constructed
mode = 'fps' # Selection mode for the reference structures

# NOTE(review): later cells read ./rattled/shaketraj.xyz, presumably produced
# by this call -- confirm against gch_init.
gch_init(pk, pnrg, setxyz, wdir_local, s_c, s_e, ndim, numref, numshaken, conv, mode)

Loading the kernel matrix, it can take a minute if thousands of elements
/scratch/helfrech/Sync/GDrive/Projects/Zeolites_IZA-DEEM2/Scripts/./rattled
DONE: Loaded data
Initializing statistical sampling of the fuzzy GCH
Uncertainty in Cartesian positions 0.05523075798022895
DONE ! go to /scratch/helfrech/Sync/GDrive/Projects/Zeolites_IZA-DEEM2/Scripts/./rattled/ to see what's in there


In [24]:
# Compute SOAPs for shaken structures
# Read every frame of the trajectory in ./rattled (the directory given to
# gch_init as wdir_local).
shaken_refs = aseio.read('./rattled/shaketraj.xyz', index=':')

# centers and soap_args are defined in the SOAP hyperparameter cell
shaken_ref_soaps = librascal_soap(shaken_refs, centers, **soap_args)

100%|██████████| 60/60 [00:00<00:00, 72.11it/s]


In [28]:
# Compute kernel for shaken structures
# Shaken structures against the combined IZA + subsampled DEEM set, using the
# same kernel type and gamma as the reference kernel.
K_rattled = build_kernel(
    shaken_ref_soaps,
    iza_soaps + deem_soaps,
    kernel=kernel_type,
    gamma=gamma,
)
np.savetxt('./rattled/K.dat', K_rattled)

100%|██████████| 60/60 [00:09<00:00,  6.01it/s]


In [29]:
# Run GCH
# Uses the rattled-structure kernel saved to ./rattled/K.dat and the same
# working directory that was given to gch_init.
shk = './rattled/K.dat' # File containing the kernel for the rattled structures
wdir = './rattled' # Directory in which the rattled reference structures reside
mp = 0.99 # Cutoff probability for determining the GCH vertices
gch_run(shk, wdir, mp)

We will load both the dataset kernel and the shaken kernel, it could take some minutes in case of thousands of structures..
Centering!
  And now we build a projection 
Done, super quick. 
You have selected 500 convex hulls samples per pruning iterations
Statistical sampling of the fuzzy GCH
GCH construction :  0.05364632606506348  sec
Single Hull construction during before pruning :  0.05415201187133789  sec
Iteration :  200  in  500
Iteration :  400  in  500
 Let's start pruning! 
printing rr_pfile.shape
(12, 33)
Iteration :  200  in  500
Iteration :  400  in  500
0.19
Pruning iter :  1  min prob:  0.19  # vertex :  12
printing rr_pfile.shape
(9, 33)
Iteration :  200  in  500
Iteration :  400  in  500
0.358
Pruning iter :  2  min prob:  0.358  # vertex :  9
printing rr_pfile.shape
(7, 33)
Iteration :  200  in  500
Iteration :  400  in  500
0.502
Pruning iter :  3  min prob:  0.502  # vertex :  7
printing rr_pfile.shape
(6, 33)
Iteration :  200  in  500
Iteration :  400  in  500
0.66
P

In [30]:
# Load vprobprune.dat from the GCH working directory and display its shape.
# NOTE(review): presumably written by gch_run; the meaning of the two axes is
# not established in this notebook -- confirm against the GCH code.
vprobprune = np.loadtxt('./rattled/vprobprune.dat')
vprobprune.shape

(6, 326)