In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the local project helpers) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
# System
import os
import sys
sys.path.append('/home/helfrech/Tools/Toolbox/utils')

# Maths
import numpy as np
from scipy.spatial import Voronoi, ConvexHull

# Plotting
import matplotlib.pyplot as plt

# Atoms
from ase.io import read, write

# ML
from skcosmo.decomposition import PCovR
from sklearn.linear_model import Ridge

# Utilities
import h5py
import json
import subprocess
import glob
from copy import deepcopy
import project_utils as utils
from tools import save_json, load_json, recursive_array_convert

# SOAP
from soap import librascal_soap

# Functions

In [3]:
# Compute the hull distances
def hull_distances(hull, data):
    """Compute distances from points to the non-'top' facets of a convex hull.

    Facets whose outward normal has a positive first component (the 'top'
    of the hull along the first coordinate, which callers in this notebook
    fill with the energy) are discarded before any distance is computed.

    Parameters
    ----------
    hull : object exposing ``equations`` and ``simplices``
        Typically a ``scipy.spatial.ConvexHull``. Each row of ``equations``
        is read as ``[normal, offset]`` and the matching row of
        ``simplices`` as the vertex indices of that facet.
    data : array-like of shape (n_points, n_dim)
        Points to evaluate, in the same coordinates as the hull.

    Returns
    -------
    hull_distance : ndarray of shape (n_points,)
        For each point, the smallest value of ``-(normal . x + offset)``
        over the retained facets.
    hull_distance_energy : ndarray of shape (n_points,)
        For each point, the smallest distance measured along the first
        coordinate to the planes of the retained facets whose normal has a
        strictly negative first component.
    hull_vertices : ndarray
        Sorted unique vertex indices of the retained facets.
    """

    # Omit the simplices on the 'top' of the CH
    top_idxs = np.nonzero(hull.equations[:, 0] > 0.0)[0]
    hull_facets = np.delete(hull.equations, top_idxs, axis=0)
    hull_simplices = np.delete(hull.simplices, top_idxs, axis=0)

    hull_vertices = np.unique(hull_simplices)

    # Signed distance of every point to every retained facet plane
    # (non-negative for points inside the hull)
    facet_distances = -1.0 * (
        np.matmul(data, hull_facets[:, 0:-1].T)
        + hull_facets[:, -1]
    )

    # A facet whose normal has a zero first component is parallel to the
    # first axis, so it has no distance along that axis. Dividing by its
    # zero component would yield inf/nan and corrupt the minimum, hence
    # only facets with a strictly negative first component are used here.
    lower = hull_facets[:, 0] < 0.0
    facet_distances_energy = (
        -1.0 * facet_distances[:, lower] / hull_facets[lower, 0]
    )

    hull_distance = np.amin(facet_distances, axis=1)
    hull_distance_energy = np.amin(facet_distances_energy, axis=1)

    return hull_distance, hull_distance_energy, hull_vertices

# Model setup

In [4]:
# Descriptor settings; these values are interpolated into the data paths
cutoff = 6.0
spectrum = 'power'
spectrum_name = spectrum.capitalize()
group_name = 'OO+OSi+SiSi'

# Classification settings: number of classes and decision-function types
n_cantons = 4
df_types = ['OvR', 'OvO']

# File-name suffixes selecting the PCovR mixing
# ('' is reported as the optimal mixing when the hulls are built)
mixing_suffixes = ['', '_0.0', '_1.0']

In [5]:
# Dataset identifiers, used as directory names under ../Processed_Data
iza_name = 'IZA_230'
deem_name = 'DEEM_330k'

# Per-dataset data directories
iza_dir = '../Processed_Data/{}/Data'.format(iza_name)
deem_dir = '../Processed_Data/{}/Data'.format(deem_name)

# Root directory for model outputs
model_dir = '../Processed_Data/Models'

In [6]:
# Load train sets for IZA and Deem
iza_train_idxs = np.loadtxt(f'../Processed_Data/IZA_230/svm_train.idxs', dtype=int)

deem_train_idxs = np.loadtxt('../Processed_Data/DEEM_330k/svm_train.idxs', dtype=int)

# Load test sets for IZA and Deem
iza_test_idxs = np.loadtxt('../Processed_Data/IZA_230/svm_test.idxs', dtype=int)

deem_test_idxs = np.loadtxt('../Processed_Data/DEEM_330k/svm_test.idxs', dtype=int)

iza_idxs = np.concatenate((iza_train_idxs, iza_test_idxs))
deem_idxs = np.concatenate((deem_train_idxs, deem_test_idxs))

In [7]:
# Read every structure of both datasets
iza_frames = read(
    '../Raw_Data/GULP/IZA_230/IZA_230.xyz',
    index=':',
)
deem_frames = read(
    '../Raw_Data/DEEM_330k/XYZ/DEEM_331172.xyz',
    index=':',
)

# Keep only the selected structures: IZA first, then Deem
frames = [
    *(iza_frames[i] for i in iza_idxs),
    *(deem_frames[i] for i in deem_idxs),
]

In [8]:
# Deem summary: file columns 1 (Ref) and 2 (GULP) are read, so the GULP
# values end up in column 1 of the loaded array
deem_energies = np.loadtxt(
    '../Raw_Data/GULP/DEEM_330k/optimization_summary.dat',
    usecols=(1, 2),
)

# IZA summary: only file column 1 (GULP) is read
iza_energies = np.loadtxt(
    '../Raw_Data/GULP/IZA_230/optimization_summary_fix.dat',
    usecols=1,
)

# GULP energies of the selected structures, in the same order as `frames`
# (IZA first, then Deem)
energies = np.concatenate([
    iza_energies[iza_idxs],
    deem_energies[deem_idxs, 1],
])

# Build a convex hull based on PCovR projections

In [9]:
 # CH dimensions (includes energy)
ndim = 3  # the hull uses the energy plus the first (ndim - 1) projection columns

## Build a standard convex hull

In [10]:
for df_type in df_types:
    print(f'===== {df_type} =====')

    # Sub-path of the PCovR projections for this decision-function type
    model_data_dir = f'LPCovR/{df_type}/{n_cantons}-Class/{spectrum_name}/{group_name}'

    # Output directory for the hull results
    ch_dir = f'{model_dir}/{cutoff}/CH/{df_type}/{n_cantons}-Class/{spectrum_name}/{group_name}'
    os.makedirs(ch_dir, exist_ok=True)

    # Frames and energies are written again into every output directory
    # for compatibility with the (deprecated) GCH framework
    write(f'{ch_dir}/iza+deem.xyz', frames, format='extxyz')
    np.savetxt(f'{ch_dir}/energies_per_si.dat', energies)

    for mixing in mixing_suffixes:
        print(
            '----- CH for PCovR with optimal mixing -----'
            if mixing == ''
            else f'----- CH for PCovR with mixing = {mixing[1:]} -----'
        )

        # Load the projections of the selected structures (IZA first, then Deem)
        projection_sources = (
            (
                f'{iza_dir}/{cutoff}/{model_data_dir}/pcovr_structure_projections{mixing}.hdf5',
                iza_idxs,
            ),
            (
                f'{deem_dir}/{cutoff}/{model_data_dir}/pcovr_structure_projections{mixing}.hdf5',
                deem_idxs,
            ),
        )
        T = np.concatenate([
            utils.load_hdf5(path)[idxs]
            for path, idxs in projection_sources
        ])

        # Hull coordinates: energy first, then the leading projection columns
        data = np.column_stack((energies, T[:, :ndim - 1]))
        ch = ConvexHull(data)

        d, de, v = hull_distances(ch, data)

        # Save projections and distances.
        # Projections are saved in duplicate
        # for compatibility with the (deprecated) GCH framework
        np.savetxt(f'{ch_dir}/T{mixing}.dat', T)
        np.savetxt(f'{ch_dir}/hull_distances{mixing}.dat', d)
        np.savetxt(f'{ch_dir}/hull_distances_energy{mixing}.dat', de)

        # Vertex indices are integers, so write them without a decimal part
        np.savetxt(f'{ch_dir}/hull_vertices{mixing}.dat', v, fmt='%d')

===== OvR =====
----- CH for PCovR with optimal mixing -----
----- CH for PCovR with mixing = 0.0 -----
----- CH for PCovR with mixing = 1.0 -----
===== OvO =====
----- CH for PCovR with optimal mixing -----
----- CH for PCovR with mixing = 0.0 -----
----- CH for PCovR with mixing = 1.0 -----
