In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# System
import os
import sys
sys.path.append('/home/helfrech/Tools/Toolbox/utils')

# Maths
import numpy as np
from scipy.spatial import Voronoi, ConvexHull

# Plotting
import matplotlib.pyplot as plt

# Atoms
from ase.io import read, write

# ML
from skcosmo.decomposition import PCovR
from sklearn.linear_model import Ridge

# Utilities
import h5py
import json
import subprocess
import glob
from copy import deepcopy
import project_utils as utils
from tools import save_json, load_json, recursive_array_convert

# SOAP
from soap import librascal_soap

# Functions

In [10]:
# Compute the hull distances
def hull_distances(hull, data):
    """Compute the distance of each point to the lower part of a convex hull.

    Parameters
    ----------
    hull : object with an ``equations`` attribute
        Typically a ``scipy.spatial.ConvexHull``. Each row of
        ``hull.equations`` is ``[normal, offset]`` for one facet.
    data : array-like, shape (n_points, n_dims)
        Points to evaluate. The first coordinate is the one the 'top' of
        the hull is defined along (the energy in this notebook).

    Returns
    -------
    hull_distance : ndarray, shape (n_points,)
        Smallest value of ``-(normal . x + offset)`` over the retained
        facets, i.e. the distance to the nearest retained facet hyperplane.
    hull_distance_energy : ndarray, shape (n_points,)
        Smallest distance to a retained facet hyperplane measured along the
        first coordinate only. Facets whose normal has a zero first
        component are parallel to that axis and are skipped here; if no
        facet qualifies the value is ``inf``.
    """
    equations = np.asarray(hull.equations)

    # Omit the simplices on the 'top' of the CH
    # (facets whose outward normal points towards larger first coordinate).
    lower_facets = equations[~(equations[:, 0] > 0.0)]
    normals = lower_facets[:, 0:-1]
    offsets = lower_facets[:, -1]

    # Distance of every point to every retained facet hyperplane.
    facet_distance = -1.0 * (np.matmul(data, normals.T) + offsets)

    # Converting to a distance along the first axis divides by the first
    # normal component. Vertical facets have a zero component there, which
    # previously produced -inf/nan and poisoned the minimum; exclude them.
    energy_normal = normals[:, 0]
    sloped = energy_normal < 0.0
    facet_distance_energy = np.full(facet_distance.shape, np.inf)
    facet_distance_energy[:, sloped] = (
        -1.0 * facet_distance[:, sloped] / energy_normal[sloped]
    )

    hull_distance = np.amin(facet_distance, axis=1)
    hull_distance_energy = np.amin(facet_distance_energy, axis=1)

    return hull_distance, hull_distance_energy

# Model setup

In [11]:
# Cutoff value; it appears as a directory level in the data/model paths below
# (presumably the SOAP cutoff radius -- TODO confirm units)
cutoff = 6.0
# Spectrum type and its capitalized form, which is used as a directory name
spectrum = 'power'
spectrum_name = spectrum.capitalize()
# Number of classes; used in the '{n_cantons}-Class' directory name
n_cantons = 4
# Name of the feature group; used as a directory name
group_name = 'OO+OSi+SiSi'
# File-name suffixes of the PCovR projections to process:
# '' is reported as the optimal mixing, '_<value>' as a fixed mixing value
mixing_suffixes = ['', '_0.0', '_1.0']
# Decision function types to loop over
# (presumably one-vs-rest and one-vs-one -- verify against the model code)
df_types = ['OvR', 'OvO']

In [12]:
# Root directory under which the convex hull outputs are written
model_dir = '../Processed_Data/Models'

# Dataset names and the corresponding processed-data directories
# from which the PCovR projections are read
deem_name = 'DEEM_330k'
iza_name = 'IZA_230'
deem_dir = f'../Processed_Data/{deem_name}/Data'
iza_dir = f'../Processed_Data/{iza_name}/Data'

In [13]:
# Load train sets for IZA and Deem.
# The dataset directories are built from `iza_name` / `deem_name` so that they
# stay consistent with `iza_dir` / `deem_dir` defined in the previous cell.
iza_train_idxs = np.loadtxt(f'../Processed_Data/{iza_name}/svm_train.idxs', dtype=int)
# Permutation that sorts the IZA indices, and its inverse, which restores the
# original ordering after data has been loaded in sorted order
iza_sort_train_idxs = np.argsort(iza_train_idxs)
iza_unsort_train_idxs = np.argsort(iza_sort_train_idxs)

deem_train_idxs = np.loadtxt(f'../Processed_Data/{deem_name}/svm_train.idxs', dtype=int)

# Load test sets for IZA and Deem
iza_test_idxs = np.loadtxt(f'../Processed_Data/{iza_name}/svm_test.idxs', dtype=int)
iza_sort_test_idxs = np.argsort(iza_test_idxs)
iza_unsort_test_idxs = np.argsort(iza_sort_test_idxs)

deem_test_idxs = np.loadtxt(f'../Processed_Data/{deem_name}/svm_test.idxs', dtype=int)

In [14]:
# Read every structure of both datasets
iza_frames = read('../Raw_Data/GULP/IZA_230/IZA_230.xyz', index=':')
deem_frames = read('../Raw_Data/DEEM_330k/XYZ/DEEM_331172.xyz', index=':')

# Test-set structures: the IZA frames first, followed by the Deem frames
frames = [
    *(iza_frames[i] for i in iza_test_idxs),
    *(deem_frames[i] for i in deem_test_idxs),
]

In [15]:
# Energies from the GULP optimization summaries
deem_energies = np.loadtxt(
    '../Raw_Data/GULP/DEEM_330k/optimization_summary.dat',
    usecols=(1, 2),
)  # 1=Ref, 2=GULP
iza_energies = np.loadtxt(
    '../Raw_Data/GULP/IZA_230/optimization_summary_fix.dat',
    usecols=1,
)  # 1=GULP

# IZA entries first, then the GULP column of the Deem energies
train_energies = np.concatenate([
    iza_energies[iza_train_idxs],
    deem_energies[deem_train_idxs, 1],
])
test_energies = np.concatenate([
    iza_energies[iza_test_idxs],
    deem_energies[deem_test_idxs, 1],
])

# Center energies relative to the train set
energy_scaler = utils.StandardNormScaler(with_mean=True, with_scale=False)
train_energies = energy_scaler.fit_transform(train_energies)
test_energies = energy_scaler.transform(test_energies)

# Build a convex hull based on PCovR projections

In [16]:
 # CH dimensions (includes energy): the hull is built from the energy
 # plus the first (ndim - 1) columns of the PCovR projection
ndim = 3

## Build a standard convex hull

In [17]:
for df_type in df_types:
    print(f'===== {df_type} =====')

    # Sub-directory (below each dataset's cutoff directory) holding the PCovR output
    model_data_dir = f'LPCovR/{df_type}/{n_cantons}-Class/{spectrum_name}/{group_name}'

    # Output directory for everything related to this convex hull
    ch_dir = f'{model_dir}/{cutoff}/CH_TESTSET/{df_type}/{n_cantons}-Class/{spectrum_name}/{group_name}'
    os.makedirs(ch_dir, exist_ok=True)

    # Frames and energies are saved in duplicate
    # for compatibility with the (deprecated) GCH framework
    write(f'{ch_dir}/iza+deem.xyz', frames, format='extxyz')
    np.savetxt(f'{ch_dir}/energies_per_si.dat', test_energies)

    for mixing in mixing_suffixes:
        if mixing:
            print(f'----- CH for PCovR with mixing = {mixing[1:]} -----')
        else:
            print('----- CH for PCovR with optimal mixing -----')

        # Load projections. The IZA rows are requested in sorted index order
        # and then permuted back to the original test-set ordering.
        T_iza = utils.load_hdf5(
            f'{iza_dir}/{cutoff}/{model_data_dir}/pcovr_structure_projections{mixing}.hdf5',
            indices=iza_test_idxs[iza_sort_test_idxs]
        )[iza_unsort_test_idxs]
        T_deem = utils.load_hdf5(
            f'{deem_dir}/{cutoff}/{model_data_dir}/pcovr_structure_projections{mixing}.hdf5',
            indices=deem_test_idxs
        )
        T = np.concatenate((T_iza, T_deem))

        # Hull coordinates: energy plus the leading (ndim - 1) projection columns
        data = np.column_stack((test_energies, T[:, 0:ndim-1]))
        ch = ConvexHull(data)

        d, de = hull_distances(ch, data)

        # Save projections, distances and hull vertices.
        # Projections are saved in duplicate
        # for compatibility with the (deprecated) GCH framework
        np.savetxt(f'{ch_dir}/T{mixing}.dat', T)
        np.savetxt(f'{ch_dir}/hull_distances{mixing}.dat', d)
        np.savetxt(f'{ch_dir}/hull_distances_energy{mixing}.dat', de)
        np.savetxt(f'{ch_dir}/hull_vertices{mixing}.dat', ch.vertices, fmt='%d')

===== OvR =====
----- CH for PCovR with optimal mixing -----
----- CH for PCovR with mixing = 0.0 -----
----- CH for PCovR with mixing = 1.0 -----
===== OvO =====
----- CH for PCovR with optimal mixing -----
----- CH for PCovR with mixing = 0.0 -----
----- CH for PCovR with mixing = 1.0 -----


# Voronoi tessellation