In [5]:
# System
import os
import sys
sys.path.append('/home/helfrech/Tools/Toolbox/utils')

# Maths
import numpy as np

# ML
from regression import SparseKRR
from kernels import build_kernel, linear_kernel, gaussian_kernel

# Utilities
import h5py
import json

# Initial setup

In [None]:
# Load SOAP cutoffs
# (path uses the same 'Processed_Data' directory as every other cell)
with open('../Processed_Data/soap_hyperparameters.json', 'r') as f:
    soap_hyperparameters = json.load(f)

# The cutoffs drive every per-cutoff loop below
cutoffs = soap_hyperparameters['interaction_cutoff']

In [None]:
# Read the integer index arrays that define the train/test partition
train_idxs, test_idxs = (
    np.loadtxt(idx_path, dtype=int)
    for idx_path in (
        '../Processed_Data/DEEM_10k/train_idxs',
        '../Processed_Data/DEEM_10k/test_idxs',
    )
)

In [None]:
# Representative SOAP vectors for each cutoff, keyed by the cutoff formatted as a string
representative_soaps = {}

# NOTE(review): build_representatives_from_hdf5 is not imported in the visible
# cells -- confirm which module provides it and add the import.
for cutoff in cutoffs:
    work_dir = f'../Processed_Data/DEEM_10k/Data/{cutoff}'

    # presumably the number of Si environments per structure -- TODO confirm;
    # the cumulative sum without its last entry gives the split boundaries
    n_Si = np.loadtxt(f'{work_dir}/n_Si.dat', dtype=int)
    split_idxs = np.cumsum(n_Si)[0:-1]

    representative_idxs = np.loadtxt(f'{work_dir}/FPS_representatives.idxs', dtype=int)
    soaps_file = f'{work_dir}/soaps.hdf5'

    # Key by the actual cutoff value so that each cutoff keeps its own entry
    representative_soaps[f'{cutoff}'] = build_representatives_from_hdf5(
        soaps_file, representative_idxs, split_idxs
    )

In [None]:
# Names of the regressed properties; they appear in the model and prediction
# file names and are zipped with deem_10k_structure_properties, so the order
# here must match the order of that list.
property_names = ['volumes', 'energies']

# Functions

In [None]:
def transform_skrr(skrrs, property_names, representative_soaps, kernel_type, kernel_parameters, work_dir='.'):
    """Predict properties for the SOAPs in `work_dir` with existing models.

    Loads `{work_dir}/soaps.hdf5`, builds the kernels between those SOAPs and
    the representative SOAPs, and for every property writes the predictions to
    `{work_dir}/predicted_structure_{pn}.dat` and
    `{work_dir}/predicted_environment_{pn}.dat`.

    Parameters
    ----------
    skrrs : mapping
        Indexed by property name; each value must provide `transform(kernel)`.
    property_names : iterable of str
        Properties to predict; used as keys of `skrrs` and in output file names.
    representative_soaps
        Representative SOAP vectors, passed as the second argument of
        `build_kernel`.
    kernel_type
        Forwarded to `build_kernel` as `kernel=`.
    kernel_parameters : dict
        Extra keyword arguments forwarded to `build_kernel`.
    work_dir : str, optional
        Directory holding `soaps.hdf5`; predictions are written there too.

    Returns
    -------
    None
    """
    # Read SOAPs
    # NOTE(review): load_structures_from_hdf5 is not imported in the visible
    # cells -- confirm which module provides it.
    soaps = load_structures_from_hdf5(f'{work_dir}/soaps.hdf5')

    # Kernel on the list of per-structure SOAPs
    KNM = build_kernel(soaps, representative_soaps,
                       kernel=kernel_type, **kernel_parameters)

    # Kernel on all SOAP rows stacked into one array
    KNM_environments = build_kernel(np.concatenate(soaps, axis=0), representative_soaps,
                                    kernel=kernel_type, **kernel_parameters)

    for pn in property_names:

        # Predict based on provided SparseKRR object
        Yp_structures = skrrs[pn].transform(KNM)
        Yp_environments = skrrs[pn].transform(KNM_environments)

        np.savetxt(f'{work_dir}/predicted_structure_{pn}.dat', Yp_structures)
        np.savetxt(f'{work_dir}/predicted_environment_{pn}.dat', Yp_environments)

# DEEM_10k

In [None]:
# Per-structure targets, stored as [volumes, energies]
deem_10k_structure_properties = [
    np.loadtxt('../Processed_Data/DEEM_10k/structure_volumes.dat'),
    np.loadtxt('../Processed_Data/DEEM_10k/structure_energies.dat'),
]

# Named handles onto the same two arrays
deem_10k_structure_volumes, deem_10k_structure_energies = deem_10k_structure_properties

In [None]:
def _json_default(obj):
    """Convert NumPy arrays and scalars to plain Python objects for json.dump."""
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')


# Fit one sparse KRR model per (cutoff, property) on DEEM_10k, then save the
# model state and its structure/environment predictions.
for cutoff in cutoffs:
    data_dir = f'../Processed_Data/DEEM_10k/Data/{cutoff}'
    model_dir = f'../Processed_Data/DEEM_10k/Models/{cutoff}'
    os.makedirs(model_dir, exist_ok=True)

    # Read SOAPs
    # NOTE(review): load_structures_from_hdf5 is not imported in the visible
    # cells -- confirm which module provides it.
    deem_10k = load_structures_from_hdf5(f'{data_dir}/soaps.hdf5')

    # Load kernel parameters
    with open(f'{data_dir}/kernel_parameters.json', 'r') as f:
        kernel_dict = json.load(f)

    kernel_type = kernel_dict['kernel_type']
    kernel_parameters = kernel_dict['kernel_parameters']

    # Use this cutoff's representatives, not the whole dict
    # (transform_skrr is likewise called with a single entry)
    cutoff_representatives = representative_soaps[f'{cutoff}']

    # Build kernels
    KMM = build_kernel(cutoff_representatives, cutoff_representatives,
                       kernel=kernel_type, **kernel_parameters)
    KNM_train = build_kernel([deem_10k[i] for i in train_idxs], cutoff_representatives,
                             kernel=kernel_type, **kernel_parameters)
    KNM_test = build_kernel([deem_10k[i] for i in test_idxs], cutoff_representatives,
                            kernel=kernel_type, **kernel_parameters)
    KNM_environments = build_kernel(np.concatenate(deem_10k, axis=0), cutoff_representatives,
                                    kernel=kernel_type, **kernel_parameters)

    for pn, Y in zip(property_names, deem_10k_structure_properties):

        Yp_structures = np.zeros(len(deem_10k))

        # Scale factor: training-target variance over the mean diagonal of KMM
        delta = np.var(Y[train_idxs]) * KMM.shape[0] / np.trace(KMM)

        # TODO: load regularizations, sigmas
        # NOTE(review): `sigma` and `reg` are not defined in the visible cells;
        # they must be set before this cell runs.

        # Initialize sparse KRR
        skrr = SparseKRR(sigma=sigma, reg=reg, rcond=None)
        skrr.fit(delta*KNM_train, delta*KMM, delta*Y[train_idxs])

        Yp_structures[train_idxs] = skrr.transform(KNM_train)
        Yp_structures[test_idxs] = skrr.transform(KNM_test)
        Yp_environments = skrr.transform(KNM_environments)

        # Save the model state as JSON; NumPy arrays are written as nested lists
        with open(f'{model_dir}/skrr_{pn}.json', 'w') as f:
            json.dump(skrr.__dict__, f, default=_json_default)

        np.savetxt(f'{data_dir}/predicted_structure_{pn}.dat', Yp_structures)
        np.savetxt(f'{data_dir}/predicted_environment_{pn}.dat', Yp_environments)

# IZA_226 on DEEM_10k

In [None]:
# Apply the models trained on DEEM_10k to the IZA_226 SOAPs, one cutoff at a time
for cutoff in cutoffs:
    work_dir = f'../Processed_Data/IZA_226onDEEM_10k/Data/{cutoff}'

    # Load kernel parameters (the ones stored with the DEEM_10k data)
    with open(f'../Processed_Data/DEEM_10k/Data/{cutoff}/kernel_parameters.json', 'r') as f:
        kernel_dict = json.load(f)

    kernel_type = kernel_dict['kernel_type']
    kernel_parameters = kernel_dict['kernel_parameters']

    skrrs = {}

    for pn in property_names:

        # Model state is stored as `skrr_{pn}.json` in the Models directory
        with open(f'../Processed_Data/DEEM_10k/Models/{cutoff}/skrr_{pn}.json', 'r') as f:
            skrr_state = json.load(f)

        # Rebuild a SparseKRR object from the saved attribute dict; a plain
        # dict has no `transform` method.
        # NOTE(review): assumes every list-valued attribute was a NumPy array
        # when saved and that restoring __dict__ is enough -- confirm against
        # regression.SparseKRR.
        skrr = SparseKRR.__new__(SparseKRR)
        skrr.__dict__.update({
            attr: np.asarray(value) if isinstance(value, list) else value
            for attr, value in skrr_state.items()
        })
        skrrs[pn] = skrr

    transform_skrr(skrrs, property_names, representative_soaps[f'{cutoff}'],
                   kernel_type, kernel_parameters, work_dir=work_dir)

# COD_196 on DEEM_10k