In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# System
import os
import sys
sys.path.append('/home/helfrech/Tools/Toolbox/utils')

# Maths
import numpy as np

# Plotting
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# ML
from regression import PCovR, KPCovR, SparseKPCovR
from regression import LR, KRR
from kernels import build_kernel, linear_kernel, gaussian_kernel
from kernels import center_kernel, center_kernel_fast
from kernels import center_kernel_oos, center_kernel_oos_fast
from soap import compute_soap_density, reshape_soaps
from soap import rrw_neighbors, make_tuples

from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LogisticRegression

# Utilities
import h5py
import json
import itertools
from tqdm.notebook import tqdm
import project_utils as utils
from tools import load_json

# Import COSMO style toolkit
import cosmoplot.colorbars as cosmocbars
import cosmoplot.utils as cosmoutils
import cosmoplot.style as cosmostyle

# Activate the 'article' style of the COSMO plotting toolkit and keep a
# reference to the color cycle it exposes
cosmostyle.set_style('article')
colorList = cosmostyle.color_cycle

In /home/helfrech/.config/matplotlib/stylelib/cosmo.mplstyle: 
The savefig.frameon rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.
In /home/helfrech/.config/matplotlib/stylelib/cosmoLarge.mplstyle: 
The savefig.frameon rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.


The savefig.frameon rcparam was deprecated in Matplotlib 3.1 and will be removed in 3.3.



In [3]:
# sys.path.append('/scratch/helfrech/Sync/GDrive/Projects/KPCovR/kernel-tutorials')
# sys.path.append('/scratch/helfrech/Sync/GDrive/Projects/KPCovR/KernelPCovR/analysis/scripts')
# from utilities.sklearn_covr.kpcovr import KernelPCovR as KPCovR2
# from utilities.sklearn_covr.pcovr import PCovR as PCovR2
# from helpers import l_regr, l_kpcovr

# Load train and test splits

In [6]:
# Read the SOAP hyperparameters from disk; the 'interaction_cutoff' entry
# is the collection of cutoffs that the cells below iterate over
with open('../Processed_Data/soap_hyperparameters.json', 'r') as hyperparameter_file:
    soap_hyperparameters = json.load(hyperparameter_file)

cutoffs = soap_hyperparameters['interaction_cutoff']

In [5]:
# DEEM train/test split, stored on disk as integer structure indices
idxs_deem_train = np.loadtxt('../Processed_Data/DEEM_10k/train.idxs', dtype=int)
idxs_deem_test = np.loadtxt('../Processed_Data/DEEM_10k/test.idxs', dtype=int)

# Number of DEEM structures in each split and in both splits combined
n_deem_train, n_deem_test = idxs_deem_train.size, idxs_deem_test.size
n_deem = n_deem_train + n_deem_test

In [10]:
# Every DEEM structure gets the same placeholder canton label (4)
cantons_deem = np.full(n_deem, 4, dtype=int)

In [None]:
# Canton label of every IZA structure (second column of the cantons file)
cantons_iza = np.loadtxt('../Raw_Data/GULP/IZA_226/cantons.txt', usecols=1, dtype=int)

# Position of the first structure labelled canton 4
# (presumably the RWY framework, going by the variable name -- TODO confirm)
RWY = (cantons_iza == 4).nonzero()[0][0]

In [7]:
# Drop the entry at index RWY from the IZA labels and count what remains.
# NOTE: this rebinds `cantons_iza`, so the cell is not idempotent -- running it
# again without re-running the loading cell removes one more entry
cantons_iza = np.delete(cantons_iza, RWY)
n_iza = cantons_iza.size

In [9]:
# Files caching the IZA train/test split so that the same split is reused
# across runs.
# NOTE(review): the original used the placeholder name 'TODO' for BOTH files,
# so the test indices overwrote the train indices on disk and a later reload
# returned identical train and test sets. The paths below mirror the layout of
# the DEEM_10k index files -- confirm them against the project directory tree
idxs_iza_train_file = '../Processed_Data/IZA_226onDEEM_10k/train.idxs'
idxs_iza_test_file = '../Processed_Data/IZA_226onDEEM_10k/test.idxs'

try:
    # Reuse a previously stored split if both index files can be read
    idxs_iza_train = np.loadtxt(idxs_iza_train_file, dtype=int)
    idxs_iza_test = np.loadtxt(idxs_iza_test_file, dtype=int)

except IOError:

    # No stored split available: put a random half of the IZA structures in the
    # train set and the rest in the test set. The generator is seeded so the
    # split can be regenerated identically if the index files are lost
    idxs_iza = np.random.RandomState(0).permutation(n_iza)
    idxs_iza_train = idxs_iza[:n_iza // 2]
    idxs_iza_test = idxs_iza[n_iza // 2:]

    # Store the split for subsequent runs
    os.makedirs(os.path.dirname(idxs_iza_train_file), exist_ok=True)
    np.savetxt(idxs_iza_train_file, idxs_iza_train, fmt='%d')
    os.makedirs(os.path.dirname(idxs_iza_test_file), exist_ok=True)
    np.savetxt(idxs_iza_test_file, idxs_iza_test, fmt='%d')

# Take the sizes from the index arrays so that they are defined whether the
# split was loaded or generated (later cells build slices from them)
n_iza_train = np.size(idxs_iza_train)
n_iza_test = np.size(idxs_iza_test)

In [None]:
# "Master" canton labels for the train and test sets;
# the cell below fills them in, keyed by the number of classes
cantons_train, cantons_test = {}, {}

In [17]:
# 4-class labels: the IZA cantons followed by the (dummy) DEEM cantons
cantons_train[4] = np.concatenate([cantons_iza[idxs_iza_train],
                                   cantons_deem[idxs_deem_train]])
cantons_test[4] = np.concatenate([cantons_iza[idxs_iza_test],
                                  cantons_deem[idxs_deem_test]])

# 2-class labels: 1 for every IZA structure, 2 for every DEEM structure,
# in the same IZA-then-DEEM order as above
cantons_train[2] = np.concatenate([np.full(len(idxs_iza_train), 1, dtype=int),
                                   np.full(len(idxs_deem_train), 2, dtype=int)])
cantons_test[2] = np.concatenate([np.full(len(idxs_iza_test), 1, dtype=int),
                                  np.full(len(idxs_deem_test), 2, dtype=int)])

# Model setup

In [None]:
# Root directory of the stored models and hyperparameters
model_dir = '../Processed_Data/Models'

# Dataset names and the data directory belonging to each of them
deem_name = 'DEEM_10k'
iza_name = 'IZA_226onDEEM_10k'
deem_dir = '../Processed_Data/{}/Data'.format(deem_name)
iza_dir = '../Processed_Data/{}/Data'.format(iza_name)

In [None]:
# Global model parameters
# TODO: or use .get_params()?
# TODO: load from optimization?

# Keyword arguments for the SVC models, one set for the 'linear'
# and one for the 'kernel' (precomputed kernel) variant
svc_kwargs = {
    'linear': {
        'penalty': 'l2',
        'loss': 'squared_hinge',
        'dual': False,
        'multi_class': 'ovr',
        'class_weight': None,
        'fit_intercept': True,
        'intercept_scaling': 1.0,
        'tol': 1.0E-3,
    },
    'kernel': {
        'kernel': 'precomputed',
        'decision_function_shape': 'ovr',
        'class_weight': None,
        'break_ties': False,
        'tol': 1.0E-3,
    },
}

# Keyword arguments for the PCovR models, again per 'linear'/'kernel' variant
pcovr_kwargs = {
    'linear': {'n_components': None, 'alpha': 0.0, 'regularization': 1.0E-12},
    'kernel': {'n_components': None, 'alpha': 0.0, 'regularization': 1.0E-12},
}

In [None]:
# The train (test) label arrays are concatenated with the IZA structures first
# and the DEEM structures after them, so each dataset is addressed by a slice.
# The sizes are derived from the index arrays so that they are defined
# regardless of whether the IZA split was loaded from file or generated
n_iza_train = np.size(idxs_iza_train)
n_iza_test = np.size(idxs_iza_test)

deem_train_slice = slice(n_iza_train, None)
deem_test_slice = slice(n_iza_test, None)
iza_train_slice = slice(0, n_iza_train)
iza_test_slice = slice(0, n_iza_test)

# Build kernels

In [None]:
# TODO: this can probably just stay in the optimization notebook once testing is done,
# it isn't really needed here if it is already in the optimization notebook
# Hmm...but the kernels will be different, right? maybe need to keep both

In [None]:
# Make sure a stored kernel file exists for every cutoff and kernel type;
# kernels are only computed when the file is missing
for cutoff in cutoffs:
    for kernel_type in ('linear', 'gaussian'):
        kernel_name = kernel_type.capitalize()
        work_dir = f'{model_dir}/{cutoff}/Kernel_Models/{kernel_name}'

        # Kernel storage (for re-use) and the kernel hyperparameters
        kernel_file = f'{work_dir}/structure_kernels.hdf5'
        kernel_parameter_file = f'{work_dir}/volumes_mae_parameters.json'

        # Nothing to do when the kernel file is already there
        if os.path.exists(kernel_file):
            continue

        # SOAP files (atomwise, FPS'ed features)
        deem_file = f'{deem_dir}/{cutoff}/soaps.hdf5'
        iza_file = f'{iza_dir}/{cutoff}/soaps.hdf5'

        # Train and test SOAPs assembled from IZA and DEEM,
        # with the RWY entry removed from the IZA set
        soaps_train, soaps_test = utils.load_soaps(
            deem_file, iza_file,
            idxs_deem_train, idxs_deem_test,
            idxs_iza_train, idxs_iza_test,
            idxs_iza_delete=[RWY]
        )

        # Compute the kernels with the stored hyperparameters; `kernel_file` is
        # handed over as well (presumably so the kernels get written there -- confirm)
        kernel_parameters = load_json(kernel_parameter_file)
        K_train, K_test, K_test_test = utils.compute_kernels(
            soaps_train, soaps_test, **kernel_parameters, kernel_file=kernel_file
        )

# Kernel models

## KernelSVC

In [None]:
# Fit a kernel SVC on the precomputed kernels for every cutoff, kernel type and
# class scheme, then store the decision functions and predicted cantons
# separately for DEEM and IZA
for cutoff in cutoffs:
    for kernel_type in ('linear', 'gaussian'):
        kernel_name = kernel_type.capitalize()

        kernel_dir = f'{model_dir}/{cutoff}/Kernel_Models/{kernel_name}'

        # File the kernels are stored in
        kernel_file = f'{kernel_dir}/structure_kernels.hdf5'

        # Load kernels
        K_train, K_test, K_test_test = utils.load_kernels(kernel_file)

        # Center and scale kernels
        K_train, K_test, K_test_test = \
            utils.preprocess_kernels(K_train, K_test=[K_test], K_test_test=[K_test_test])

        for n_cantons in (2, 4):
            # TODO: MOVE KERNELS BEFORE RUNNING!

            # Output location, relative to each dataset's cutoff directory
            output_dir = f'Kernel_Models/{kernel_name}/KSVC-KPCovR/{n_cantons}-class'
            deem_output_dir = f'{deem_dir}/{cutoff}/{output_dir}'
            iza_output_dir = f'{iza_dir}/{cutoff}/{output_dir}'

            # Create the directories the files are actually written to
            # (not `output_dir` itself, which is relative to the working directory)
            os.makedirs(deem_output_dir, exist_ok=True)
            os.makedirs(iza_output_dir, exist_ok=True)

            # Files to store IZA and DEEM KSVC decision functions
            svc_df_deem_file = f'{deem_output_dir}/svc_structure_dfs.dat'
            svc_df_iza_file = f'{iza_output_dir}/svc_structure_dfs.dat'

            # Files to store IZA and DEEM KSVC class predictions
            svc_canton_deem_file = f'{deem_output_dir}/svc_structure_cantons.dat'
            svc_canton_iza_file = f'{iza_output_dir}/svc_structure_cantons.dat'

            # File containing the KSVC hyperparameters
            parameter_dir = f'{kernel_dir}/KSVC-KPCovR/{n_cantons}-class'
            svc_parameter_file = f'{parameter_dir}/svc_parameters.json'

            # Run KSVC
            svc_parameters = load_json(svc_parameter_file)
            df_train, df_test, predicted_cantons_train, predicted_cantons_test = \
                utils.do_svc(K_train, K_test, cantons_train[n_cantons], cantons_test[n_cantons],
                             svc_type='kernel', **svc_parameters,
                             outputs=['decision_functions', 'predictions'])

            # Save IZA and DEEM KSVC decision functions
            utils.split_and_save(df_train, df_test,
                                 idxs_deem_train, idxs_deem_test,
                                 deem_train_slice, deem_test_slice,
                                 hdf5_attrs=None,
                                 output=svc_df_deem_file, output_format='%f')

            utils.split_and_save(df_train, df_test,
                                 idxs_iza_train, idxs_iza_test,
                                 iza_train_slice, iza_test_slice,
                                 hdf5_attrs=None,
                                 output=svc_df_iza_file, output_format='%f')

            # Save IZA and DEEM KSVC canton predictions; these are class labels,
            # so they are written as integers ('%d'), like the KPCovR predictions
            utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                 idxs_deem_train, idxs_deem_test,
                                 deem_train_slice, deem_test_slice,
                                 hdf5_attrs=None,
                                 output=svc_canton_deem_file, output_format='%d')

            utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                 idxs_iza_train, idxs_iza_test,
                                 iza_train_slice, iza_test_slice,
                                 hdf5_attrs=None,
                                 output=svc_canton_iza_file, output_format='%d')

## KRR check

In [None]:
# For every cutoff, kernel type and class scheme, check that kernel ridge
# regression on the kernels can reproduce the stored KSVC decision functions
for cutoff in cutoffs:
    for kernel_type in ('linear', 'gaussian'):
        kernel_name = kernel_type.capitalize()
        kernel_dir = f'{model_dir}/{cutoff}/Kernel_Models/{kernel_name}'

        # Stored kernels for this cutoff and kernel type
        kernel_file = f'{kernel_dir}/structure_kernels.hdf5'
        K_train, K_test, K_test_test = utils.load_kernels(kernel_file)

        # Centering and scaling of the kernels
        K_train, K_test, K_test_test = utils.preprocess_kernels(
            K_train, K_test=[K_test], K_test_test=[K_test_test]
        )

        for n_cantons in (2, 4):

            # Location of the outputs, relative to each dataset's cutoff directory
            output_dir = f'Kernel_Models/{kernel_name}/KSVC-KPCovR/{n_cantons}-class'

            # Stored KSVC decision functions of DEEM and IZA
            svc_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'
            svc_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'

            # Reassemble train and test decision functions from both datasets
            df_train, df_test = utils.load_data(
                svc_df_deem_file, svc_df_iza_file,
                idxs_deem_train, idxs_deem_test,
                idxs_iza_train, idxs_iza_test
            )

            # Centering and scaling of the decision functions
            df_train, df_test, df_center, df_scale = utils.preprocess_decision_functions(
                df_train, df_test
            )

            # KRR sanity check against the decision functions
            utils.regression_check(
                K_train, K_test, df_train, df_test, regression_type='kernel'
            )

## KPCovR

In [None]:
# Run KPCovR against the stored KSVC decision functions for every cutoff,
# kernel type and class scheme, then store projections, decision functions
# and predicted cantons separately for DEEM and IZA
for cutoff in cutoffs:
    for kernel_type in ('linear', 'gaussian'):
        kernel_name = kernel_type.capitalize()

        kernel_dir = f'{model_dir}/{cutoff}/Kernel_Models/{kernel_name}'

        # File the kernels are stored in
        kernel_file = f'{kernel_dir}/structure_kernels.hdf5'

        # Load kernels
        K_train, K_test, K_test_test = utils.load_kernels(kernel_file)

        # Center and scale kernels
        K_train, K_test, K_test_test = \
            utils.preprocess_kernels(K_train, K_test=[K_test], K_test_test=[K_test_test])

        for n_cantons in (2, 4):

            # Output location, relative to each dataset's cutoff directory
            output_dir = f'Kernel_Models/{kernel_name}/KSVC-KPCovR/{n_cantons}-class'

            # Files holding the IZA and DEEM KSVC decision functions
            svc_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'
            svc_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'

            # Files to store IZA and DEEM KPCovR projections
            pcovr_projection_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structures.hdf5'
            pcovr_projection_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structures.hdf5'

            # Files to store IZA and DEEM KPCovR decision functions
            pcovr_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structure_dfs.dat'
            pcovr_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structure_dfs.dat'

            # Files to store IZA and DEEM KPCovR class predictions
            pcovr_canton_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structure_cantons.dat'
            pcovr_canton_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structure_cantons.dat'

            # File containing the KPCovR hyperparameters
            parameter_dir = f'{kernel_dir}/KSVC-KPCovR/{n_cantons}-class'
            pcovr_parameter_file = f'{parameter_dir}/pcovr_parameters.json'

            # Load the KSVC decision functions that KPCovR is regressed against
            df_train, df_test = utils.load_data(svc_df_deem_file, svc_df_iza_file,
                                                idxs_deem_train, idxs_deem_test,
                                                idxs_iza_train, idxs_iza_test)

            # Center and scale the decision functions
            df_train, df_test, df_center, df_scale = \
                utils.preprocess_decision_functions(df_train, df_test)

            # Run KPCovR
            # NOTE(review): the loaded hyperparameters are only attached to the
            # projection files below; they are not forwarded to utils.do_covr
            # (unlike **svc_parameters in the KSVC cell) -- confirm this is intended
            pcovr_parameters = load_json(pcovr_parameter_file)
            T_train, T_test, dfp_train, dfp_test = \
                utils.do_covr(K_train, K_test, df_train, df_test, covr_type='kernel')

            # Post process the KPCovR decision functions
            # (i.e., turn them back into canton predictions)
            predicted_cantons_train, predicted_cantons_test = \
                utils.postprocess_decision_functions(dfp_train, dfp_test, df_center, df_scale)

            # Save IZA and DEEM KPCovR projections, with the KPCovR
            # hyperparameters attached as HDF5 attributes
            utils.split_and_save(T_train, T_test,
                                 idxs_deem_train, idxs_deem_test,
                                 deem_train_slice, deem_test_slice,
                                 hdf5_attrs=pcovr_parameters,
                                 output=pcovr_projection_deem_file, output_format='%f')

            utils.split_and_save(T_train, T_test,
                                 idxs_iza_train, idxs_iza_test,
                                 iza_train_slice, iza_test_slice,
                                 hdf5_attrs=pcovr_parameters,
                                 output=pcovr_projection_iza_file, output_format='%f')

            # Save IZA and DEEM KPCovR decision functions
            utils.split_and_save(dfp_train, dfp_test,
                                 idxs_deem_train, idxs_deem_test,
                                 deem_train_slice, deem_test_slice,
                                 hdf5_attrs=None,
                                 output=pcovr_df_deem_file, output_format='%f')

            utils.split_and_save(dfp_train, dfp_test,
                                 idxs_iza_train, idxs_iza_test,
                                 iza_train_slice, iza_test_slice,
                                 hdf5_attrs=None,
                                 output=pcovr_df_iza_file, output_format='%f')

            # Save IZA and DEEM KPCovR canton predictions
            utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                 idxs_deem_train, idxs_deem_test,
                                 deem_train_slice, deem_test_slice,
                                 hdf5_attrs=None,
                                 output=pcovr_canton_deem_file, output_format='%d')

            utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                 idxs_iza_train, idxs_iza_test,
                                 iza_train_slice, iza_test_slice,
                                 hdf5_attrs=None,
                                 output=pcovr_canton_iza_file, output_format='%d')

# Linear Models

In [None]:
# Feature count: one block per unordered species pair (self-pairs included),
# each with max_radial**2 * (max_angular + 1) entries
n_species = 2
n_species_pairs = n_species * (n_species + 1) // 2
n_features = (
    n_species_pairs
    * soap_hyperparameters['max_radial']**2
    * (soap_hyperparameters['max_angular'] + 1)
)

# NOTE(review): `extract_species_pair_groups` is neither imported nor defined
# earlier in this notebook, so this line raises NameError on a fresh kernel --
# confirm which module provides it and import it in the import cell
feature_groups = extract_species_pair_groups(n_features, n_species)

## LinearSVC

In [None]:
# Fit a linear SVC on the SOAP features of each species pairing, for every
# cutoff, spectrum type and class scheme, then store the decision functions
# and predicted cantons separately for DEEM and IZA
for cutoff in cutoffs:
    linear_dir = f'{model_dir}/{cutoff}/Linear_Models'
    for spectrum_type in ('power', 'radial'):

        deem_file = f'{deem_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'
        iza_file = f'{iza_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'

        # Assemble the train and test set SOAPs from IZA and DEEM
        soaps_train, soaps_test = utils.load_soaps(deem_file, iza_file,
                                                   idxs_deem_train, idxs_deem_test,
                                                   idxs_iza_train, idxs_iza_test,
                                                   idxs_iza_delete=[RWY])

        # Scale the SOAPs so they are of a 'usable' magnitude for the SVC
        soaps_train, soaps_test = utils.preprocess_soaps(soaps_train, soaps_test)

        for species_pairing, feature_idxs in zip(('OO', 'OSi', 'SiSi'), feature_groups):

            for n_cantons in (2, 4):

                # Output location, relative to each dataset's cutoff directory
                output_dir = f'Linear_Models/LSVC-LPCovR/{n_cantons}-class/{spectrum_type}/{species_pairing}'
                deem_output_dir = f'{deem_dir}/{cutoff}/{output_dir}'
                iza_output_dir = f'{iza_dir}/{cutoff}/{output_dir}'

                # Create the directories the files are actually written to
                # (not `output_dir` itself, which is relative to the working directory)
                os.makedirs(deem_output_dir, exist_ok=True)
                os.makedirs(iza_output_dir, exist_ok=True)

                # Files for IZA and DEEM LSVC decision functions
                svc_df_deem_file = f'{deem_output_dir}/svc_structure_dfs.dat'
                svc_df_iza_file = f'{iza_output_dir}/svc_structure_dfs.dat'

                # Files for IZA and DEEM LSVC canton predictions
                svc_canton_deem_file = f'{deem_output_dir}/svc_structure_cantons.dat'
                svc_canton_iza_file = f'{iza_output_dir}/svc_structure_cantons.dat'

                # File containing the LSVC hyperparameters
                parameter_dir = f'{linear_dir}/LSVC-LPCovR/{n_cantons}-class/{spectrum_type}/{species_pairing}'
                svc_parameter_file = f'{parameter_dir}/svc_parameters.json'

                # Run LSVC on the features of this species pairing only
                svc_parameters = load_json(svc_parameter_file)
                df_train, df_test, predicted_cantons_train, predicted_cantons_test = \
                    utils.do_svc(soaps_train[:, feature_idxs], soaps_test[:, feature_idxs],
                                 cantons_train[n_cantons], cantons_test[n_cantons],
                                 svc_type='linear', **svc_parameters,
                                 outputs=['decision_functions', 'predictions'])

                # Save IZA and DEEM LSVC decision functions
                utils.split_and_save(df_train, df_test,
                                     idxs_deem_train, idxs_deem_test,
                                     deem_train_slice, deem_test_slice,
                                     hdf5_attrs=None,
                                     output=svc_df_deem_file, output_format='%f')

                utils.split_and_save(df_train, df_test,
                                     idxs_iza_train, idxs_iza_test,
                                     iza_train_slice, iza_test_slice,
                                     hdf5_attrs=None,
                                     output=svc_df_iza_file, output_format='%f')

                # Save IZA and DEEM LSVC canton predictions; these are class labels,
                # so they are written as integers ('%d'), like the PCovR predictions
                utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                     idxs_deem_train, idxs_deem_test,
                                     deem_train_slice, deem_test_slice,
                                     hdf5_attrs=None,
                                     output=svc_canton_deem_file, output_format='%d')

                utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                     idxs_iza_train, idxs_iza_test,
                                     iza_train_slice, iza_test_slice,
                                     hdf5_attrs=None,
                                     output=svc_canton_iza_file, output_format='%d')

## LR check

In [None]:
# For every cutoff, spectrum type, species pairing and class scheme, check that
# linear regression on the SOAP features can reproduce the stored LSVC
# decision functions
for cutoff in cutoffs:
    for spectrum_type in ('power', 'radial'):

        deem_file = f'{deem_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'
        iza_file = f'{iza_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'

        # Assemble the train and test set SOAPs from IZA and DEEM
        soaps_train, soaps_test = utils.load_soaps(deem_file, iza_file,
                                                   idxs_deem_train, idxs_deem_test,
                                                   idxs_iza_train, idxs_iza_test,
                                                   idxs_iza_delete=[RWY])

        # Scale the SOAPs so they are of a 'usable' magnitude for the SVC
        soaps_train, soaps_test = utils.preprocess_soaps(soaps_train, soaps_test)

        for species_pairing, feature_idxs in zip(('OO', 'OSi', 'SiSi'), feature_groups):

            for n_cantons in (2, 4):

                # Output location, relative to each dataset's cutoff directory
                output_dir = f'Linear_Models/LSVC-LPCovR/{n_cantons}-class/{spectrum_type}/{species_pairing}'

                # Files holding the IZA and DEEM LSVC decision functions
                svc_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'
                svc_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'

                # Load the decision functions of THIS configuration; without this
                # the check would reuse (and repeatedly re-standardise) whatever
                # df_train/df_test an earlier cell left behind
                df_train, df_test = utils.load_data(svc_df_deem_file, svc_df_iza_file,
                                                    idxs_deem_train, idxs_deem_test,
                                                    idxs_iza_train, idxs_iza_test)

                # Center and scale the decision functions
                df_train, df_test, df_center, df_scale = \
                    utils.preprocess_decision_functions(df_train, df_test)

                # Check that LR can reproduce the decision functions
                utils.regression_check(soaps_train[:, feature_idxs], soaps_test[:, feature_idxs],
                                       df_train, df_test, regression_type='linear')

## PCovR

In [None]:
# Run PCovR against the stored LSVC decision functions for every cutoff,
# spectrum type, species pairing and class scheme, then store projections,
# decision functions and predicted cantons separately for DEEM and IZA
for cutoff in cutoffs:
    # Defined per cutoff here; relying on the value left over from the
    # LinearSVC cell would always point at the last cutoff
    linear_dir = f'{model_dir}/{cutoff}/Linear_Models'
    for spectrum_type in ('power', 'radial'):

        deem_file = f'{deem_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'
        iza_file = f'{iza_dir}/{cutoff}/soaps_{spectrum_type}_full_avg_nonorm.hdf5'

        # Assemble the train and test set SOAPs from IZA and DEEM
        soaps_train, soaps_test = utils.load_soaps(deem_file, iza_file,
                                                   idxs_deem_train, idxs_deem_test,
                                                   idxs_iza_train, idxs_iza_test,
                                                   idxs_iza_delete=[RWY])

        # Scale the SOAPs so they are of a 'usable' magnitude for the SVC
        soaps_train, soaps_test = utils.preprocess_soaps(soaps_train, soaps_test)

        for species_pairing, feature_idxs in zip(('OO', 'OSi', 'SiSi'), feature_groups):

            for n_cantons in (2, 4):

                # Output location, relative to each dataset's cutoff directory
                output_dir = f'Linear_Models/LSVC-LPCovR/{n_cantons}-class/{spectrum_type}/{species_pairing}'

                # Files holding the IZA and DEEM LSVC decision functions
                svc_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'
                svc_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/svc_structure_dfs.dat'

                # Files for IZA and DEEM PCovR projections
                pcovr_projection_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structures.hdf5'
                pcovr_projection_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structures.hdf5'

                # Files for IZA and DEEM PCovR decision functions
                pcovr_df_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structure_dfs.dat'
                pcovr_df_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structure_dfs.dat'

                # Files for IZA and DEEM PCovR canton predictions
                pcovr_canton_deem_file = f'{deem_dir}/{cutoff}/{output_dir}/pcovr_structure_cantons.dat'
                pcovr_canton_iza_file = f'{iza_dir}/{cutoff}/{output_dir}/pcovr_structure_cantons.dat'

                # File containing the PCovR hyperparameters
                parameter_dir = f'{linear_dir}/LSVC-LPCovR/{n_cantons}-class/{spectrum_type}/{species_pairing}'
                pcovr_parameter_file = f'{parameter_dir}/pcovr_parameters.json'

                # Load the LSVC decision functions of THIS configuration that
                # PCovR is regressed against (same steps as in the KPCovR cell)
                df_train, df_test = utils.load_data(svc_df_deem_file, svc_df_iza_file,
                                                    idxs_deem_train, idxs_deem_test,
                                                    idxs_iza_train, idxs_iza_test)

                # Center and scale the decision functions
                df_train, df_test, df_center, df_scale = \
                    utils.preprocess_decision_functions(df_train, df_test)

                # Run PCovR
                # NOTE(review): the loaded hyperparameters are only attached to the
                # projection files below; they are not forwarded to utils.do_covr
                # (unlike **svc_parameters in the LSVC cell) -- confirm this is intended
                pcovr_parameters = load_json(pcovr_parameter_file)
                T_train, T_test, dfp_train, dfp_test = \
                    utils.do_covr(soaps_train[:, feature_idxs], soaps_test[:, feature_idxs],
                                  df_train, df_test, covr_type='linear')

                # Post process the PCovR decision functions
                # (i.e., turn them back into canton predictions)
                predicted_cantons_train, predicted_cantons_test = \
                    utils.postprocess_decision_functions(dfp_train, dfp_test, df_center, df_scale)

                # Save IZA and DEEM PCovR projections, with the PCovR
                # hyperparameters attached as HDF5 attributes
                utils.split_and_save(T_train, T_test,
                                     idxs_deem_train, idxs_deem_test,
                                     deem_train_slice, deem_test_slice,
                                     hdf5_attrs=pcovr_parameters,
                                     output=pcovr_projection_deem_file, output_format='%f')

                utils.split_and_save(T_train, T_test,
                                     idxs_iza_train, idxs_iza_test,
                                     iza_train_slice, iza_test_slice,
                                     hdf5_attrs=pcovr_parameters,
                                     output=pcovr_projection_iza_file, output_format='%f')

                # Save IZA and DEEM PCovR decision functions
                utils.split_and_save(dfp_train, dfp_test,
                                     idxs_deem_train, idxs_deem_test,
                                     deem_train_slice, deem_test_slice,
                                     hdf5_attrs=None,
                                     output=pcovr_df_deem_file, output_format='%f')

                utils.split_and_save(dfp_train, dfp_test,
                                     idxs_iza_train, idxs_iza_test,
                                     iza_train_slice, iza_test_slice,
                                     hdf5_attrs=None,
                                     output=pcovr_df_iza_file, output_format='%f')

                # Save IZA and DEEM PCovR canton predictions
                utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                     idxs_deem_train, idxs_deem_test,
                                     deem_train_slice, deem_test_slice,
                                     hdf5_attrs=None,
                                     output=pcovr_canton_deem_file, output_format='%d')

                utils.split_and_save(predicted_cantons_train, predicted_cantons_test,
                                     idxs_iza_train, idxs_iza_test,
                                     iza_train_slice, iza_test_slice,
                                     hdf5_attrs=None,
                                     output=pcovr_canton_iza_file, output_format='%d')

# Logistic Regression