In [1]:
#!/usr/bin/python

# Import required modules
import os
import sys
# Make modules in the parent directory importable
sys.path.append('..')
# Directory holding the external Toolbox utilities. The original location is
# kept as the default; set the TOOLBOX_UTILS_DIR environment variable to run
# the notebook on a machine where the utilities live somewhere else.
sys.path.append(os.environ.get('TOOLBOX_UTILS_DIR', '/home/helfrech/Tools/Toolbox/utils'))

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mpltkr
import matplotlib.colors as mplcolors
import matplotlib.patches as mplpatches
import matplotlib.lines as mpllines
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ML
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Utilities
from tools import load_json
import project_utils as utils
import h5py

# Import COSMO style toolkit
import cosmoplot.colorbars as cosmocbars
import cosmoplot.utils as cosmoutils
import cosmoplot.style as cosmostyle

# Select the 'article' style of the COSMO style toolkit
# (presumably this configures the matplotlib defaults -- confirm in cosmoplot.style)
cosmostyle.set_style('article')
# Short alias for the toolkit's color cycle
color_list = cosmostyle.color_cycle

# Linear regression errors on structure properties

In [2]:
# Read the SOAP hyperparameters and pull out the interaction cutoffs
soap_hyperparameters = load_json(
    '../../Processed_Data/soap_hyperparameters.json'
)
cutoffs = soap_hyperparameters['interaction_cutoff']

In [3]:
# Train/test split of the DEEM data set: read both index files as integers
deem_train_idxs, deem_test_idxs = (
    np.loadtxt(idxs_file, dtype=int)
    for idxs_file in (
        '../../Processed_Data/DEEM_330k/train.idxs',
        '../../Processed_Data/DEEM_330k/test.idxs',
    )
)

# Number of DEEM entries covered by the two index sets together
n_deem = sum(map(len, (deem_train_idxs, deem_test_idxs)))

In [4]:
# Train/test split of the IZA data set: read both index files as integers
iza_train_idxs, iza_test_idxs = (
    np.loadtxt(idxs_file, dtype=int)
    for idxs_file in (
        '../../Processed_Data/IZA_226/train.idxs',
        '../../Processed_Data/IZA_226/test.idxs',
    )
)

# Number of IZA entries covered by the two index sets together
n_iza = sum(map(len, (iza_train_idxs, iza_test_idxs)))

In [5]:
# IZA canton labels: integers read from the second column (usecols=1) of the
# cantons file (presumably one row per IZA structure -- confirm against the file)
iza_cantons = np.loadtxt(
    '../../Raw_Data/GULP/IZA_226/cantons.txt',
    usecols=1,
    dtype=int,
)

In [6]:
# Root directories of the processed data, one per data set
iza_dir = '../../Processed_Data/IZA_226/Data'
deem_dir = '../../Processed_Data/DEEM_330k/Data'

In [11]:
# Names of the structure properties to load. Each name is used as a
# dictionary key and as part of the reference file name (structure_<name>.dat).
property_names = ['volumes', 'energies']

# Reference structure properties, keyed by property name
deem_structure_properties = {}
iza_structure_properties = {}
for pn in property_names:
    deem_structure_properties[pn] = np.loadtxt(f'{deem_dir}/structure_{pn}.dat')
    iza_structure_properties[pn] = np.loadtxt(f'{iza_dir}/structure_{pn}.dat')

# Linear-model (LR) predictions of the structure properties,
# keyed first by SOAP cutoff and then by property name
deem_predicted_structure_properties = {}
iza_predicted_structure_properties = {}
for cutoff in cutoffs:
    deem_predicted_structure_properties[cutoff] = {}
    iza_predicted_structure_properties[cutoff] = {}
    for pn in property_names:
        # The model output directories are named after the capitalized property
        property_label = pn.capitalize()

        deem_predicted_structure_properties[cutoff][pn] = \
            np.loadtxt(f'{deem_dir}/{cutoff}/Linear_Models/LR/{property_label}/lr_structure_properties.dat')

        iza_predicted_structure_properties[cutoff][pn] = \
            np.loadtxt(f'{iza_dir}/{cutoff}/Linear_Models/LR/{property_label}/lr_structure_properties.dat')

In [14]:
# Group the IZA structure indices by canton once, up front: the grouping does
# not depend on the cutoff or on the property. Canton labels in 1..max that
# have no members are left out, because an error cannot be computed over an
# empty selection and would otherwise abort the whole report.
canton_members = {}
for canton in range(1, np.amax(iza_cantons)+1):
    canton_idxs = np.nonzero(iza_cantons == canton)[0]
    if canton_idxs.size > 0:
        canton_members[canton] = canton_idxs

# Print the mean absolute error of the predicted structure properties
# for every (cutoff, property) combination
for cutoff in cutoffs:
    for pn in property_names:
        property_label = pn.capitalize()
        print(f'-----Error for {cutoff} {property_label}-----')

        # DEEM: error restricted to the DEEM test-set structures
        deem_error = mean_absolute_error(
            deem_structure_properties[pn][deem_test_idxs],
            deem_predicted_structure_properties[cutoff][pn][deem_test_idxs]
        )

        # IZA: error over every IZA structure.
        # NOTE(review): unlike DEEM, this is not restricted to a test set
        # (iza_test_idxs is loaded earlier but not used here) -- confirm
        # that this is intended.
        iza_error = mean_absolute_error(
            iza_structure_properties[pn],
            iza_predicted_structure_properties[cutoff][pn]
        )

        print(f'DEEM: {deem_error}')
        print(f'IZA: {iza_error}')
        print('')

        # Per-canton breakdown of the IZA error
        for canton, canton_idxs in canton_members.items():
            canton_error = mean_absolute_error(
                iza_structure_properties[pn][canton_idxs],
                iza_predicted_structure_properties[cutoff][pn][canton_idxs]
            )

            print(f'IZA{canton}: {canton_error}')

        print('')

-----Error for 3.5 Volumes-----
DEEM: 2.7935750685009757
IZA: 6.797542311606806

IZA1: 7.474096733435603
IZA2: 6.675127613796397
IZA3: 6.108586385886484
IZA4: 44.901733629450774

-----Error for 3.5 Energies-----
DEEM: 0.656013285557361
IZA: 1.3495293905570203

IZA1: 1.725520216705211
IZA2: 1.2653855121502229
IZA3: 1.094296196309776
IZA4: 15.811260849812243

-----Error for 6.0 Volumes-----
DEEM: 1.118436900240289
IZA: 1.6516906987878806

IZA1: 0.897330599872892
IZA2: 1.5224326348762411
IZA3: 1.842321136775539
IZA4: 30.860908174871696

-----Error for 6.0 Energies-----
DEEM: 0.11258433185549802
IZA: 0.3139135473987966

IZA1: 0.46050723632064805
IZA2: 0.28128755861027577
IZA3: 0.2392501746590717
IZA4: 4.24061441363483



In [None]:
# TODO: look at the regression backup to see what else to put in here