In [None]:
import os
import sys
sys.path.append('..')

import numpy as np
from scipy.stats import gaussian_kde

import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mpltkr
import matplotlib.colors as mplcolors
import matplotlib.patches as mplpatches
import matplotlib.lines as mpllines
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Utilities
import project_utils as utils
import h5py

# Import COSMO style toolkit
import cosmoplot.colorbars as cosmocbars
import cosmoplot.utils as cosmoutils
import cosmoplot.style as cosmostyle

# Select the 'article' preset of the COSMO style toolkit before any figure is built
# (presumably this configures matplotlib defaults -- confirm in cosmoplot.style)
cosmostyle.set_style('article')
# Color sequence exposed by the style module; the plotting cells index into it by position
color_list = cosmostyle.color_cycle

# PCA on structures

In [None]:
# Value interpolated into the `Data/{cutoff}/LPCA` input folder and the
# `Results/{cutoff}` output folder, i.e. it selects which precomputed data set is plotted
# (presumably the descriptor cutoff radius -- TODO confirm meaning and units)
cutoff = 6.0

In [None]:
# Root folders of the processed DEEM_330k and IZA_230 data sets (relative to this notebook)
deem_dir = '../../Processed_Data/DEEM_330k'
iza_dir = '../../Processed_Data/IZA_230'
# Sub-folder, relative to each data set root, that holds the PCA output for this cutoff
data_dir = f'Data/{cutoff}/LPCA'

In [None]:
# Canton label of every IZA structure, read from column 1 of the compositions file;
# the plotting cells treat the labels as integers running from 1 to max(cantons)
cantons = np.loadtxt('../../Raw_Data/IZA_230/cantons_compositions.dat', usecols=1, dtype=int)
n_iza = len(cantons)

# Row indices of the canton-4 IZA structures. They are NOT removed from the IZA
# arrays in this notebook; the histogram cells use them to place the 'RWY' annotation.
iza_delete = np.nonzero(cantons == 4)[0]
# Row indices of the Deem structures that are dropped from every Deem array below
# (path built from `deem_dir` so the data root is defined in one place only)
deem_delete = np.loadtxt(f'{deem_dir}/10kJmol_error.idxs', dtype=int)

In [None]:
# Deem PCA projections with the flagged rows (`deem_delete`) removed
deem_pca = np.delete(
    utils.load_hdf5(f'{deem_dir}/{data_dir}/pca_structures.hdf5'),
    deem_delete,
    axis=0
)

# IZA PCA projections; every row is kept so that the rows stay aligned with `cantons`
iza_pca = utils.load_hdf5(f'{iza_dir}/{data_dir}/pca_structures.hdf5')

# Combined array: IZA rows first, Deem rows after
pca = np.vstack([iza_pca, deem_pca])

In [None]:
# Structure energies
# Deem energies with the flagged rows (`deem_delete`) removed; the path is built from
# `deem_dir` so the data root is defined in one place only
deem_330k_energies = np.delete(
    np.loadtxt(f'{deem_dir}/Data/structure_energies.dat'),
    deem_delete
)

# IZA energies: every row is kept. The plotting cells slice the combined array with
# `n_iza = len(cantons)` and look up `iza_delete` in it, so dropping IZA rows here
# would misalign those indices.
iza_energies = np.loadtxt(f'{iza_dir}/Data/structure_energies.dat')

# Combined array (IZA first, then Deem), shifted so the lowest energy is zero
energies = np.concatenate((iza_energies, deem_330k_energies))
relative_energies = energies - np.amin(energies)

In [None]:
# Structure volumes
# Deem volumes with the flagged rows (`deem_delete`) removed; the path is built from
# `deem_dir` so the data root is defined in one place only
deem_330k_volumes = np.delete(
    np.loadtxt(f'{deem_dir}/Data/structure_volumes.dat'),
    deem_delete
)

# IZA volumes: every row is kept. The plotting cells slice the combined array with
# `n_iza = len(cantons)` and look up `iza_delete` in it, so dropping IZA rows here
# would misalign those indices.
iza_volumes = np.loadtxt(f'{iza_dir}/Data/structure_volumes.dat')

# Combined array (IZA first, then Deem), shifted so the smallest volume is zero
volumes = np.concatenate((iza_volumes, deem_330k_volumes))
relative_volumes = volumes - np.amin(volumes)

In [None]:
# Histogram of PCA values

# PCA HISTOGRAM
n_pca = 2
fig = plt.figure(figsize=(10.0, 2.0))
# Grid columns 0-1 hold the energy panel, column 2 is a narrow empty spacer,
# columns 3-4 hold one panel per principal component
gs = fig.add_gridspec(nrows=1, ncols=5, wspace=0.1, width_ratios=(1.0, 1.0, 0.4, 1.0, 1.0))

# Each panel gets a Deem axis (left y scale) and a twinned IZA axis (right y scale)
deem_pca_axes = [fig.add_subplot(gs[0, i+3]) for i in range(0, n_pca)]
iza_pca_axes = [axs.twinx() for axs in deem_pca_axes]
deem_energy_axs = fig.add_subplot(gs[0, 0:2])
iza_energy_axs = deem_energy_axs.twinx()

# Tie the y scales of the PC panels together (Deem with Deem, IZA with IZA).
# Axes.sharey is used instead of get_shared_y_axes().join(...): modifying the
# grouper returned by get_shared_y_axes() was deprecated in Matplotlib 3.6 and
# fails on later releases, where only a read-only view is returned.
for ax in deem_pca_axes[1:]:
    ax.sharey(deem_pca_axes[0])

for ax in iza_pca_axes[1:]:
    ax.sharey(iza_pca_axes[0])

# Only the outer y tick labels are kept: Deem on the far left, IZA on the far right
iza_pca_axes[0].tick_params(labelright=False)
deem_pca_axes[1].tick_params(labelleft=False)

# Per-component data range over IZA and Deem together; `pca` is already the
# row-wise stack of `iza_pca` and `deem_pca`, so it is not rebuilt here
histogram_max = np.amax(pca, axis=0)
histogram_min = np.amin(pca, axis=0)
# Keyword arguments shared by every PCA histogram
histogram_parameters = dict(
    bins=50,
    density=False,
    log=False,
    alpha=1.0,
    histtype='step',
    stacked=True
)

# Canton bookkeeping does not depend on the principal component, so it is built
# once here instead of once per panel
canton_ids = range(1, np.amax(cantons)+1)
canton_idxs_list = [np.nonzero(cantons == i)[0] for i in canton_ids]
iza_label_list = [f'IZA{i}' for i in canton_ids]
iza_colors = [color_list[i] for i in canton_ids]

# One panel per principal component; the Deem and IZA histograms share the x range
# but are drawn on separate (twinned) y axes
for pc, (deem_axs, iza_axs, subfigure_label) in enumerate(zip(deem_pca_axes, iza_pca_axes, ['(b)', '(c)'])):
    histogram_range = (histogram_min[pc], histogram_max[pc])

    # Deem
    deem_axs.hist(
        deem_pca[:, pc],
        range=histogram_range,
        **histogram_parameters,
        color=color_list[5],
        label='Deem'
    )

    # IZA: one data set per canton, stacked (see `histogram_parameters`)
    iza_axs.hist(
        [iza_pca[canton_idxs, pc] for canton_idxs in canton_idxs_list],
        range=histogram_range,
        **histogram_parameters,
        color=iza_colors,
        label=iza_label_list,
    )

    # Hide the offset/multiplier text of the Deem y axis
    deem_axs.get_yaxis().get_offset_text().set_visible(False)

    deem_axs.set_xlabel(fr'PC$_{{{pc+1}}}$')

    deem_axs.text(
        0.05, 0.95, subfigure_label,
        horizontalalignment='left', verticalalignment='top',
        transform=deem_axs.transAxes
    )

deem_pca_axes[0].set_ylabel('Deem Frequency')

# `deem_axs` / `iza_axs` still refer to the last panel drawn by the loop above
iza_axs.set_ylabel('IZA Frequency')

# Single legend on the last panel: Deem first, then the IZA cantons in reverse order
handles_deem, labels_deem = deem_axs.get_legend_handles_labels()
handles_iza, labels_iza = iza_axs.get_legend_handles_labels()
handles = handles_deem + handles_iza[::-1]
labels = labels_deem + labels_iza[::-1]
deem_axs.legend(handles, labels)

# ENERGY HISTOGRAM
# Shared binning so the IZA and Deem histograms line up on the x axis
energy_histogram_parameters = dict(
    range=(np.amin(relative_energies), np.amax(relative_energies)),
    density=False,
    bins=100,
    log=False,
    alpha=1.0,
    histtype='step'
)

# The first `n_iza` entries of `relative_energies` are IZA, the rest are Deem
iza_energy_axs.hist(
    relative_energies[0:n_iza],
    **energy_histogram_parameters,
    color=color_list[1], label='IZA'
)
iza_energy_axs.axvline(
    np.mean(relative_energies[0:n_iza]),
    color=color_list[1], linestyle='--', label=r'$\mu_{IZA}$'
)

deem_energy_axs.hist(
    relative_energies[n_iza:],
    **energy_histogram_parameters, color=color_list[2], label='Deem'
)
deem_energy_axs.axvline(
    np.mean(relative_energies[n_iza:]),
    color=color_list[2], linestyle='--', label=r'$\mu_{Deem}$'
)

deem_energy_axs.set_xlabel(r'Relative Energy E - E$_{min}$ (kJ/mol Si)')
deem_energy_axs.set_ylabel('Deem Frequency')
iza_energy_axs.set_ylabel('IZA Frequency')
deem_energy_axs.ticklabel_format(axis='y', useMathText=True, scilimits=(0, 0))

deem_energy_axs.text(
    0.03, 0.93, '(a)',
    horizontalalignment='left', verticalalignment='top',
    transform=deem_energy_axs.transAxes
)

# `iza_delete` is an index ARRAY, so indexing with it yields an array rather than a
# number. Each entry is annotated with plain float coordinates instead of handing
# array-valued coordinates to matplotlib.
# NOTE(review): every flagged structure is labelled 'RWY'; this assumes canton 4
# contains only that framework -- confirm against the cantons file.
for rwy_idx in iza_delete:
    rwy_energy = float(relative_energies[rwy_idx])
    iza_energy_axs.annotate(
        'RWY',
        xy=(rwy_energy, 1),
        xytext=(rwy_energy-17, 7),
        horizontalalignment='center',
        verticalalignment='bottom',
        arrowprops=dict(arrowstyle='-|>')
    )

# Legend order: Deem histogram, IZA histogram, Deem mean line, IZA mean line
handles_deem, labels_deem = deem_energy_axs.get_legend_handles_labels()
handles_iza, labels_iza = iza_energy_axs.get_legend_handles_labels()
handles = [handles_deem[0], handles_iza[0], handles_deem[1], handles_iza[1]]
labels = [labels_deem[0], labels_iza[0], labels_deem[1], labels_iza[1]]
deem_energy_axs.legend(handles, labels)

# Create the per-cutoff results folder if needed so that savefig cannot fail on a
# missing directory
os.makedirs(f'../../Results/{cutoff}', exist_ok=True)
fig.savefig(f'../../Results/{cutoff}/pca_energy_histogram_{cutoff}.pdf', bbox_inches='tight')

plt.show()

In [None]:
fig = plt.figure(figsize=(7.0, 3.5))
# Deem counts on the left y axis, IZA counts on a twinned right y axis
deem_volume_axs = fig.add_subplot(1, 1, 1)
iza_volume_axs = deem_volume_axs.twinx()

# VOLUME HISTOGRAM
# `relative_volumes` (= volumes - min(volumes)) is used throughout instead of
# recomputing the shift for every call; its first `n_iza` entries are IZA
volume_histogram_parameters = dict(
    range=(0.0, np.amax(relative_volumes)),
    density=False, bins=100, log=False, alpha=1.0
)

iza_volume_axs.hist(
    relative_volumes[0:n_iza],
    **volume_histogram_parameters, color=color_list[1], histtype='step', label='IZA'
)
iza_volume_axs.axvline(
    np.mean(relative_volumes[0:n_iza]),
    color=color_list[1], linestyle='--', label=r'$\mu_{IZA}$'
)

deem_volume_axs.hist(
    relative_volumes[n_iza:],
    **volume_histogram_parameters, color=color_list[2], histtype='step', label='Deem'
)
deem_volume_axs.axvline(
    np.mean(relative_volumes[n_iza:]),
    color=color_list[2], linestyle='--', label=r'$\mu_{Deem}$'
)

# `iza_delete` is an index ARRAY, so indexing with it yields an array rather than a
# number. Each entry is annotated with plain float coordinates instead of handing
# array-valued coordinates to matplotlib.
# NOTE(review): the text offset (-17, 7) is the same as in the energy histogram even
# though the x axis has different units here -- check the placement visually.
for rwy_idx in iza_delete:
    rwy_volume = float(relative_volumes[rwy_idx])
    iza_volume_axs.annotate(
        'RWY',
        xy=(rwy_volume, 1),
        xytext=(rwy_volume-17, 7),
        horizontalalignment='center',
        verticalalignment='bottom',
        arrowprops=dict(arrowstyle='-|>')
    )

iza_volume_axs.set_ylabel('IZA Frequency')
deem_volume_axs.set_ylabel('Deem Frequency')
deem_volume_axs.set_xlabel(r'Relative Volume V - V$_{min}$ ' + u'(\u00c5' + r'$^3$/Si)')
# Raise the lower y limit of the Deem axis to 0.5 while keeping the automatic upper limit
deem_volume_axs.set_ylim((0.5, deem_volume_axs.get_ylim()[1]))
deem_volume_axs.ticklabel_format(axis='y', useMathText=True, scilimits=(0, 0))

# Legend order: Deem histogram, IZA histogram, Deem mean line, IZA mean line
handles_deem, labels_deem = deem_volume_axs.get_legend_handles_labels()
handles_iza, labels_iza = iza_volume_axs.get_legend_handles_labels()
handles = [handles_deem[0], handles_iza[0], handles_deem[1], handles_iza[1]]
labels = [labels_deem[0], labels_iza[0], labels_deem[1], labels_iza[1]]
deem_volume_axs.legend(handles, labels)

# The file name has no placeholders, so a plain string is used; the results folder
# is created if needed so that savefig cannot fail on a missing directory
os.makedirs('../../Results', exist_ok=True)
fig.savefig('../../Results/volume_histogram.pdf', bbox_inches='tight')

plt.show()

In [None]:
# Camera eye position used for the 3D scene
x, y, z = -1.25, -1.25, 0.50

# The scene aspect ratio is set by hand (data extent of the first three PCs,
# normalized to the PC1 extent and scaled by `zoom`) so that zoom can be applied
zoom = 2.0
aspect_ratio_keys = ['x', 'y', 'z']

# Extremes of the first three components over Deem and IZA together
first_three_pcs = np.vstack((deem_pca[:, 0:3], iza_pca[:, 0:3]))
xyz_max = np.amax(first_three_pcs, axis=0)
xyz_min = np.amin(first_three_pcs, axis=0)

xyz_extent = np.abs(xyz_max - xyz_min)
xyz_ratios = xyz_extent / xyz_extent[0] * zoom

# Mapping in the form expected by the `aspectratio` scene argument
aspect_ratio = dict(zip(aspect_ratio_keys, xyz_ratios))

In [None]:
# 3D scatter
# Layout: the 3D scene spans the first two rows (all three columns); the third row
# holds three 2D panels
fig = make_subplots(
    rows=3, cols=3,
    column_widths=[1.0, 1.0, 1.0],
    row_heights=[1.0, 1.0, 1.0],
    horizontal_spacing=0.10,
    specs=[
        [{'type': 'scene', 'rowspan': 2, 'colspan': 3, 'b': 0.0}, None, None],
        [None, None, None],
        [{'type': 'xy'}, {'type': 'xy'}, {'type': 'xy'}]
    ]
)

## Deem
fig.add_trace(
    go.Scatter3d(
        x=deem_pca[:, 0],
        y=deem_pca[:, 1],
        z=deem_pca[:, 2],
        mode='markers',
        marker=dict(color=color_list[11], size=1),
        name='DEEM'
    ),
    row=1, col=1
)

## IZA: one trace per canton so each gets its own color and legend entry
for i in range(1, np.amax(cantons) + 1):
    iza_pts = np.nonzero(cantons == i)[0]
    fig.add_trace(
        go.Scatter3d(
            x=iza_pca[iza_pts, 0],
            y=iza_pca[iza_pts, 1],
            z=iza_pca[iza_pts, 2],
            mode='markers',
            marker=dict(color=color_list[i], size=2),
            name=f'IZA{i}'
        ),
        row=1, col=1
    )

## Format the 3D scatter
# Settings common to the three scene axes; only the title differs per axis
scene_axis_style = dict(
    showgrid=True,
    linewidth=2,
    ticks='',
    mirror=True,
    showexponent='all',
    exponentformat='power',
    showticklabels=False
)
fig.update_scenes(
    xaxis=dict(title='PC<sub>1</sub>', **scene_axis_style),
    yaxis=dict(title='PC<sub>2</sub>', **scene_axis_style),
    zaxis=dict(title='PC<sub>3</sub>', **scene_axis_style),
    camera=dict(
        # The eye's y component now uses the `y` setting; it was previously passed
        # `x`, so changing `y` in the camera settings had no effect
        eye=dict(x=x, y=y, z=z),
        projection=dict(type='orthographic')
    ),
    aspectratio=aspect_ratio,
    row=1, col=1
)
    
# 2D subplots
# One panel per pair of the first three PCs, placed left to right in row 3

# Axis settings shared by the x and y axes of every panel (only the title differs)
flat_axis_style = dict(
    showline=True,
    linewidth=2,
    mirror=True,
    ticks='',
    showexponent='all',
    exponentformat='power',
    showticklabels=False
)

# IZA row indices grouped by canton label
iza_pts_by_canton = {
    k: np.nonzero(cantons == k)[0] for k in range(1, np.amax(cantons) + 1)
}

pc_pairs = [(i, j) for i in range(0, 3) for j in range(i+1, 3)]
for col_idx, (i, j) in enumerate(pc_pairs, start=1):
    panel = dict(row=3, col=col_idx)

    ## Deem
    deem_trace = go.Scattergl(
        x=deem_pca[:, i],
        y=deem_pca[:, j],
        mode='markers',
        marker=dict(color=color_list[11], size=2),
        name='DEEM',
        showlegend=False
    )
    fig.add_trace(deem_trace, **panel)

    ## IZA
    for k, iza_pts in iza_pts_by_canton.items():
        canton_trace = go.Scattergl(
            x=iza_pca[iza_pts, i],
            y=iza_pca[iza_pts, j],
            mode='markers',
            marker=dict(color=color_list[k], size=4),
            name=f'IZA{k}',
            showlegend=False
        )
        fig.add_trace(canton_trace, **panel)

    ## Format the 2D scatter
    fig.update_xaxes(title_text=f'PC<sub>{i+1}</sub>', **flat_axis_style, **panel)
    fig.update_yaxes(title_text=f'PC<sub>{j+1}</sub>', **flat_axis_style, **panel)

# Global layout setup
fig.update_layout(
    template='simple_white',
    # Legend anchored by its top-left corner at the top-right of the plotting area
    legend=dict(
        x=1.0,
        y=1.0,
        xanchor='left',
        yanchor='top',
        itemsizing='constant'
    ),
    font=dict(size=16,
              family='Serif'),
    autosize=False,
    width=800, height=800,
    margin=dict(l=0, b=50, t=0, r=0)
)

# Save and show figure
# Create the per-cutoff results folder if needed so that neither export can fail on
# a missing directory
os.makedirs(f'../../Results/{cutoff}', exist_ok=True)
fig.write_html(f'../../Results/{cutoff}/pca_{cutoff}.html')
fig.write_image(f'../../Results/{cutoff}/pca_{cutoff}.pdf')
fig.show()