In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error
import os

In [21]:
# Get data
def load_predictions(base_dir):
    """Collect true/predicted values from every result directory below ``base_dir``.

    A result directory is any directory that contains ``test_data.csv``; a
    ``train_data.csv`` is read from that same directory. Both files must
    provide ``Predicted`` and ``True`` columns.

    The adsorbate and feature-set names are parsed from the directory path:
    the adsorbate is whatever sits between the preceding ``/`` and the first
    ``ads/``, the feature set is the path component right after the last
    ``ads/``. For example a directory ``<base_dir>/NH2ads/geo`` is stored
    under ``['NH2']['geo']``.

    Parameters
    ----------
    base_dir : str
        Root of the tree to scan. A non-existent directory yields an empty
        dict because ``os.walk`` ignores listing errors by default.

    Returns
    -------
    dict
        ``{adsorbate: {feature_set: {'y_train_pred': [...], 'y_train_true': [...],
        'y_test_pred': [...], 'y_test_true': [...]}}}``

    Raises
    ------
    FileNotFoundError
        If a result directory has ``test_data.csv`` but no ``train_data.csv``.
    KeyError
        If a CSV lacks the ``Predicted`` or ``True`` column.
    """
    results = {}
    for root, dirs, files in os.walk(base_dir):
        # Sorting in place fixes the traversal order, so if two directories
        # map to the same (adsorbate, feature set) the winner is deterministic.
        dirs.sort()

        if 'test_data.csv' not in files:
            continue

        ads = root.split('ads/')[0].split('/')[-1]
        features = root.split('ads/')[-1].split('/')[0]

        # Read both splits before touching `results`, so a failed read never
        # leaves a half-populated entry behind.
        entry = {}
        for split in ('train', 'test'):
            frame = pd.read_csv(os.path.join(root, split + '_data.csv'))
            entry['y_' + split + '_pred'] = frame['Predicted'].tolist()
            entry['y_' + split + '_true'] = frame['True'].tolist()

        results.setdefault(ads, {}).setdefault(features, {}).update(entry)

    return results


data = load_predictions('/home/hsang/ML/pmops_0.8')


In [22]:
# Parity plots (training set): one panel per (feature set, adsorbate) pair,
# rows = feature sets, columns = adsorbates.
adsorbates = ['H', 'N', 'NH', 'NH2', 'NH3', 'N2H', 'N2H2']
feature_sets = ['geo', 'tab', 'elec', 'all']

feature_labels = ['Geometric', 'Tabulated', 'Electronic', 'All']
adsorbate_labels = ['H', 'N', 'NH', 'NH$_2$', 'NH$_3$', 'N$_2$H', 'N$_2$H$_2$']

plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12
plt.rcParams['figure.dpi'] = 600

# Grid shape follows the lists above; squeeze=False keeps `axs` 2-D even if
# one of the lists is reduced to a single entry.
fig, axs = plt.subplots(len(feature_sets), len(adsorbates), figsize=(16, 10), squeeze=False)

for i, feature_set in enumerate(feature_sets):
    for j, adsorbate in enumerate(adsorbates):
        results = data[adsorbate][feature_set]
        y_true = results['y_train_true']
        y_pred = results['y_train_pred']

        # Axis range is taken from the true values with a 0.3 margin.
        # NOTE(review): predictions outside this range are not drawn —
        # confirm that hiding such outliers is intended.
        lims = [min(y_true) - 0.3, max(y_true) + 0.3]

        ax = axs[i, j]
        ax.plot(y_true, y_pred, '.b', markersize=3)  # x = true, y = predicted
        ax.plot(lims, lims, 'k-')  # y = x reference line
        ax.set_xlim(lims)
        ax.set_ylim(lims)

        # Add labels
        if i == 0:
            ax.set_title(adsorbate_labels[j], fontweight='bold')
        if j == 0:
            ax.set_ylabel(feature_labels[i], fontweight='bold')

# Add common labels. True values are on the x-axis and predictions on the
# y-axis (see ax.plot above), so the shared labels must say the same.
fig.text(0.5, 0.04, 'True Adsorption Energies (eV)', ha='center', fontweight='bold')
fig.text(0.04, 0.5, 'Predicted Adsorption Energies (eV)', va='center', rotation='vertical', fontweight='bold')

plt.tight_layout(rect=[0.05, 0.05, 1, 1])
plt.savefig('/home/hsang/ML/figures/sisso_pmops0.8ops_train_predicted_vs_true.png')
plt.close(fig)

In [23]:
# Parity plots (test set): one panel per (feature set, adsorbate) pair,
# rows = feature sets, columns = adsorbates.
adsorbates = ['H', 'N', 'NH', 'NH2', 'NH3', 'N2H', 'N2H2']
feature_sets = ['geo', 'tab', 'elec', 'all']

feature_labels = ['Geometric', 'Tabulated', 'Electronic', 'All']
adsorbate_labels = ['H', 'N', 'NH', 'NH$_2$', 'NH$_3$', 'N$_2$H', 'N$_2$H$_2$']

plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12
plt.rcParams['figure.dpi'] = 600

# Grid shape follows the lists above; squeeze=False keeps `axs` 2-D even if
# one of the lists is reduced to a single entry.
fig, axs = plt.subplots(len(feature_sets), len(adsorbates), figsize=(16, 10), squeeze=False)

for i, feature_set in enumerate(feature_sets):
    for j, adsorbate in enumerate(adsorbates):
        results = data[adsorbate][feature_set]
        y_true = results['y_test_true']
        y_pred = results['y_test_pred']

        # Axis range is taken from the true values with a 0.3 margin.
        # NOTE(review): predictions outside this range are not drawn —
        # confirm that hiding such outliers is intended.
        lims = [min(y_true) - 0.3, max(y_true) + 0.3]

        ax = axs[i, j]
        ax.plot(y_true, y_pred, '.b', markersize=3)  # x = true, y = predicted
        ax.plot(lims, lims, 'k-')  # y = x reference line
        ax.set_xlim(lims)
        ax.set_ylim(lims)

        # Add labels
        if i == 0:
            ax.set_title(adsorbate_labels[j], fontweight='bold')
        if j == 0:
            ax.set_ylabel(feature_labels[i], fontweight='bold')

# Add common labels. True values are on the x-axis and predictions on the
# y-axis (see ax.plot above), so the shared labels must say the same.
fig.text(0.5, 0.04, 'True Adsorption Energies (eV)', ha='center', fontweight='bold')
fig.text(0.04, 0.5, 'Predicted Adsorption Energies (eV)', va='center', rotation='vertical', fontweight='bold')

plt.tight_layout(rect=[0.05, 0.05, 1, 1])
plt.savefig('/home/hsang/ML/figures/sisso_pmops0.8ops_test_predicted_vs_true.png')
plt.close(fig)

In [24]:
# MAE heatmaps: one figure for the training set and one for the test set,
# rows = adsorbates, columns = feature sets. Both figures use the same
# colour-scale bounds so they can be compared directly.
vmin, vmax = 0, 0.8

adsorbates = ['H', 'N', 'NH', 'NH2', 'NH3', 'N2H', 'N2H2']
feature_sets = ['geo', 'tab', 'elec', 'all']

feature_labels = ['Geometric', 'Tabulated', 'Electronic', 'All']
adsorbate_labels = ['H', 'N', 'NH', 'NH$_2$', 'NH$_3$', 'N$_2$H', 'N$_2$H$_2$']

# Fill the (adsorbate x feature set) error tables.
MAE_train = np.zeros((len(adsorbates), len(feature_sets)))
MAE_test = np.zeros_like(MAE_train)

for i, adsorbate in enumerate(adsorbates):
    for j, feature_set in enumerate(feature_sets):
        results = data[adsorbate][feature_set]
        MAE_train[i, j] = mean_absolute_error(results['y_train_true'], results['y_train_pred'])
        MAE_test[i, j] = mean_absolute_error(results['y_test_true'], results['y_test_pred'])

plt.rcParams['font.size'] = 12
plt.rcParams['figure.dpi'] = 600

# Draw and save the two heatmaps with identical settings.
heatmap_jobs = (
    (MAE_train, '/home/hsang/ML/figures/sisso_pmops0.8ops_train_MAE_heatmap.png'),
    (MAE_test, '/home/hsang/ML/figures/sisso_pmops0.8ops_test_MAE_heatmap.png'),
)
for mae_matrix, figure_path in heatmap_jobs:
    plt.figure(figsize=(5, 5))
    sns.heatmap(
        mae_matrix,
        annot=True,
        cmap='viridis',
        xticklabels=feature_labels,
        yticklabels=adsorbate_labels,
        cbar_kws={'label': 'MAE (eV)'},
        vmin=vmin,
        vmax=vmax,
    )
    plt.xlabel('Feature Set', fontweight='bold')
    plt.ylabel('Adsorbate', fontweight='bold')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(figure_path)
    plt.close()