In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl
import seaborn as sns
import smact
import umap
from matplotlib.ticker import FixedLocator, FormatStrFormatter
from scipy.special import rel_entr
from scipy.stats import energy_distance, pearsonr, spearmanr, wasserstein_distance
from sklearn import decomposition
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

from AtomicEmbeddings.core import Embedding

# Global seaborn styling for every figure drawn below: text is scaled to
# twice the default size via font_scale.
sns.set(font_scale=2)

In [None]:
# Load the atomic embeddings and trim them to the elements returned by smact

# Names of the composition-based feature vectors (CBFVs) to compare
cbfvs = ["mat2vec", "random_200", "magpie_sc", "megnet16"]

# Lookup table: CBFV name -> loaded Embedding object
AtomEmbeds = {name: Embedding.load_data(name) for name in cbfvs}

# Symbols to keep, as returned by smact.ordered_elements(1, 83).
# smact is only needed for this lookup; a future update aims to drop it.
el_symbols = smact.ordered_elements(1, 83)

# The set of symbols to keep is the same for every CBFV, so build it once
el_symbols_set = set(el_symbols)

for cbfv in cbfvs:
    embedding = AtomEmbeds[cbfv]

    # Symbols currently held by this embedding
    elements = set(embedding.element_list)

    # Anything present in the embedding but absent from the keep-list goes
    els_to_remove = list(elements.difference(el_symbols_set))
    for el in els_to_remove:
        del embedding.embeddings[el]

    # Sanity check: the printed count is expected to be 83
    print(len(embedding.element_list))

In [None]:
# Correlation dataframe for the magpie_sc embedding; its `pearson_corr`
# column is compared against the megnet16 one further down.
magpie_embedding = AtomEmbeds["magpie_sc"]
df_mag = magpie_embedding.create_correlation_df()
# Disabled filter kept for reference - it would drop rows whose correlation is exactly 1:
# df_mag=df_mag.loc[df_mag.pearson_corr!=1].reset_index(drop=True)
print(df_mag.shape)
df_mag.head()

In [None]:
# Correlation dataframe for the megnet16 embedding; its `pearson_corr`
# column is plotted and compared against the magpie_sc one further down.
megnet_embedding = AtomEmbeds["megnet16"]
df_meg = megnet_embedding.create_correlation_df()
# Disabled filter kept for reference - it would drop rows whose correlation is exactly 1:
# df_meg=df_meg.loc[df_meg.pearson_corr!=1].reset_index(drop=True)
print(df_meg.shape)
df_meg.head()

In [None]:
# Line plot of the megnet16 `pearson_corr` column, in df_meg row order.
fig, ax = plt.subplots()

ax.plot(df_meg["pearson_corr"])

# Title and axis labels so the figure can be read on its own when the
# notebook is skimmed.
ax.set_title("megnet16 Pearson correlation per df_meg row")
ax.set_xlabel("df_meg row index")
ax.set_ylabel("pearson_corr")
plt.show()

In [None]:
# Pull the `pearson_corr` column out of each correlation dataframe as a
# NumPy array (megnet16 first, magpie_sc second).
meg_pearson, mag_pearson = (
    frame["pearson_corr"].to_numpy() for frame in (df_meg, df_mag)
)

In [None]:
# Energy distance between the two samples of Pearson correlations
# (megnet16 vs magpie_sc); the value is shown as the cell output.
energy_distance(u_values=meg_pearson, v_values=mag_pearson)

In [None]:
# Row-by-row gap between the megnet16 and magpie_sc correlations. The arrays
# are subtracted positionally, so both dataframes must list their rows in the
# same order.
pearson_gap = meg_pearson - mag_pearson
df_meg["meg-mag_pearson"] = abs(pearson_gap)
df_meg.head()

In [None]:
# Summary statistics for df_meg, which at this point also carries the
# "meg-mag_pearson" difference column added above.
df_meg.describe()

In [None]:
# Heatmap of the absolute difference between the megnet16 and magpie_sc
# Pearson correlations, laid out on a mend_1 (rows) x mend_2 (columns) grid.
fig, ax = plt.subplots(figsize=(36, 24))
p = df_meg.pivot_table(values="meg-mag_pearson", index="mend_1", columns="mend_2")

# The pivot index (mend_1) runs down the y-axis and the pivot columns (mend_2)
# run along the x-axis, so each axis takes its labels from the matching pivot
# axis. (Previously the two were swapped and the x labels were never applied.)
# NOTE(review): assumes every mend_* entry is a pair whose second item is the
# label to display (presumably the element symbol) - confirm against
# create_correlation_df.
ylabels = [i[1] for i in p.index]
xlabels = [i[1] for i in p.columns]

g = sns.heatmap(
    p,
    cmap="bwr",
    # A real boolean: the string "True" only worked because any non-empty
    # string is truthy (so "False" would have enabled square cells too).
    square=True,
    linecolor="k",
    ax=ax,
    # One label per column / row, taken from the lists built above
    xticklabels=xlabels,
    yticklabels=ylabels,
)

# Explicit title naming both embeddings; the earlier version reused the loop
# variable `cbfv` left over from the set-up cell, which labelled this
# difference plot as "megnet16" alone.
ax.set_title("|megnet16 - magpie_sc| Pearson correlation")
ax.set_xlabel("")
ax.set_ylabel("")

plt.show()