In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../')
import numpy as np
from scripts.data import *
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('plot.mplstyle')
%config InlineBackend.figure_format = 'retina'

In [4]:
# Read both test trajectories with the same settings, atomistic file first.
# index=":" presumably selects every frame of each file (ASE slice syntax) — confirm.
test_atomistic, test_cg = (
    read(xyz_path, index=":")
    for xyz_path in ("../atomistic.xyz", "../coarse_grained.xyz")
)

In [6]:
# Representation the model is built on. Later cells compare this value against
# "cg" and "A_cg"; any other value (such as "atomistic") takes the atomistic path.
struct_type = "atomistic"
# presumably the tuned SOAP cutoff, atom sigma and regularisation noise for this
# representation (going by the helper name) — TODO confirm in scripts.data
soap_cutoff, atom_sigma, noise = get_opt_hypers(struct_type)
# NOTE(review): `noise` is rebound here, so the value returned by get_opt_hypers
# on the previous line is discarded. Presumably both helpers return the same
# number — verify.
desc, noise = get_opt_soap_descriptor(struct_type, l_max=8)

In [7]:
# Key of the per-atom array that holds the local energies to collect.
energy_type = "e_local_mofff"
# For every coarse-grained test structure keep only the entries belonging to
# atoms with atomic number 14 (Si).
# presumably Si labels one bead type of the coarse-grained model — TODO confirm
e = [s.arrays[energy_type][s.numbers == 14] for s in test_cg]

In [9]:
# `e` holds one entry per structure in `test_cg`, so this is the number of
# structures that were processed.
len(e)

41

In [3]:


# Load the coarse-grained and atomistic dataframes used for training.
# energy_cutoff=1: meaning and units are not visible here — TODO confirm in scripts.data
complete_cg_df, complete_a_df = get_complete_dataframes(energy_cutoff=1)

# Symbol relabelling handed to map_chemical_symbols for every test structure below.
mapping = {"C": "N", "H": "Zn", "B": "C", "Be": "C", "He": "H", "Li": "H"}
# NOTE(review): n_train is not used by any cell visible in this part of the notebook.
n_train = 32000

# presumably ZIF-8 structures under compression (going by the helper name) — verify
test_structures = get_zif8_compression_data()
for s in test_structures:
    # assumed to relabel the symbols of `s` in place, since the return value is
    # ignored — TODO confirm
    map_chemical_symbols(s, mapping)



In [5]:
# Boolean mask picking out the Zn atoms of the first test structure.
zincs = test_structures[0].symbols == "Zn"
# Zn-only sub-structure, and its per-atom "c_myPE" values.
s = test_structures[0][zincs]
test_energies = s.arrays["c_myPE"]

In [6]:
# Inspect the current `s`; in the recorded output it is the 96-atom Zn-only selection.
s

Atoms(symbols='Zn96', pbc=True, cell=[[33.941731950526574, 0.0, 0.0], [-5.027794804700271e-06, 33.94173374344722, 0.0], [-9.199976236150437e-06, 6.341895880499676e-06, 33.94173372501221]], c_myPE=...)

In [9]:
# B_site is switched off only for the "A_cg" representation.
B_site = struct_type != "A_cg"
# Both coarse-grained representations train without an atomistic dataframe;
# every other struct_type uses the complete atomistic one.
a_df = None if struct_type in ("cg", "A_cg") else complete_a_df

struct_ids = get_fold_ids(complete_cg_df, 1)

# Energies always come from the coarse-grained dataframe.
train_energies = get_energies(complete_cg_df, struct_ids, all_rattled_batches)

# SOAP vectors come from the atomistic dataframe when one was selected above,
# otherwise from the coarse-grained dataframe.
train_soaps = calc_soap_vectors(
    complete_cg_df if a_df is None else a_df,
    struct_ids,
    desc,
    all_rattled_batches,
    B_site=B_site,
)

# Flatten the per-structure pieces: energies become a single column, SOAP
# vectors are stacked along the first axis; both must end up with equal length.
train_energies = np.concatenate(train_energies).reshape(-1, 1)
train_soaps = np.concatenate(train_soaps)
assert len(train_soaps) == len(train_energies)

In [10]:
# Descriptor for the test structures, built from the same struct_type, cutoff
# and sigma as above with l_max=8.
test_desc = build_soap_descriptor(struct_type, soap_cutoff, atom_sigma, l_max=8)

# Parallel lists: entry i of each list belongs to test_structures[i].
# Note that this rebinds `test_energies`, replacing any value set by an earlier cell.
test_energies = []
test_vectors = []
for s in test_structures:
    # NOTE(review): "c_myPE" is taken for every atom of `s` here, whereas the
    # earlier exploratory cell restricted it to Zn. Confirm that the rows of
    # calc(s)["data"] line up with these energies.
    test_energies.append(s.arrays["c_myPE"])
    test_vectors.append(test_desc.calc(s)["data"])

In [11]:
# Show the raw descriptor arrays (one array per test structure).
# NOTE(review): this prints every array in full; a compact summary such as the
# array shapes would keep the saved notebook smaller.
test_vectors

[array([[3.02547720e-01, 3.75794921e-32, 3.24961627e-23, ...,
         1.93693737e-09, 1.63994601e-10, 0.00000000e+00],
        [3.02547353e-01, 2.94095586e-32, 3.24961198e-23, ...,
         1.93693310e-09, 1.63993818e-10, 0.00000000e+00],
        [3.02546282e-01, 1.55744898e-32, 3.24967191e-23, ...,
         1.93690636e-09, 1.63990910e-10, 0.00000000e+00],
        ...,
        [3.02546887e-01, 3.74122665e-54, 3.24956597e-23, ...,
         1.93689367e-09, 1.63984833e-10, 0.00000000e+00],
        [3.02545759e-01, 3.74121269e-54, 3.24955385e-23, ...,
         1.93688644e-09, 1.63984221e-10, 0.00000000e+00],
        [3.02549431e-01, 4.58239883e-33, 3.24948862e-23, ...,
         1.93691132e-09, 1.63984993e-10, 0.00000000e+00]]),
 array([[3.02630440e-01, 3.87791336e-33, 3.25099984e-23, ...,
         1.93933004e-09, 1.63205993e-10, 0.00000000e+00],
        [3.02630737e-01, 7.67292852e-33, 3.25097887e-23, ...,
         1.93926738e-09, 1.63184202e-10, 0.00000000e+00],
        [3.02630204e-01, 