In [1]:
# This code transforms the data from atoms.npy, hoa.npy and X.npy into the list of
# per-sample dicts that the dataloader expects when running the preprocess_tensors method

import numpy as np

# Read the three input arrays from the working directory.
atoms = np.load("atoms.npy")
X = np.load("X.npy")
hoa = np.load("hoa.npy")

# Remap the codes stored in `atoms`: 1 -> 13 and 0 -> 14; any other value is kept.
# (13 and 14 are presumably the atomic numbers of Al and Si -- TODO confirm.)
# Both masks are computed from the unmodified array before anything is replaced.
was_one = atoms == 1
was_zero = atoms == 0
zeros_remapped = np.where(was_zero, 14, atoms)
atoms = np.where(was_one, 13, zeros_remapped)

# Report the shape of each array
for label, arr in (("atoms:", atoms), ("X:", X), ("hoa:", hoa)):
    print(label, arr.shape)


atoms: (5011, 48)
X: (48, 3)
hoa: (5011,)


In [2]:
import pickle

# Cell parameters shared by every sample: three angles and three edge lengths.
# (Presumably the MOR framework unit cell, given the output file name -- TODO confirm.)
cell_angles = [90, 90, 90]
cell_lengths = [18.256, 20.534, 7.542]

n_samples = len(atoms)

# X holds one coordinate row per atom slot, so it must match the per-sample width of `atoms`.
assert X.shape[0] == atoms.shape[1], "X must have one row per atom in each sample"

# Every sample uses the same coordinates and cell parameters; only 'atom_types' and 'hoa'
# differ. Each sample gets its OWN copy: `[obj] * n` would store n references to a single
# list/array (and pickle preserves that sharing), so an in-place change to one sample
# downstream would silently change all of them.
frac_coords = [X.copy() for _ in range(n_samples)]
angles = [list(cell_angles) for _ in range(n_samples)]
lengths = [list(cell_lengths) for _ in range(n_samples)]

# All per-sample sequences must have the same length before they are zipped together
print(len(frac_coords), len(atoms), len(lengths), len(angles), len(hoa))
assert len(frac_coords) == len(atoms) == len(lengths) == len(angles) == len(hoa)

# One dict per sample with the keys 'frac_coords', 'atom_types', 'lengths', 'angles', 'hoa'
crystal_list = [
    {
        'frac_coords': sample_coords,
        'atom_types': sample_atoms,
        'lengths': sample_lengths,
        'angles': sample_angles,
        'hoa': sample_hoa,
    }
    for sample_coords, sample_atoms, sample_lengths, sample_angles, sample_hoa
    in zip(frac_coords, atoms, lengths, angles, hoa)
]

# Save the data to a pickle file called MOR_data.pickle
with open("MOR_data.pickle", "wb") as f:
    pickle.dump(crystal_list, f)



5011 5011 5011 5011 5011


In [3]:
import pickle

# Split the data into train, validation and test sets and save them to pickle files.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this notebook.
with open("MOR_data.pickle", "rb") as f:
    data = pickle.load(f)
# The input file is closed here, before any output file is opened.

# 60/20/20 split. The test set takes whatever remains after train and validation, so
# truncation by int() never drops a sample.
train_size = int(0.6 * len(data))
val_size = int(0.2 * len(data))
test_size = len(data) - train_size - val_size

# NOTE(review): the split is positional (no shuffling). If the samples are stored in a
# meaningful order, the three sets may not follow the same distribution -- confirm intent.
train_data = data[:train_size]
val_data = data[train_size:train_size + val_size]
test_data = data[train_size + val_size:]

# Sanity checks: the three sets partition the data exactly
assert len(test_data) == test_size
assert len(train_data) + len(val_data) + len(test_data) == len(data)

# Save each subset to its own pickle file
for out_path, subset in (
    ("MOR_train.pickle", train_data),
    ("MOR_val.pickle", val_data),
    ("MOR_test.pickle", test_data),
):
    with open(out_path, "wb") as out_file:
        pickle.dump(subset, out_file)


