In [2]:
import pickle 
import os
import numpy as np
import random

# Files that must all be present for a child directory to be processed.
_REQUIRED_FILES = ("atoms.npy", "X.npy", "hoa.npy", "angles.npy", "l.npy")


def _load_crystal_list(child_dir):
    """Load the arrays of one zeolite directory and return one dict per sample."""
    atoms = np.load(os.path.join(child_dir, "atoms.npy"))
    frac_coords = np.load(os.path.join(child_dir, "X.npy"))
    hoa = np.load(os.path.join(child_dir, "hoa.npy"))
    # Angles and lengths are stored as plain Python lists in the samples.
    angles = np.load(os.path.join(child_dir, "angles.npy")).tolist()
    lengths = np.load(os.path.join(child_dir, "l.npy")).tolist()

    # Relabel the atom codes: 1 -> 13 and 0 -> 14, any other value is kept.
    # NOTE(review): presumably the atomic numbers of Al and Si -- confirm.
    atoms = np.where(atoms == 1, 13, np.where(atoms == 0, 14, atoms))
    zeolite_code = os.path.basename(child_dir)

    # One sample per row of `atoms`; coordinates, lengths and angles are the
    # same objects for every sample of the directory.
    return [
        {
            'frac_coords': frac_coords,
            'atom_types': atoms[i],
            'lengths': lengths,
            'angles': angles,
            'hoa': hoa[i],
            'zeolite_code': zeolite_code,
        }
        for i in range(len(atoms))
    ]


def _split_samples(samples, train_frac, val_frac, seed):
    """Return (train, val, test) lists taken from a seeded shuffle of a copy of `samples`."""
    shuffled = list(samples)
    # A dedicated generator keeps the split independent of the global RNG state.
    random.Random(seed).shuffle(shuffled)

    train_size = int(train_frac * len(shuffled))
    val_size = int(val_frac * len(shuffled))
    return (
        shuffled[:train_size],
        shuffled[train_size:train_size + val_size],
        shuffled[train_size + val_size:],
    )


def _dump_pickle(obj, path):
    """Pickle `obj` to `path`, overwriting any existing file."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def process_zeolite_data(root_dir=None, seed=42, train_frac=0.6, val_frac=0.2):
    """Build per-zeolite sample lists and train/val/test splits as pickle files.

    Every immediate child directory of `root_dir` that contains 'atoms.npy',
    'X.npy', 'hoa.npy', 'angles.npy' and 'l.npy' is processed; the other
    directories are skipped. For a directory named ``<code>`` these files are
    written inside it:

    * ``<code>_data.pkl``  -- all samples, in the order of 'atoms.npy'
    * ``<code>_train.pkl`` -- first `train_frac` share of the shuffled samples
    * ``<code>_val.pkl``   -- next `val_frac` share
    * ``<code>_test.pkl``  -- the remaining samples

    Each sample is a dict with the keys 'frac_coords', 'atom_types',
    'lengths', 'angles', 'hoa' and 'zeolite_code'.

    Args:
        root_dir: Directory whose children are scanned. Defaults to the
            current working directory.
        seed: Seed of the shuffle applied before splitting. The same seed
            always yields the same split for the same data; pass None for a
            non-reproducible shuffle.
        train_frac: Share of the samples placed in the training split.
        val_frac: Share of the samples placed in the validation split.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    # Small tolerance so that sums such as 0.7 + 0.3 are not rejected because
    # of floating-point rounding.
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1"
        )

    root_dir = os.getcwd() if root_dir is None else os.path.abspath(root_dir)

    # Sorted so that the processing order does not depend on the file system.
    child_dirs = sorted(
        path
        for path in (os.path.join(root_dir, name) for name in os.listdir(root_dir))
        if os.path.isdir(path)
    )

    print(child_dirs)
    for child_dir in child_dirs:
        if not all(os.path.exists(os.path.join(child_dir, name)) for name in _REQUIRED_FILES):
            continue

        print(f"Loading atoms from {child_dir}...")
        zeolite_code = os.path.basename(child_dir)
        crystal_list = _load_crystal_list(child_dir)

        # Full, unshuffled dataset.
        _dump_pickle(crystal_list, os.path.join(child_dir, f"{zeolite_code}_data.pkl"))

        # The splits are computed from the in-memory list; re-reading the file
        # that was just written is unnecessary.
        train_data, val_data, test_data = _split_samples(
            crystal_list, train_frac, val_frac, seed
        )
        _dump_pickle(train_data, os.path.join(child_dir, f"{zeolite_code}_train.pkl"))
        _dump_pickle(val_data, os.path.join(child_dir, f"{zeolite_code}_val.pkl"))
        _dump_pickle(test_data, os.path.join(child_dir, f"{zeolite_code}_test.pkl"))

# Example usage: process every zeolite data directory found directly under
# the current working directory and write the pickle files next to the inputs.
process_zeolite_data()


['c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\DDR', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\DDRch1', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\DDRch2', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\FAU', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\FAUch', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\ITW', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\MEL', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\MELch', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\MFI', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\MOR', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\RHO', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\TON', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\TON2', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\TON3', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\TON4', 'c:\\tue\\Thesis\\zeogen\\data\\zeonet_data\\TONch']
Loading atoms from c:\tue\Thesis\zeogen\data\zeonet_data\DDR...
Loading atoms from c:\tue\Thesis\zeogen\data\zeonet_data\DDRch1...
Loading atoms from c:\tue\Thesis\zeog

In [25]:

# Show every (dirpath, dirnames, filenames) triple found below the working
# directory, followed by the working directory itself.
print([*os.walk(".")])
print(os.getcwd())


[('.', ['DDR', 'DDRch1', 'DDRch2', 'FAU', 'FAUch', 'ITW', 'MEL', 'MELch', 'MFI', 'MOR', 'RHO', 'TON', 'TON2', 'TON3', 'TON4', 'TONch'], ['process_zeonet_data.ipynb']), ('.\\DDR', [], ['.DS_Store', 'adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\DDRch1', [], ['adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\DDRch2', [], ['adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\FAU', [], ['.DS_Store', 'adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\FAUch', [], ['.DS_Store', 'adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\ITW', [], ['adj.npy', 'angles.npy', 'atoms.npy', 'henry.npy', 'hoa.npy', 'hoa_err.npy', 'l.npy', 'X.npy']), ('.\\MEL', [], ['.DS_Store', 'adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\MELch', [], ['adj.npy', 'angles.npy', 'atoms.npy', 'hoa.npy', 'l.npy', 'X.npy']), ('.\\MFI', [], ['adj.npy', 'angles.npy', 'atoms.npy', 'henry.n

In [24]:
# Move the kernel's working directory up one level.
# NOTE(review): the path is relative, so each re-run of this cell climbs one
# directory higher; cells that use relative paths depend on how often it ran.
os.chdir("../")

In [2]:
# Sanity check: read the FAU test split (path is relative to the working
# directory) and show its first sample.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# files generated by this project.
with open(os.path.join("FAU", "FAU_test.pkl"), "rb") as f:
    data = pickle.load(f)

print(data[0])
    

{'frac_coords': array([[0.946, 0.125, 0.036],
       [0.875, 0.196, 0.286],
       [0.804, 0.125, 0.536],
       [0.875, 0.054, 0.786],
       [0.946, 0.125, 0.214],
       [0.875, 0.196, 0.464],
       [0.804, 0.125, 0.714],
       [0.875, 0.054, 0.964],
       [0.036, 0.946, 0.125],
       [0.054, 0.286, 0.375],
       [0.714, 0.304, 0.625],
       [0.696, 0.964, 0.875],
       [0.036, 0.304, 0.125],
       [0.696, 0.286, 0.375],
       [0.714, 0.946, 0.625],
       [0.054, 0.964, 0.875],
       [0.125, 0.036, 0.946],
       [0.625, 0.714, 0.304],
       [0.286, 0.875, 0.554],
       [0.125, 0.536, 0.804],
       [0.964, 0.375, 0.554],
       [0.125, 0.214, 0.946],
       [0.286, 0.875, 0.196],
       [0.464, 0.375, 0.696],
       [0.054, 0.875, 0.964],
       [0.125, 0.804, 0.714],
       [0.196, 0.875, 0.464],
       [0.125, 0.946, 0.214],
       [0.054, 0.875, 0.786],
       [0.125, 0.804, 0.536],
       [0.196, 0.875, 0.286],
       [0.125, 0.946, 0.036],
       [0.964, 0.054, 0.