In [11]:
from collections import defaultdict
from tqdm import tqdm
import pandas as pd
from ase.io import read

In [12]:
# Load every frame stored in the test set (index=":" asks ASE for all of them).
xyz_path = "assets/GoldDAC/test.xyz"
atoms_list = read(xyz_path, index=":")
n_configs = len(atoms_list)
print(f"# of total configurations: {n_configs}")

# Bucket the configurations by the MOF they belong to, keyed on info["group"].
mof_dict = defaultdict(list)
for atoms in atoms_list:
    mof_dict[atoms.info["group"]].append(atoms)
n_mofs = len(mof_dict)
print(f"# of MOFs: {n_mofs}")

# of total configurations: 312
# of MOFs: 26


In [13]:
def _parse_config_name(name):
    """Extract gas, region, adsorption type and distance from a configuration name.

    The name is split on "_" and the fields are read by position:
    field 1 is the gas, field 2 the region (R: repulsion, E: equilibrium,
    P: weak-attraction), field 3 the adsorption type (A: aligned,
    N: non-aligned) and field 4 the distance field.

    For aligned configurations field 4 is used as-is. For non-aligned ones
    field 4 is split on "-": the second part is taken for regions R and E,
    the first part for region P.

    Parameters
    ----------
    name : str
        Value of ``atoms.info["name"]``.

    Returns
    -------
    tuple of str
        ``(gas_name, region, adsorption_type, dist)``. ``dist`` is returned as
        the raw string found in the name; it is not converted to a number.

    Raises
    ------
    ValueError
        If the adsorption type is neither "A" nor "N", or if a non-aligned
        configuration has a region other than "R", "E" or "P".
    """
    fields = name.split("_")
    gas_name = fields[1]
    region = fields[2]
    adsorption_type = fields[3]

    if adsorption_type == "A":
        dist = fields[4]
    elif adsorption_type == "N":
        if region in ("R", "E"):
            dist = fields[4].split("-")[1]
        elif region == "P":
            dist = fields[4].split("-")[0]
        else:
            raise ValueError(f"Unknown region: {region}")
    else:
        raise ValueError(f"Unknown adsorption type: {adsorption_type}")

    return gas_name, region, adsorption_type, dist


collections = []

# The per-MOF list gets its own name: reusing `atoms_list` here would overwrite
# the full list of configurations loaded earlier in the notebook.
for mof_name, mof_atoms in tqdm(mof_dict.items()):

    for atoms in mof_atoms:
        gas_name, region, adsorption_type, dist = _parse_config_name(
            atoms.info["name"]
        )

        # DFT energies (PBE+D3)
        dft_te = atoms.info["DFT_E_total"]
        dft_ie = atoms.info["DFT_E_int"]

        # Predicted energies
        # Split a copy of the structure by tag: atoms tagged 0 go to `framework`,
        # atoms tagged 1 go to `gas`. Nothing in this cell consumes these objects
        # yet; they are the starting point for adding model predictions.
        tags = atoms.get_tags()
        framework_gas = atoms.copy()
        framework = framework_gas[tags == 0]
        gas = framework_gas[tags == 1]

        # Collect results
        row_data = {
            "mof_name": mof_name,
            "gas": gas_name,
            "region": region,
            "adsorption_type": adsorption_type,
            "dist": dist,
            "dft_te": dft_te,
            "dft_ie": dft_ie,
        }
        collections.append(row_data)

# Convert to DataFrame (one row per configuration, built once from the records)
df = pd.DataFrame(collections)

100%|██████████| 26/26 [00:00<00:00, 522.56it/s]


In [14]:
# Display the assembled table; the notebook repr abbreviates the middle rows.
df

Unnamed: 0,mof_name,gas,region,adsorption_type,dist,dft_te,dft_ie
0,Zn-CFA-1-OH,CO2,P,A,5.0,-1196.705159,0.218382
1,Zn-CFA-1-OH,CO2,R,A,2.0,-1196.413903,0.509638
2,Zn-CFA-1-OH,CO2,P,A,4.0,-1196.897943,0.025598
3,Zn-CFA-1-OH,CO2,P,A,3.1,-1197.057199,-0.133657
4,Zn-CFA-1-OH,CO2,E,A,2.7,-1197.081989,-0.158448
...,...,...,...,...,...,...,...
307,SIFSIX-3-Zn,H2O,R,N,0.9,-1405.071354,3.568739
308,SIFSIX-3-Zn,H2O,R,N,1.5,-1408.753954,-0.113861
309,SIFSIX-3-Zn,H2O,P,N,2.6,-1408.482300,0.157793
310,SIFSIX-3-Zn,H2O,P,N,1.3,-1408.749477,-0.109384
