In [19]:
atom_properties = {
    'HW': {'type': 'HW', 'sigma': '3.165558', 'epsilon': '78.197431', 'charge': '-0.8476', 'num_particles': '1'},
    'OW': {'type': 'OW', 'sigma': '0.0', 'epsilon': '0.0', 'charge': '0.4238', 'num_particles': '2'},
    'SiZ': {'type': 'SiZ', 'sigma': '22.0', 'epsilon': '2.3', 'charge': '-2 + OZ', 'num_particles': '1'},
    'OZ': {'type': 'OZ', 'sigma': '53.0', 'epsilon': '3.3', 'charge': '-0.75', 'num_particles': '1'},
    'AlZ': {'type': 'AlZ', 'sigma': '22.0', 'epsilon': '2.3', 'charge': 'SiZ - 1', 'num_particles': '1'},
    'HZ': {'type': 'HZ', 'sigma': '1.0', 'epsilon': '100.0', 'charge': '1', 'num_particles': '1'}
}

"""
literature
Use SPC/E for water parameters, use TraPPE-Zeo for SiZ and OZ parameters. 
Si - epsilon/kb 22 charge -2q(OZ) epsilon 2.3 
OZ - epsilon/kb 53 charge -0.75 epsilon 3.30
For the initial guess, copy SiZ parameters into AlZ (make q_AlZ = q_SiZ - 1), and 
set HZ parameters to sigma = 1, epsilon = 100, q = +1.
"""

'\nliterature\nUse SPC/E for water parameters, use TraPPE-Zeo for SiZ and OZ parameters. \nSi - epsilon/kb 22 charge -2q(OZ) epsilon 2.3 \nOZ - epsilon/kb 53 charge -0.75 epsilon 3.30\nFor the initial guess, copy SiZ parameters into AlZ (make q_AlZ = q_SiZ - 1), and \nset HZ parameters to sigma = 1, epsilon = 100, q = +1.\n'

In [96]:
import pandas as pd

def extracting_positions(input_file):
        # Extract the positions from the .xyz file
    with open(input_file, "r") as file:
        lines = file.readlines()

    data_lines = lines[2:]

    data_list = []
    for line in data_lines:
        stripped_line = line.strip()
        parts = stripped_line.split()
        if len(parts) >= 5:  
            try:
                x, y, z = map(float, parts[1:4])
                atom_type = parts[0]
                data_list.append([x, y, z, atom_type])
            except ValueError:
                continue  
    # Create a DataFrame with all configurations
    columns = ["X", "Y", "Z", "Atom Type"]
    configuration = pd.DataFrame(data_list, columns=columns)

    # Rename before last three rows
    rename_map = {
        "O": "OZ",  # Renaming "O" to "OZ"
        "Si": "SiZ",  # Renaming "Si" to "SiZ"
        "H": "HZ",   # Renaming "H" to "HZ"
        "Al": "AlZ" # renaming "Al" to "AlZ"
    }
    configuration.loc[configuration.index < len(configuration) - 3, 'Atom Type'] = configuration.loc[configuration.index < len(configuration) - 3, 'Atom Type'].replace(rename_map)
    configuration.iloc[-3:, configuration.columns.get_loc("Atom Type")] = ["HW", "OW", "HW"]  # Rename the last 3 atom types

    return configuration

In [75]:
# Trying for small configurations first
file_paths = [
    #    '../data/spce_sample_config_periodic4.txt',
    #    '../data/spce_sample_config_periodic2.txt',
    #    '../data/spce_sample_config_periodic3.txt',
        'Zeolites_acid_base/configuration_2.xyz'
    ]

In [69]:
import re
import pandas as pd

# Create the target dataframes
def creating_dataframes(file_paths, atom_properties):
    # Creating the force_field dataframe
    force_field = pd.DataFrame.from_dict(atom_properties, orient='index')

    # Create the system dataframe with initialized columns
    system_data = []

    for path in file_paths:
        with open(path, "r") as file:
            lines = file.readlines()

        prefix_lines = lines[:2]

        # Extract energy
        energy_match = re.search(r'energy=([-+]?\d*\.\d+|\d+)', prefix_lines[1])
        energy = float(energy_match.group(1)) if energy_match else None

        # Extract lattice constants
        lattice_match = re.search(r'Lattice="([^"]+)"', prefix_lines[1])
        if lattice_match:
            lattice_values = lattice_match.group(1).split()
            lattice_floats = list(map(float, lattice_values))
            box_length = lattice_floats[0]  # Assuming cubic box
        else:
            lattice_floats = []
            box_length = None

        # Number of particles can be parsed from the first line
        try:
            num_particles = int(prefix_lines[0].strip())
        except ValueError:
            num_particles = None

        # Extracting the pbc value using regex
        pbc_match = re.search(r'pbc="([^"]+)"', prefix_lines[1])

        # Check if the match was found
        if pbc_match:
            pbc_value = pbc_match.group(1)
            print(pbc_value)
        else:
            print("pbc value not found")

        # Append a row of data
        system_data.append({
            "file_paths": path,
            "energy": energy,
            "number of particles": num_particles,
            "box length": box_length,
            "lattice floats": lattice_floats,
            "pbc value": pbc_value,
            "cutoff": 10,
            "alpha": 5.6 / box_length if box_length and box_length != 0 else 0.28,
            "kmax": 5,
            "ε0": 8.854187817E-12,
            "kB": 1.3806488E-23
        })

    system = pd.DataFrame(system_data)
    return system, force_field


In [76]:
system, force_field = creating_dataframes(file_paths, atom_properties)

T T T


In [77]:
system

Unnamed: 0,file_paths,energy,number of particles,box length,lattice floats,pbc value,cutoff,alpha,kmax,ε0,kB
0,Zeolites_acid_base/configuration_2.xyz,-4563.976001,580,24.345,"[24.345, 0.0, 0.0, 0.0, 24.345, 0.0, 0.0, 0.0,...",T T T,10,0.230027,5,8.854188e-12,1.380649e-23


In [78]:
force_field

Unnamed: 0,type,sigma,epsilon,charge,num_particles
HW,HW,3.165558,78.197431,-0.8476,1
OW,OW,0.0,0.0,0.4238,2
SiZ,SiZ,22.0,2.3,-2 + OZ,1
OZ,OZ,53.0,3.3,-0.75,1
AlZ,AlZ,22.0,2.3,SiZ - 1,1
HZ,HZ,1.0,100.0,1,1


In [102]:
configuration = extracting_positions(file_paths[0])
print(configuration)

             X          Y          Z Atom Type
0    21.753056   2.588614   0.000070        OZ
1    21.765630  14.751667  12.172291        OZ
2     9.568266  14.776618   0.014048        OZ
3     9.627733   2.506469  12.121849        OZ
4     0.000676  21.753306   2.589088        OZ
..         ...        ...        ...       ...
575  19.140367  21.322624   1.304992       SiZ
576  12.299560   9.520802  14.948105        HZ
577  11.853617  12.848924  13.037539        HW
578  11.037127  12.271565  13.037539        OW
579  10.220636  12.848924  13.037539        HW

[580 rows x 4 columns]


In [83]:
set(extracting_positions(file_paths[0])['Atom Type'])

{'ALZ', 'HW', 'HZ', 'OW', 'OZ', 'SiZ'}

In [61]:
results = pd.DataFrame()
results['Number of Particles'] = system['number of particles'].astype(int)


In [46]:
def minimum_image_distance(r_ij, cell_length):
    # Apply the minimum image convention to distances.
    return r_ij - cell_length * np.round(r_ij / cell_length)

In [64]:
import numpy as np

def pair_dispersion_energy(system_data, configuration, force_field):
    """
    Compute the total pair dispersion energy for a system of particles.
    
    Parameters:
    - system_row: A row of the system DataFrame containing simulation properties.
    - configuration: DataFrame with atom positions and types.
    - force_field: DataFrame with force field parameters for atom types.

    Returns:
    - total_dispersion_energy: float, the total pair dispersion energy.
    """
    positions = configuration[['X', 'Y', 'Z']].values
    atom_types = configuration['Atom Type'].values
    cutoff = system_data['cutoff']
    cell_length = system_data['box length']
    num_atoms = len(positions)

    total_dispersion_energy = 0.0

    for i in range(num_atoms):
        for j in range(i + 1, num_atoms):
            r_ij = positions[i] - positions[j]
            r_ij = minimum_image_distance(r_ij, cell_length)
            distance = np.linalg.norm(r_ij)

            if 0 < distance < cutoff:
                type_i, type_j = atom_types[i], atom_types[j]

                if type_i not in force_field.index or type_j not in force_field.index:
                    continue

                epsilon_i = force_field.loc[type_i, 'epsilon']
                epsilon_j = force_field.loc[type_j, 'epsilon']
                sigma_i = force_field.loc[type_i, 'sigma']
                sigma_j = force_field.loc[type_j, 'sigma']

                epsilon_ij = np.sqrt(epsilon_i * epsilon_j)
                sigma_ij = (sigma_i + sigma_j) / 2.0
                s_over_r = sigma_ij / distance

                # Lennard-Jones potential
                potential_energy = 4 * epsilon_ij * (s_over_r**12 - s_over_r**6)
                total_dispersion_energy += potential_energy
                
    return total_dispersion_energy


In [65]:
# Calculate pairwise energy for all system configurations
results['dispersion_energies'] = system['file_paths'].apply(
    lambda file_path: pair_dispersion_energy(
        system[system['file_paths'] == file_path].iloc[0],  # Ensure single row selection
        extracting_positions(file_path), 
        force_field
    )
)

In [66]:
results

Unnamed: 0,Number of Particles,dispersion_energies
0,580,0.0


In [84]:
def evaluate_charge(charge_str, force_field):
    """Evaluate charge expression safely."""
    # Replace atom types with corresponding charges in the expression
    for atom_type in force_field.index:
        charge_value = force_field.loc[atom_type, 'charge']
        charge_str = charge_str.replace(atom_type, str(charge_value))
    
    # Evaluate the final expression for charge
    try:
        charge_value = eval(charge_str)
    except Exception as e:
        print(f"Error evaluating charge: {charge_str} -> {e}")
        charge_value = 0.0  # Default value in case of error
    
    return charge_value

In [99]:
atom_types = configuration["Atom Type"].values
set(atom_types)

{'AlZ', 'HW', 'HZ', 'OW', 'OZ', 'SiZ'}

In [100]:
force_field.index

Index(['HW', 'OW', 'SiZ', 'OZ', 'AlZ', 'HZ'], dtype='object')

In [101]:
charges = np.array([evaluate_charge(force_field.loc[t, "charge"], force_field) for t in atom_types])