In [7]:
# import modules
import matplotlib as pyplot
import pandas as pd
import os
import re

In [8]:
"""
Define a function that parses out number of atoms
and XYZ coords called parse_out_file
"""

def parse_out_file(file_name):
    with open(file_name, 'r') as f:
        lines = f.readlines()

    output_lines = [] # initialize a list called output_lines
    natoms = 0 # Initialize number of atoms and set to 0
    start_appending = False  # Initialize the flag

    # Iterate through each line to find 'NAtoms' to
    # get number of atoms and find 'Input orientation'.
    # Append all coordinates for each instance of 'Input orientation'
    # to output_lines list
    for i, line in enumerate(lines):
        match = re.match(r'\s*NAtoms\s*=\s*(\d+)', line)
        if match:
            natoms = int(match.group(1))

        if 'Input orientation' in line:
            start_appending = True
            start_line = i + 5  # Start appending from the 5th line after 'Input orientation'
            end_line = start_line + natoms + 1  # Stop appending after natoms + 1 lines

            # Append lines from start_line to end_line
            for j in range(start_line, end_line):
                if j < len(lines):
                    output_lines.append(lines[j])

            start_appending = False  # Reset the flag

    return output_lines, natoms

In [9]:
"""
Define a function that processes output_lines and 
creates an xyz file that can be processed further 
for input file generation
"""

def gen_xyz_file(output_lines):
    
    # Initialize two lists to store sublists of lines
    # for each molecule
    molecule_data = []
    current_molecule = []
    
    # Skip the first line
    output_lines = output_lines[1:]

    # Search output_lines for "---" and use this as a marker
    # for a new molecule
    for line in output_lines:
        if line.strip() == "---------------------------------------------------------------------":
            if current_molecule:
                molecule_data.append(current_molecule)
                current_molecule = []
        else:
            columns = line.strip().split()
            current_molecule.append([columns[1]] + columns[-3:])

    if current_molecule:
        molecule_data.append(current_molecule)
    
    # Use the number of atoms in the molecule to write an .xyz
    # file that contains the atom types and xyz coordinates
    for i, molecule in enumerate(molecule_data, start=1):
        num_atoms = len(molecule)
        title = f"molecule{i}"
        coords = molecule

        filename = f"{title}.xyz"
        with open(filename, "w") as f:
            f.write(f"{num_atoms}\n")
            f.write(f"{title}\n")
            for line in coords:
                f.write(f"{' '.join(line)}\n")

    print(f"Generated {len(molecule_data)} XYZ files.")

# Example usage
# output_lines = [
#     "1         42           0       -0.752589    0.015077   -0.294351",
#     "2          8           0        0.230472    0.712316   -2.955327",
#     "3          7           0       -1.716913   -0.448017    1.770978",
#     # ... (more lines) ...
#     "---------------------------------------------------------------------",
#     "1         42           0       -0.755059    0.015447   -0.294879",
#     "2          8           0        0.231844    0.711772   -2.954699",
#     # ... (more lines) ...
#     "---------------------------------------------------------------------",
#     # ... (more molecules) ...
# ]

#gen_xyz_file(output_lines)

In [10]:
"""
Opening file test
"""

# Prompt the user to input the name of their file
file_name = input("Enter the name of your .out file: ")

# Open the file
with open(file_name, 'r') as f:
    lines = f.readlines()
    
print(lines)

Enter the name of your .out file: P08_J804_ts_benz_oxide_oxepin_prot_pw6b95_irc.out


In [11]:
output_lines, natoms = parse_out_file(file_name)

for line in output_lines:
    print(line.strip())

1          6           0        0.176547    1.388444    0.694815
1          6           0        0.171279    1.402838    0.691782
2          6           0        0.171279    1.402838   -0.691782
3          6           0       -0.312460    0.330400   -1.446206
4          6           0       -0.299799   -0.946247   -0.987611
5          6           0       -0.299799   -0.946247    0.987611
6          6           0       -0.312460    0.330400    1.446206
7          1           0        0.360007    2.337583    1.195523
8          1           0        0.360007    2.337583   -1.195523
9          1           0       -0.776328    0.505628   -2.405253
10          1           0       -0.776328    0.505628    2.405253
11          8           0        0.708519   -1.269305   -0.000000
12          1           0       -0.667674   -1.844770    1.446140
13          1           0       -0.667674   -1.844770   -1.446140
14          1           0        1.424633   -0.593200   -0.000000
--------------------

In [12]:
xyz_file = gen_xyz_file(output_lines)

print(xyz_file)

Generated 36 XYZ files.
None
