# Generate NWChem `.nw` inputs from saved XYZ conformers (COSMO + D3, geometry optimization)

This notebook scans a `ring_strain_outputs_*` folder containing multi-frame XYZ files like:

- `.../CP1/bond006/M_top3.xyz`
- `.../CP1/bond006/M_open_top3.xyz`
- `.../CP1/bond006/X_top3.xyz`
- `.../CP1/bond006/X_open_top3.xyz`

For each XYZ file, one conformer frame is selected (set by `CONF_INDEX`; the first frame by default) and written
to an NWChem input file `job.nw` that performs a **DFT geometry optimization with COSMO solvent + Grimme D3 dispersion**.

Outputs are written to:
- `nwchem_jobs/<name>/bondXXX/<species>/confYYY/job.nw`

Run each job via:
```bash
nwchem job.nw > job.out 2>&1
```


In [None]:
from pathlib import Path
import re
import json
import numpy as np


## 1) Settings

In [None]:
# Folder produced by the generation notebook
INPUT_ROOT = Path('ring_strain_outputs_core_set')  # change if needed

# Where to write NWChem job folders
OUT_ROOT = Path('nwchem_jobs')
OUT_ROOT.mkdir(exist_ok=True)

# NWChem method settings
XC = 'pbe0'                  # e.g. b3lyp, pbe0, wb97x, m06-2x (depends on your NWChem build)
BASIS = 'def2-svp'           # optimization basis
USE_SPHERICAL = True         # True -> "spherical" basis functions, False -> "cartesian"
# Line inserted verbatim into the `dft` block. The NWChem directive is `disp`;
# `vdw 3` is the option of that directive that selects Grimme DFT-D3, so the
# full line must read `disp vdw 3` (a bare `vdw 3` is not a dft directive).
DISPERSION = 'disp vdw 3'

# COSMO solvent settings
SOLVENT_NAME = 'DMF'         # only used in the job title
EPS = 36.7                   # DMF dielectric

# Job control
MULT = 1                     # spin multiplicity
CHARGE = 0                   # total charge
MAXITER = 200                # written as `maxiter` in the dft block
GRID = 'fine'                # written as `grid` in the dft block

# Which conformer frame to export from each multi-frame XYZ:
#   0 = first frame, 1 = second, ...
CONF_INDEX = 0


## 2) XYZ parsing (multi-frame)

In [None]:
def parse_multiframe_xyz(path: Path):
    """Yield ``(comment, atoms)`` for each frame in a multi-frame XYZ file.

    Each frame is an atom-count line, a comment line, and then that many
    atom lines of the form ``symbol x y z`` (extra trailing columns are
    ignored). Blank lines before a frame's atom-count line are skipped.

    Args:
        path: XYZ file to read.

    Yields:
        Tuples ``(comment, atoms)`` where ``atoms`` is a list of
        ``(sym, x, y, z)`` with float coordinates.

    Raises:
        ValueError: if an atom-count line is not a non-negative integer, an
            atom line has fewer than four columns or non-numeric
            coordinates, or the file ends in the middle of a frame. Every
            message names the offending file.
    """
    lines = path.read_text().splitlines()
    total = len(lines)
    pos = 0
    while pos < total:
        # Skip blank separator lines between (or after) frames.
        if not lines[pos].strip():
            pos += 1
            continue

        # Report the file and line for a bad count instead of a bare int() error.
        try:
            natoms = int(lines[pos].strip())
        except ValueError:
            raise ValueError(
                f'Bad atom count on line {pos + 1} of {path}: {lines[pos]!r}'
            ) from None
        if natoms < 0:
            # A negative count would otherwise produce a silent empty frame.
            raise ValueError(
                f'Negative atom count on line {pos + 1} of {path}: {natoms}'
            )
        pos += 1

        # splitlines() already removed the line terminator.
        comment = lines[pos] if pos < total else ''
        pos += 1

        atoms = []
        for _ in range(natoms):
            if pos >= total:
                raise ValueError(f'Unexpected EOF while reading {path}')
            fields = lines[pos].split()
            if len(fields) < 4:
                raise ValueError(f'Bad XYZ line in {path}: {lines[pos]}')
            try:
                x, y, z = map(float, fields[1:4])
            except ValueError:
                # Same message as the short-line case so callers see the file.
                raise ValueError(f'Bad XYZ line in {path}: {lines[pos]}') from None
            atoms.append((fields[0], x, y, z))
            pos += 1
        yield comment, atoms

def get_frame(path: Path, idx: int = 0):
    """Return the ``(comment, atoms)`` frame at position ``idx`` of an XYZ file.

    All frames are read first so the error message can report how many exist.

    Raises:
        ValueError: if the file contains no frames.
        IndexError: if ``idx`` is negative or not smaller than the frame count.
    """
    all_frames = tuple(parse_multiframe_xyz(path))
    count = len(all_frames)
    if count == 0:
        raise ValueError(f'No frames in {path}')
    if idx >= count or idx < 0:
        raise IndexError(f'Frame idx {idx} out of range for {path} (n={count})')
    return all_frames[idx]


## 3) NWChem input writer (COSMO + D3 geometry optimization)

In [None]:
def nwchem_input_from_atoms(
    atoms,
    title: str,
    charge: int = 0,
    mult: int = 1,
    xc: str = 'pbe0',
    basis: str = 'def2-svp',
    spherical: bool = True,
    dispersion_line: str = 'vdw 3',
    eps: float = 36.7,
    maxiter: int = 200,
    grid: str = 'fine',
):
    """Build the text of an NWChem input for a COSMO + dispersion DFT optimization.

    The sections are emitted in this order: ``start``/``title``/``charge``,
    ``geometry``, ``basis``, ``cosmo``, ``dft``, and finally
    ``task dft optimize``.

    Args:
        atoms: iterable of ``(sym, x, y, z)``; coordinates are written in
            angstrom with 8 decimals.
        title: job title; embedded double quotes are replaced by single
            quotes because the title is written inside double quotes.
        charge: total charge, written as the top-level ``charge`` directive.
        mult: spin multiplicity for the ``dft`` block.
        xc: exchange-correlation functional name.
        basis: basis-set library name applied to all elements (``*``).
        spherical: write a ``spherical`` basis if true, else ``cartesian``.
        dispersion_line: dispersion directive for the ``dft`` block. A bare
            ``vdw N`` value (including the default) is written as
            ``disp vdw N``, since ``disp`` is the directive keyword. An
            empty or whitespace-only value omits the line.
        eps: dielectric constant written as ``dielec`` in the ``cosmo`` block.
        maxiter: value for ``maxiter`` in the ``dft`` block.
        grid: value for ``grid`` in the ``dft`` block.

    Returns:
        The full input file as one newline-joined string ending in a newline.
    """
    # `vdw <n>` is only an option of the `disp` directive; prepend the keyword
    # when the caller (or the default) passed the option alone.
    disp = dispersion_line.strip()
    if disp.lower().split()[:1] == ['vdw']:
        disp = f'disp {disp}'

    # An embedded double quote would terminate the quoted title early.
    safe_title = title.replace('"', "'")

    geom_lines = ['geometry units angstrom']
    geom_lines.extend(
        f"  {sym:2s} {x: .8f} {y: .8f} {z: .8f}" for sym, x, y, z in atoms
    )
    geom_lines.append('end')

    basis_lines = [
        'basis "ao basis" ' + ('spherical' if spherical else 'cartesian'),
        f"  * library {basis}",
        'end',
    ]

    dft_lines = [
        'dft',
        f'  xc {xc}',
        f'  mult {mult}',
        f'  maxiter {maxiter}',
    ]
    if disp:
        dft_lines.append(f'  {disp}')
    dft_lines.extend([
        '  convergence energy 1e-7',
        '  convergence density 1e-6',
        f'  grid {grid}',
        'end',
    ])

    cosmo_lines = [
        'cosmo',
        f'  dielec {eps}',
        'end',
    ]

    lines = ['start job', f'title "{safe_title}"', f'charge {charge}', '']
    # Each section is followed by one blank line.
    for section in (geom_lines, basis_lines, cosmo_lines, dft_lines):
        lines.extend(section)
        lines.append('')
    lines.append('task dft optimize')
    lines.append('')
    return '\n'.join(lines)


## 4) Discover XYZ files and write job folders

In [None]:
def iter_species_xyz(root: Path):
    """Yield ``(name, bond, species, xyz_path)`` for every species XYZ under ``root``.

    The expected layout is ``root/<name>/bond*/<species>_top<digits>.xyz`` with
    ``species`` one of ``M``, ``M_open``, ``X`` or ``X_open``. Directories and
    files are visited in sorted order; non-matching file names are skipped.
    """
    species_pattern = re.compile(r'^(M|M_open|X|X_open)_top(\d+)\.xyz$')

    def sorted_subdirs(parent):
        # Sub-directories only, in sorted order.
        return sorted(child for child in parent.iterdir() if child.is_dir())

    for molecule_dir in sorted_subdirs(root):
        for bond_dir in sorted_subdirs(molecule_dir):
            if not bond_dir.name.startswith('bond'):
                continue
            for xyz_file in sorted(bond_dir.glob('*_top*.xyz')):
                hit = species_pattern.match(xyz_file.name)
                if hit:
                    yield molecule_dir.name, bond_dir.name, hit.group(1), xyz_file

# Method settings shared by every generated input; only atoms and title vary.
method_kwargs = dict(
    charge=CHARGE,
    mult=MULT,
    xc=XC,
    basis=BASIS,
    spherical=USE_SPHERICAL,
    dispersion_line=DISPERSION,
    eps=EPS,
    maxiter=MAXITER,
    grid=GRID,
)

# One record per job.nw written; also used for the index JSON.
written = []
for name, bond, species, xyz_path in iter_species_xyz(INPUT_ROOT):
    # Only the coordinates of the selected frame are used; its comment is ignored.
    _comment, atoms = get_frame(xyz_path, CONF_INDEX)

    title = f"{name} {bond} {species} conf{CONF_INDEX} | {XC}/{BASIS} COSMO({SOLVENT_NAME}) eps={EPS} D3"
    text = nwchem_input_from_atoms(atoms, title=title, **method_kwargs)

    # Layout: <OUT_ROOT>/<name>/<bond>/<species>/confYYY/job.nw
    out_dir = OUT_ROOT / name / bond / species / f"conf{CONF_INDEX:03d}"
    out_dir.mkdir(parents=True, exist_ok=True)
    nw_path = out_dir / 'job.nw'
    nw_path.write_text(text)

    record = {
        'name': name,
        'bond': bond,
        'species': species,
        'xyz': str(xyz_path),
        'nw': str(nw_path),
    }
    written.append(record)

print('Wrote NWChem inputs:', len(written))
if written:
    print('Example:', written[0])
else:
    print('Example:', 'none')


## 5) Save an index JSON for convenience

In [None]:
# Dump the list of written jobs next to the job folders as pretty-printed JSON.
index_path = OUT_ROOT / 'index.json'
with index_path.open('w') as handle:
    json.dump(written, handle, indent=2)
print('Index written:', index_path.resolve())


## 6) How to run
From a terminal, `cd` into any generated job folder (contains `job.nw`) and run:
```bash
nwchem job.nw > job.out 2>&1
tail -n 40 job.out
```
