# Generate NWChem `.nw` inputs from saved XYZ conformers (COSMO + D3, geometry optimization)

This notebook scans a `ring_strain_outputs_*` folder containing multi-frame XYZ files like:

- `.../CP1/bond006/M_top3.xyz`
- `.../CP1/bond006/M_open_top3.xyz`
- `.../CP1/bond006/X_top3.xyz`
- `.../CP1/bond006/X_open_top3.xyz`

For each XYZ (optionally selecting a specific conformer frame), it writes an NWChem input file `job.nw`
that performs a **DFT geometry optimization with COSMO solvent + Grimme D3 dispersion**.

Outputs are written to:
- `nwchem_jobs/<name>/bondXXX/<species>/confYYY/job.nw`

Run each job via:
```bash
nwchem job.nw > job.out 2>&1
```


In [1]:
from pathlib import Path
import re
import json
import numpy as np


## 1) Settings

In [None]:
# Folder produced by the generation notebook
INPUT_ROOT = Path('ring_strain_outputs_core_set_test')  # change if needed

# Where to write NWChem job folders
OUT_ROOT = Path('nwchem_jobs')
OUT_ROOT.mkdir(exist_ok=True)

# Which conformer frame to export from each multi-frame XYZ:
#   0 = first frame, 1 = second, ...
CONF_INDEX = 0

# ---------- Baseline (reference) ----------
# We will compare every "layer" to this baseline:
#   geometry optimization at def2-SVP in gas phase (no solvent, no dispersion)
BASE_XC = 'wb97x-d3'
BASE_BASIS = 'def2-svp'
BASE_SPHERICAL = True
BASE_GRID = 'fine'
BASE_MAXITER = 200
BASE_MULT = 1
BASE_CHARGE = 0

# ---------- Solvent (COSMO/PCM-style in NWChem via COSMO block) ----------
SOLVENT_NAME = 'DMF'
EPS = 36.7  # DMF dielectric

# ---------- Dispersion ----------
# NWChem keyword for Grimme D3 dispersion in the DFT block is commonly: 'vdw 3'
# If your build uses a different spelling, change it here.
D3_LINE = 'vdw 3'

# ---------- Levels of theory to generate ----------
# Each level produces its own subfolder and its own job.nw file.
# You can add/remove levels freely.
LEVELS = [
    {
        "tag": "L0_opt_svp_gas",
        "desc": "OPT: PBE0/def2-SVP, gas phase (baseline)",
        "task": "optimize",
        "xc": BASE_XC,
        "basis": BASE_BASIS,
        "grid": BASE_GRID,
        "maxiter": BASE_MAXITER,
        "spherical": BASE_SPHERICAL,
        "charge": BASE_CHARGE,
        "mult": BASE_MULT,
        "use_cosmo": False,
        "eps": EPS,
        "use_d3": False,
    },
    {
        "tag": "L1_opt_svp_cosmo",
        "desc": "OPT: PBE0/def2-SVP + COSMO(DMF)",
        "task": "optimize",
        "xc": BASE_XC,
        "basis": BASE_BASIS,
        "grid": BASE_GRID,
        "maxiter": BASE_MAXITER,
        "spherical": BASE_SPHERICAL,
        "charge": BASE_CHARGE,
        "mult": BASE_MULT,
        "use_cosmo": True,
        "eps": EPS,
        "use_d3": False,
    },
    {
        "tag": "L2_opt_svp_cosmo_d3",
        "desc": "OPT: PBE0/def2-SVP + COSMO(DMF) + D3",
        "task": "optimize",
        "xc": BASE_XC,
        "basis": BASE_BASIS,
        "grid": BASE_GRID,
        "maxiter": BASE_MAXITER,
        "spherical": BASE_SPHERICAL,
        "charge": BASE_CHARGE,
        "mult": BASE_MULT,
        "use_cosmo": True,
        "eps": EPS,
        "use_d3": True,
    },
    {
        "tag": "L3_sp_tzvp_cosmo_d3",
        "desc": "SP:  PBE0/def2-TZVP + COSMO(DMF) + D3 (single-point on chosen XYZ)",
        "task": "energy",
        "xc": BASE_XC,
        "basis": "def2-tzvp",
        "grid": BASE_GRID,
        "maxiter": BASE_MAXITER,
        "spherical": BASE_SPHERICAL,
        "charge": BASE_CHARGE,
        "mult": BASE_MULT,
        "use_cosmo": True,
        "eps": EPS,
        "use_d3": True,
    },
]

# Tip: If you want a true "opt@SVP then sp@TZVP on optimized geometry",
# run L2 first, then point the L3 jobs at the optimized geometry XYZ that NWChem writes
# (or parse it from job.out). This notebook currently uses the saved conformer XYZ as input.


## 2) XYZ parsing (multi-frame)

In [3]:
def parse_multiframe_xyz(path: Path):
    """Yield (comment, atoms) for each frame in a multi-frame XYZ.
    atoms = list of (sym, x, y, z)
    """
    txt = path.read_text().splitlines()
    i = 0
    nlines = len(txt)
    while i < nlines:
        while i < nlines and txt[i].strip() == '':
            i += 1
        if i >= nlines:
            break
        n = int(txt[i].strip()); i += 1
        comment = txt[i].rstrip('\n') if i < nlines else ''
        i += 1
        atoms = []
        for _ in range(n):
            if i >= nlines:
                raise ValueError(f'Unexpected EOF while reading {path}')
            parts = txt[i].split()
            if len(parts) < 4:
                raise ValueError(f'Bad XYZ line in {path}: {txt[i]}')
            sym = parts[0]
            x, y, z = map(float, parts[1:4])
            atoms.append((sym, x, y, z))
            i += 1
        yield comment, atoms

def get_frame(path: Path, idx: int = 0):
    frames = list(parse_multiframe_xyz(path))
    if not frames:
        raise ValueError(f'No frames in {path}')
    if idx < 0 or idx >= len(frames):
        raise IndexError(f'Frame idx {idx} out of range for {path} (n={len(frames)})')
    return frames[idx]


## 3) NWChem input writer (COSMO + D3 geometry optimization)

In [4]:
def nwchem_input_from_atoms(
    atoms,
    title: str,
    charge: int = 0,
    mult: int = 1,
    xc: str = 'pbe0',
    basis: str = 'def2-svp',
    spherical: bool = True,
    use_d3: bool = False,
    d3_line: str = 'vdw 3',
    use_cosmo: bool = False,
    eps: float = 36.7,
    maxiter: int = 200,
    grid: str = 'fine',
    task: str = 'optimize',   # 'optimize' or 'energy'
):
    """Create an NWChem input text for DFT with optional COSMO and optional D3.

    Notes:
      - COSMO in NWChem is configured in a 'cosmo' block.
      - D3 is enabled via a line like 'vdw 3' inside the 'dft' block (varies by build).
      - 'task dft optimize' does geometry optimization; 'task dft energy' does a single-point.
    """
    geom_lines = ['geometry units angstrom']
    for sym, x, y, z in atoms:
        geom_lines.append(f"  {sym:2s} {x: .8f} {y: .8f} {z: .8f}")
    geom_lines.append('end')

    basis_lines = []
    basis_lines.append('basis "ao basis" ' + ('spherical' if spherical else 'cartesian'))
    basis_lines.append(f"  * library {basis}")
    basis_lines.append('end')

    dft_lines = [
        'dft',
        f'  xc {xc}',
        f'  mult {mult}',
        f'  maxiter {maxiter}',
        '  convergence energy 1e-7',
        '  convergence density 1e-6',
        f'  grid {grid}',
    ]
    if use_d3:
        dft_lines.append(f'  {d3_line}')
    dft_lines.append('end')

    lines = []
    lines.append('start job')
    lines.append(f'title "{title}"')
    lines.append(f'charge {charge}')
    lines.append('')
    lines.extend(geom_lines)
    lines.append('')
    lines.extend(basis_lines)
    lines.append('')

    if use_cosmo:
        lines.extend([
            'cosmo',
            f'  dielec {eps}',
            'end',
            ''
        ])

    lines.extend(dft_lines)
    lines.append('')
    task = task.lower().strip()
    if task not in {'optimize', 'energy'}:
        raise ValueError(f"task must be 'optimize' or 'energy', got {task!r}")
    lines.append(f'task dft {task}')
    lines.append('')
    return '\n'.join(lines)


## 4) Discover XYZ files and write job folders

### Directory layout
Jobs are written under:
- `nwchem_jobs/<LEVEL_TAG>/<NAME>/<bondXXX>/<species>/confYYY/job.nw`

This makes it easy to compare levels side-by-side while keeping the baseline (L0) separate.

In [5]:
def iter_species_xyz(root: Path):
    for name_dir in sorted([p for p in root.iterdir() if p.is_dir()]):
        for bond_dir in sorted([p for p in name_dir.iterdir() if p.is_dir() and p.name.startswith('bond')]):
            for xyz in sorted(bond_dir.glob('*_top*.xyz')):
                m = re.match(r'^(M|M_open|X|X_open)_top(\d+)\.xyz$', xyz.name)
                if not m:
                    continue
                species = m.group(1)
                yield name_dir.name, bond_dir.name, species, xyz

written = []
for name, bond, species, xyz_path in iter_species_xyz(INPUT_ROOT):
    comment, atoms = get_frame(xyz_path, CONF_INDEX)

    for lvl in LEVELS:
        tag = lvl["tag"]
        desc = lvl.get("desc", tag)
        title = f"{name} {bond} {species} conf{CONF_INDEX} | {desc} | COSMO({SOLVENT_NAME}) eps={lvl.get('eps', EPS)}"

        text = nwchem_input_from_atoms(
            atoms,
            title=title,
            charge=int(lvl.get("charge", BASE_CHARGE)),
            mult=int(lvl.get("mult", BASE_MULT)),
            xc=str(lvl.get("xc", BASE_XC)),
            basis=str(lvl.get("basis", BASE_BASIS)),
            spherical=bool(lvl.get("spherical", BASE_SPHERICAL)),
            use_d3=bool(lvl.get("use_d3", False)),
            d3_line=str(D3_LINE),
            use_cosmo=bool(lvl.get("use_cosmo", False)),
            eps=float(lvl.get("eps", EPS)),
            maxiter=int(lvl.get("maxiter", BASE_MAXITER)),
            grid=str(lvl.get("grid", BASE_GRID)),
            task=str(lvl.get("task", "optimize")),
        )

        out_dir = OUT_ROOT / tag / name / bond / species / f"conf{CONF_INDEX:03d}"
        out_dir.mkdir(parents=True, exist_ok=True)
        nw_path = out_dir / 'job.nw'
        nw_path.write_text(text)

        written.append({
            "level": tag,
            "desc": desc,
            "name": name,
            "bond": bond,
            "species": species,
            "conf_index": int(CONF_INDEX),
            "xyz": str(xyz_path),
            "nw": str(nw_path),
        })

print('Wrote NWChem inputs:', len(written))
print('Levels:', sorted(set(w['level'] for w in written)))
print('Example:', written[0] if written else 'none')


Wrote NWChem inputs: 84
Levels: ['L0_opt_svp_gas', 'L1_opt_svp_cosmo', 'L2_opt_svp_cosmo_d3', 'L3_sp_tzvp_cosmo_d3']
Example: {'level': 'L0_opt_svp_gas', 'desc': 'OPT: PBE0/def2-SVP, gas phase (baseline)', 'name': 'CP1', 'bond': 'bond006', 'species': 'M_open', 'conf_index': 0, 'xyz': 'ring_strain_outputs_core_set_test/CP1/bond006/M_open_top5.xyz', 'nw': 'nwchem_jobs/L0_opt_svp_gas/CP1/bond006/M_open/conf000/job.nw'}


## 5) Save an index JSON for convenience

In [6]:
index_path = OUT_ROOT / 'index.json'
index_path.write_text(json.dumps(written, indent=2))
print('Index written:', index_path.resolve())


Index written: /home/thomas-watts/Desktop/pyLIQTR/ring_strain/nwchem_jobs/index.json


## 6) How to run
From a terminal, `cd` into any generated job folder (contains `job.nw`) and run:
```bash
nwchem job.nw > job.out 2>&1
tail -n 40 job.out
```
