In [1]:
import numpy as np
import pandas as pd
import os
import shutil
import re
from pathlib import Path
from typing import Union, Sequence, Optional
import random

In [2]:
def _count_tc_groups(lines) -> int:
    """
    Parse tc-grps line to determine number of temperature coupling groups.
    Returns 0 if not found (then we won't auto-repeat ref_t).
    """
    pat = re.compile(r'^\s*tc-grps\s*=\s*([^;#\n]+)', re.IGNORECASE)
    for ln in lines:
        m = pat.match(ln)
        if m:
            groups = m.group(1).strip().split()
            return len(groups)
    return 0

def _format_value_for_key(key: str, value) -> str:
    """Return the value as it should appear in the mdp (no extra padding)."""
    if key.lower() == "nsteps":
        # ensure integer
        return str(int(value))
    elif key.lower() == "ref_t":
        if isinstance(value, (list, tuple)):
            return " ".join(str(v) for v in value)
        else:
            return str(value)
    else:
        return str(value)

def _replace_key_line(line: str, key: str, new_value_str: str) -> Optional[str]:
    """
    If `line` contains the mdp key, replace its value while preserving whitespace
    before the value and any trailing comment starting with ';'.
    Returns the modified line, or None if the key is not on this line.
    """
    # Match: [spaces]key[spaces]=[spaces]VALUE [spaces][;comment...]
    pat = re.compile(rf'^(\s*{re.escape(key)}\s*=\s*)([^;#\n]*?)(\s*(;.*)?)$',
                     re.IGNORECASE)
    m = pat.match(line)
    if not m:
        return None
    prefix, _old_val, suffix = m.group(1), m.group(2), m.group(3)
    return f"{prefix}{new_value_str}{suffix}\n"

def update_mdp(
    in_path: Union[str, Path],
    out_path: Optional[Union[str, Path]] = None,
    n_steps: Optional[int] = None,
    ref_t: Optional[Union[float, int, Sequence[Union[float, int]]]] = None,
) -> Path:
    """
    Update nsteps and/or ref_t in a GROMACS .mdp file.

    Parameters
    ----------
    in_path : str or Path
        The mdp file to read.
    out_path : str or Path, optional
        Where to write the result. ``None`` (default) overwrites `in_path`.
    n_steps : int, optional
        Written to ``nsteps`` in the mdp. ``None`` leaves it untouched.
    ref_t : number, list, tuple or numpy array, optional
        Reference temperature(s). A single value (or a one-element sequence)
        is repeated to match the number of groups found on the ``tc-grps``
        line. ``None`` leaves ``ref_t`` untouched.

    Returns
    -------
    Path
        The path of the written file.

    Raises
    ------
    ValueError
        If `ref_t` is an empty sequence, or if its length is neither 1 nor
        the number of tc-grps found in the file.

    Notes
    -----
    Only the first occurrence of each key is replaced. If a key is not
    present at all, it is appended at the end of the file.
    """
    in_path = Path(in_path)
    out_path = in_path if out_path is None else Path(out_path)  # None -> in-place

    lines = in_path.read_text().splitlines()

    # Number of temperature-coupling groups (0 if the file has no tc-grps line).
    num_tc_groups = _count_tc_groups(lines)

    ref_t_values_str: Optional[str] = None
    if ref_t is not None:
        # A NumPy array would otherwise be treated as one scalar and repeated
        # as a whole; flatten it into a plain list of Python numbers.
        if isinstance(ref_t, np.ndarray) and ref_t.ndim > 0:
            ref_t = ref_t.ravel().tolist()

        if isinstance(ref_t, (list, tuple)):
            vals = list(ref_t)
            if not vals:
                raise ValueError("ref_t must contain at least one value.")
            # If provided list length doesn't match and isn't 1, that's likely a mistake.
            if num_tc_groups and len(vals) not in (1, num_tc_groups):
                raise ValueError(
                    f"ref_t length ({len(ref_t)}) does not match number of tc-grps "
                    f"({num_tc_groups}). Provide one value or {num_tc_groups} values."
                )
            if num_tc_groups and len(vals) == 1:
                vals = vals * num_tc_groups
        else:
            # Single value: one copy per group, or exactly one if no
            # tc-grps line was found.
            vals = [ref_t] * max(num_tc_groups, 1)
        ref_t_values_str = _format_value_for_key("ref_t", vals)

    # Prepare nsteps string if given
    nsteps_str = _format_value_for_key("nsteps", n_steps) if n_steps is not None else None

    # Walk lines and replace the first occurrence of each requested key.
    new_lines = []
    nsteps_done = False
    reft_done = False

    for ln in lines:
        if not nsteps_done and nsteps_str is not None:
            repl = _replace_key_line(ln, "nsteps", nsteps_str)
            if repl is not None:
                new_lines.append(repl.rstrip("\n"))
                nsteps_done = True
                continue

        if not reft_done and ref_t_values_str is not None:
            repl = _replace_key_line(ln, "ref_t", ref_t_values_str)
            if repl is not None:
                new_lines.append(repl.rstrip("\n"))
                reft_done = True
                continue

        new_lines.append(ln)

    # If the key didn't exist, append it (rare for nsteps/ref_t, but nice to have)
    if nsteps_str is not None and not nsteps_done:
        new_lines.append(f"nsteps          = {nsteps_str}")
    if ref_t_values_str is not None and not reft_done:
        new_lines.append(f"ref_t           = {ref_t_values_str}")

    out_path.write_text("\n".join(new_lines) + "\n")
    return out_path

def write_topology(
    out_dir: str,
    filename: str = "topol.top",
    cnt_file: str = "cnt.itp",
    forcefield: str = "oplsaa.ff/forcefield.itp",
    water_model: str = "oplsaa.ff/spc.itp",
    system_name: str = "CNT + water",
    molecules: Optional[dict] = None,
):
    """
    Generate a GROMACS topology file (topol.top) in a specific directory.

    Parameters
    ----------
    out_dir : str
        Directory where the topology file will be saved (created if missing).
    filename : str
        Name of the topology file (default = "topol.top").
    cnt_file : str
        CNT topology include file.
    forcefield : str
        Forcefield include file.
    water_model : str
        Water model include file.
    system_name : str
        System description.
    molecules : dict, optional
        Molecules and their counts (e.g., {"CNT": 1, "SOL": 5000, "NA": 10}).
        Defaults to {"CNT": 1}. Entries named NA or CL (any letter case) are
        dropped.

    Returns
    -------
    None
        The file is written to ``out_dir/filename`` and a confirmation is
        printed.
    """
    # Avoid a shared mutable default argument.
    if molecules is None:
        molecules = {"CNT": 1}

    # Remove Na/Cl if they exist
    clean_molecules = {
        mol: count
        for mol, count in molecules.items()
        if mol.upper() not in {"NA", "CL"}
    }

    # Build content
    content = [
        # The file name goes in as a ';' comment. A bare name here would be a
        # stray line that is neither a comment, a directive nor an include.
        f"; {filename}",
        "; ======= topol.top =======",
        f'#include "{forcefield}"',
        "",
        "; your CNT include-topology",
        f'#include "{cnt_file}"',
        "",
        # NOTE(review): the emitted comment says SPC/E although the default
        # include is spc.itp — confirm which water model is intended.
        "; SPC/E water model in OPLS-AA",
        f'#include "{water_model}"',
        "",
        "[ system ]",
        system_name,
        "",
        "[ molecules ]",
    ]

    for mol, count in clean_molecules.items():
        content.append(f"{mol:<6} {count}")

    # Ensure directory exists
    out_path = Path(out_dir) / filename
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Write file
    out_path.write_text("\n".join(content) + "\n")
    print(f"Topology file written to {out_path}")

def reduce_waters(gro_in, gro_out, fraction=0.8, solvent_name="SOL"):
    with open(gro_in) as f:
        lines = f.readlines()

    n_atoms = int(lines[1])
    header, atoms, box = lines[0], lines[2:-1], lines[-1]

    reduced_atoms = []
    current_res = None
    keep = True

    for line in atoms:
        res_id = line[0:5].strip()      # residue number
        res_name = line[5:10].strip()   # residue name (e.g. SOL, CNT, etc.)

        if res_name == solvent_name:  # only thin solvents
            if res_id != current_res:
                keep = random.random() < fraction
                current_res = res_id
        else:
            keep = True  # always keep CNT or other molecules

        if keep:
            reduced_atoms.append(line)

    with open(gro_out, "w") as f:
        f.write(header)
        f.write(f"{len(reduced_atoms)}\n")
        f.writelines(reduced_atoms)
        f.write(box)

def count_sol_molecules(gro_file, solvent_name="SOL"):
    """
    Count the number of solvent molecules (e.g., SOL) in a .gro file.

    A molecule is counted each time the residue number changes between
    consecutive solvent atom lines (or solvent starts again after another
    residue). Counting transitions rather than unique residue numbers keeps
    the result correct when residue numbers repeat, which happens in large
    systems because the residue-number field is only five characters wide.
    This assumes the atoms of one residue are listed contiguously.

    Parameters
    ----------
    gro_file : str
        Path to the .gro structure file.
    solvent_name : str
        Residue/molecule name for solvent (default: "SOL").

    Returns
    -------
    int : number of solvent molecules
    """
    with open(gro_file, "r") as f:
        atom_lines = f.readlines()[2:-1]  # skip header and box line

    n_molecules = 0
    previous_resid = None  # residue number of the solvent residue being read
    for line in atom_lines:
        resname = line[5:10].strip()  # columns 6-10 = residue name
        resid = line[0:5].strip()     # columns 1-5 = residue number
        if resname != solvent_name:
            # leaving solvent: the next solvent line starts a new molecule
            previous_resid = None
            continue
        if resid != previous_resid:
            n_molecules += 1
            previous_resid = resid
    return n_molecules


def update_topol_from_gro(top_file, gro_file, molecule_name="SOL"):
    """
    Update the [ molecules ] section of topol.top with the correct count
    based on the number of molecules in the .gro file.

    The first entry named `molecule_name` inside a ``[ molecules ]`` section
    is rewritten in place; every other line is left untouched. The directive
    is recognised regardless of the spacing inside the brackets or letter
    case, and the section ends at the next ``[ ... ]`` directive.

    Parameters
    ----------
    top_file : str
        Path to the topology file (e.g., "topol.top").
    gro_file : str
        Path to the coordinate file (e.g., "solv.gro").
    molecule_name : str
        Name of the molecule in [ molecules ] (default: "SOL").

    Returns
    -------
    None
        The topology file is rewritten only if a matching entry was found;
        a message reporting the outcome is printed either way.
    """
    new_count = count_sol_molecules(gro_file, molecule_name)

    with open(top_file, "r") as f:
        lines = f.readlines()

    directive = re.compile(r'^\s*\[\s*([^\]\s]+)\s*\]')

    updated_lines = []
    in_molecules = False
    updated = False
    for line in lines:
        header = directive.match(line)
        if header:
            # Entering [ molecules ] starts the search; any other directive ends it.
            in_molecules = header.group(1).lower() == "molecules"
            updated_lines.append(line)
            continue

        if in_molecules:
            stripped = line.strip()
            if stripped == "" or stripped.startswith(";"):
                updated_lines.append(line)
                continue
            parts = line.split()
            if len(parts) >= 2 and parts[0] == molecule_name:
                # Pad to 9 and add an explicit space: same layout as a
                # 10-wide field for short names, but a long name can never
                # run into the count.
                updated_lines.append(f"{molecule_name:<9} {new_count}\n")
                in_molecules = False  # only update first match
                updated = True
            else:
                updated_lines.append(line)
        else:
            updated_lines.append(line)

    if not updated:
        print(f"No {molecule_name} entry found in [ molecules ] of {top_file}; file left unchanged")
        return

    with open(top_file, "w") as f:
        f.writelines(updated_lines)

    print(f"Updated {molecule_name} count to {new_count} in {top_file}")

def remove_last_line(file_path):
    """
    Drop the final line of a text file, rewriting the file in place.

    An empty file is left empty. The "last line" is whatever the last
    newline-delimited chunk is, so if the file ends with a blank line, that
    blank line is what gets removed.
    """
    with open(file_path, "r") as src:
        kept = src.readlines()

    # Slice deletion is a no-op on an empty list, so no emptiness check is needed.
    del kept[-1:]

    with open(file_path, "w") as dst:
        dst.write("".join(kept))

## Run GROMACS Simulation

In [5]:
def run_exp_loop(Temp):
    gro_file = 'CNT_n09_m06_L5.gro'
    folder_name = 'CNT_n09_m06_L5_Test'
    os.makedirs(folder_name, exist_ok=True)
    exp_name = folder_name + '/' + gro_file[0:-4] + '_' + str(Temp)
    # Source folder
    src = "Template"
    # Destination folder (new name)
    dst = exp_name
    # Copy the entire directory
    shutil.copytree(src, dst)
    updated = update_mdp(
        in_path= exp_name + "/nvt.mdp",
        out_path=None,          # in-place; or provide "nvt_updated.mdp" to write a new file
        n_steps=1_000_000,        # n_steps * 0.002 is the time in picoseconds 
        ref_t=Temp,              # or ref_t=[300, 300]
    )
    updated = update_mdp(
    in_path= exp_name + "/pre_nvt.mdp",
    out_path=None,          # in-place; or provide "nvt_updated.mdp" to write a new file
    n_steps=100_000,        # n_steps * 0.002 is the time in picoseconds 
    ref_t=Temp,              # or ref_t=[300, 300]
    )
   
    path = exp_name
    write_topology(
        out_dir=path,
        molecules={"CNT": 1, "SOL": 0, "NA": 0, "CL": 0}
    )
    !cd "{exp_name}" && printf {gro_file} | python build_cnt_itp.py
    !cd {exp_name}; gmx editconf -f {gro_file} -o boxed.gro -box 2.5 2.5 19 -c  #change back to 3, 3, 19 for tube of 14nm #use 32 for tube length of 28
    !cd {exp_name}; gmx solvate -cp boxed.gro -cs spc216.gro -o solv.gro -p topol.top
    # reduce_waters(exp_name + "/solv.gro", exp_name + "/solv.gro", fraction=1.00)
    # update_topol_from_gro(exp_name + "/topol.top", exp_name + "/solv.gro", "SOL")
    # remove_last_line(exp_name + "/topol.top")
    !cd "{exp_name}" && printf "2\nq\n" | gmx make_ndx -f solv.gro -o index.ndx
    # ---- Energy minimization ----
    !cd {exp_name}; gmx grompp -f minim.mdp -c solv.gro -p topol.top -n index.ndx -o em.tpr -maxwarn 1
    !cd {exp_name}; gmx mdrun -deffnm em

    # ---- Short pre-equilibration (pre_NVT) ----
    !cd {exp_name}; gmx grompp -f pre_nvt.mdp -c em.gro -p topol.top -n index.ndx -o pre_nvt.tpr
    !cd {exp_name}; gmx mdrun -deffnm pre_nvt

    # ---- Full NVT equilibration ----
    !cd {exp_name}; gmx grompp -f nvt.mdp -c pre_nvt.gro -p topol.top -n index.ndx -o nvt.tpr
    !cd {exp_name}; gmx mdrun -deffnm nvt
    !cd "{exp_name}" && printf "2\n0\n" | gmx trjconv -s nvt.tpr -f nvt.xtc -o nvt_whole.xtc -pbc mol -ur compact -center 
    !cd "{exp_name}" && printf "4\n0\n" | gmx energy -f nvt.edr -o potential_energy.xvg
    !cd "{exp_name}" && printf "8\n0\n" | gmx energy -f nvt.edr -o temperature.xvg
    !cd "{exp_name}" && printf "5\n0\n" | gmx energy -f nvt.edr -o kinetic_energy.xvg
    !cd "{exp_name}" && printf "7\n0\n" | gmx energy -f nvt.edr -o conserved_energy.xvg
    !cd "{exp_name}" && printf "6\n0\n" | gmx energy -f nvt.edr -o total_energy.xvg
    !cd "{exp_name}" && printf "9\n0\n" | gmx energy -f nvt.edr -o pressure.xvg

In [6]:
# Temperatures to simulate; each value is handed to run_exp_loop in turn.
# Alternative sweeps kept for reference:
#Temperature_range = np.array([200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400])
#Temperature_range = np.array([250, 275, 300, 325, 350, 375])
Temperature_range = np.array([350])
for temperature in Temperature_range:
    run_exp_loop(temperature)

Topology file written to CNT_n09_m06_L5_Test/CNT_n09_m06_L5_350/topol.top
Enter .gro filename                     :-) GROMACS - gmx editconf, 2023.2 (-:

Executable:   /usr/local/gromacs/bin/gmx
Data prefix:  /usr/local/gromacs
Working dir:  /home/htchi/gromacs-2023.2/Simulations/Carbon Nanotubes/CNT_n09_m06_L5_Test/CNT_n09_m06_L5_350
Command line:
  gmx editconf -f CNT_n09_m06_L5.gro -o boxed.gro -box 2.5 2.5 19 -c

Note that major changes are planned in future for editconf, to improve usability and utility.
Read 684 atoms
Volume: 556.283 nm^3, corresponds to roughly 250300 electrons
No velocities found
    system size :  1.024  1.024  5.530 (nm)
    center      :  0.000 -0.000  2.749 (nm)
    box vectors : 10.000 10.000  5.563 (nm)
    box angles  :  90.00  90.00  90.00 (degrees)
    box volume  : 556.28               (nm^3)
    shift       :  1.250  1.250  6.751 (nm)
new center      :  1.250  1.250  9.500 (nm)
new box vectors :  2.500  2.500 19.000 (nm)
new box angles  :  90.00  90.