<a href="https://colab.research.google.com/github/deltorobarba/sciences/blob/master/hpc_psi4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Parallel Quantum Chemistry Calculations with Psi4**

Example workflow for running quantum chemistry calculations with Psi4 on a supercomputer, launched as multiple MPI processes via mpi4py:

In [None]:
#!/usr/bin/env python
"""
Parallel quantum chemistry calculations using Psi4 with MPI
This script would be run on a supercomputer to perform large-scale electronic structure calculations
"""

import psi4
import numpy as np
from mpi4py import MPI
import os
import time
import argparse

# Initialize MPI environment: every process learns its own rank and the
# total number of processes in the job.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Parse command line arguments (each rank parses the same command line)
parser = argparse.ArgumentParser(description='Parallel Quantum Chemistry with Psi4')
parser.add_argument('--molecule', type=str, required=True, help='XYZ file of the molecule')
parser.add_argument('--basis', type=str, default='cc-pVTZ', help='Basis set')
parser.add_argument('--method', type=str, default='MP2', help='Computational method')
parser.add_argument('--memory', type=str, default='8GB', help='Memory per process')
parser.add_argument('--output', type=str, default='qc_results', help='Output directory')
args = parser.parse_args()

# The output directory must exist before any rank opens its Psi4 output file
# inside it. Every rank issues the (idempotent) call so no rank can race
# ahead of the directory creation.
os.makedirs(args.output, exist_ok=True)

# Configure Psi4
# Share the visible cores between the ranks, but never ask for fewer than one
# thread: os.cpu_count() may return None, and there may be more ranks than
# cores. NOTE(review): this assumes all ranks run on the same node.
threads_per_rank = max(1, (os.cpu_count() or 1) // size)
psi4.set_num_threads(threads_per_rank)  # Threads per MPI process
psi4.set_memory(args.memory)
psi4.core.set_output_file(f'{args.output}/psi4_output_rank{rank}.dat', False)

if rank == 0:
    print(f"Starting calculation with {size} MPI processes")
    print(f"Method: {args.method}, Basis: {args.basis}")

def read_xyz_file(filename):
    """Read a molecule from an XYZ file and return a Psi4 geometry string.

    The XYZ layout is: line 1 = number of atoms, line 2 = comment,
    then one line per atom.

    The comment line is only forwarded when it consists of exactly two
    integers (read as ``charge multiplicity``); any other free-form comment
    is dropped so that it cannot be mistaken for geometry input.

    Args:
        filename: Path of the XYZ file.

    Returns:
        A string with an optional charge/multiplicity line, one line per
        atom, and a trailing ``units angstrom`` line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the atom count is not a positive integer or the file
            contains fewer (or blank) atom lines than announced.
    """
    with open(filename, 'r') as f:
        lines = [line.strip() for line in f]

    if not lines:
        raise ValueError(f"XYZ file '{filename}' is empty")

    num_atoms = int(lines[0])
    if num_atoms < 1:
        raise ValueError(
            f"XYZ file '{filename}' declares {num_atoms} atoms; expected at least 1"
        )

    atom_lines = lines[2:num_atoms + 2]
    if len(atom_lines) != num_atoms or not all(atom_lines):
        raise ValueError(
            f"XYZ file '{filename}' declares {num_atoms} atoms but does not "
            f"contain that many atom lines"
        )

    # Keep the second line only if it looks like "charge multiplicity".
    header = ""
    comment = lines[1] if len(lines) > 1 else ""
    try:
        charge, multiplicity = (int(token) for token in comment.split())
    except ValueError:
        pass  # free-form comment (or empty line): not part of the geometry
    else:
        if multiplicity >= 1:
            header = f"{charge} {multiplicity}\n"

    return "\n" + header + "\n".join(atom_lines) + "\nunits angstrom\n"

def run_energy_calculation():
    """Run a single point energy calculation.

    Rank 0 reads the XYZ file and broadcasts the geometry string; every rank
    then builds the molecule and calls ``psi4.energy`` itself (the call is
    not guarded by a rank check, so each process performs the calculation).

    Method selection based on ``args.method`` (case-insensitive):
        * SCF / HF / RHF            -> 'SCF'
        * MP2, CCSD, CCSD(T)        -> used as given
        * DFT                       -> 'B3LYP'
        * B3LYP, PBE0, WB97X-D      -> used as given
        * anything else             -> falls back to 'SCF' with a notice

    Returns:
        Tuple ``(molecule, energy)``: the Psi4 molecule object and the value
        returned by ``psi4.energy``.

    Raises:
        OSError, ValueError, IndexError: On rank 0, if the molecule file
            cannot be read or parsed.
        RuntimeError: On every other rank when rank 0 failed to read the file.
    """
    # Read molecule (only needed on rank 0). A failure is recorded instead of
    # raised immediately: otherwise the remaining ranks would wait forever in
    # the broadcast below.
    molecule_str = None
    read_exc = None
    if rank == 0:
        try:
            molecule_str = read_xyz_file(args.molecule)
        except (OSError, ValueError, IndexError) as exc:
            read_exc = exc

    # Broadcast molecule (and the success flag) to all processes
    molecule_str, read_failed = comm.bcast((molecule_str, read_exc is not None), root=0)
    if read_failed:
        if read_exc is not None:
            raise read_exc
        raise RuntimeError(f"Rank 0 could not read molecule file '{args.molecule}'")

    # Create molecule object
    molecule = psi4.geometry(molecule_str)

    # Resolve the method that is actually passed to Psi4
    requested = args.method.upper()
    if requested in ('SCF', 'HF', 'RHF'):
        method = 'SCF'
    elif requested == 'DFT':
        method = 'B3LYP'
    elif requested in ('MP2', 'CCSD', 'CCSD(T)', 'B3LYP', 'PBE0', 'WB97X-D'):
        method = args.method
    else:
        if rank == 0:
            print(f"Unknown method: {args.method}, defaulting to HF")
        method = 'SCF'

    # Run calculation
    start_time = time.time()
    energy = psi4.energy(f'{method}/{args.basis}', molecule=molecule)
    end_time = time.time()

    # Print result from rank 0, labelled with the method that was really run
    if rank == 0:
        print(f"Energy calculation completed in {end_time - start_time:.2f} seconds")
        print(f"Final {method} energy: {energy} Eh")

    return molecule, energy

def run_frequency_calculation(molecule):
    """Run a frequency calculation to get vibrational modes.

    The frequencies are evaluated at the geometry stored in ``molecule``;
    no geometry optimization is performed here, so imaginary modes can show
    up if that geometry is not a stationary point.

    Level of theory, derived from ``args.method`` (case-insensitive):
        * MP2, CCSD, CCSD(T), DFT   -> 'B3LYP' (cheaper than the energy method)
        * B3LYP, PBE0, WB97X-D      -> used as given
        * SCF / HF / RHF / unknown  -> 'SCF' (same fallback as the energy step)

    On rank 0 the frequencies are written to ``<output>/frequencies.txt`` and
    a warning is printed when negative (imaginary) values are present.

    Args:
        molecule: Psi4 molecule object to analyse.

    Returns:
        The first element of the ``(value, wfn)`` pair returned by
        ``psi4.frequency(..., return_wfn=True)``. NOTE(review): per the Psi4
        documentation this is the total electronic energy, not the list of
        frequencies - confirm before relying on it.
    """
    if rank == 0:
        print("Starting frequency calculation...")

    start_time = time.time()

    # Pick the level of theory for the Hessian, resolving the same aliases
    # that the single point energy step accepts.
    requested = args.method.upper()
    if requested in ('CCSD', 'CCSD(T)', 'MP2', 'DFT'):
        freq_method = 'B3LYP'
    elif requested in ('B3LYP', 'PBE0', 'WB97X-D'):
        freq_method = args.method
    else:
        freq_method = 'SCF'

    # Run the calculation (executed by every rank; not guarded by a rank check)
    freq_result, wfn = psi4.frequency(
        f'{freq_method}/{args.basis}',
        molecule=molecule,
        return_wfn=True
    )

    end_time = time.time()

    # Process and output results on rank 0
    if rank == 0:
        print(f"Frequency calculation completed in {end_time - start_time:.2f} seconds")

        # Get vibrational frequencies
        vib_freqs = np.array(wfn.frequencies())

        # Save frequencies to file
        np.savetxt(
            f'{args.output}/frequencies.txt',
            vib_freqs,
            header='Vibrational frequencies (cm^-1)'
        )

        # Check for imaginary frequencies (negative values)
        imag_freqs = vib_freqs[vib_freqs < 0]
        if len(imag_freqs) > 0:
            print(f"Warning: Found {len(imag_freqs)} imaginary frequencies")
            # Use the minimum explicitly rather than relying on sort order
            print(f"Lowest frequency: {imag_freqs.min()} cm^-1")

    return freq_result

def calculate_thermochemistry(molecule, energy):
    """Collect thermochemical properties and write them to disk.

    The values are read from the PSI variables that Psi4's vibrational
    analysis publishes, so a frequency calculation must have been run
    beforehand in the same process. They therefore refer to the level of
    theory used for the frequencies, which may differ from the one that
    produced ``energy``.

    On rank 0 the results are written to ``<output>/thermochemistry.txt``
    and the enthalpy and Gibbs free energy are printed.

    Args:
        molecule: Psi4 molecule object. Kept for interface compatibility;
            not used by this function.
        energy: Electronic energy (Eh) reported alongside the thermochemistry.

    Returns:
        Dict with the keys ``'ZPE'``, ``'H_thermal_correction'``,
        ``'G_thermal_correction'``, ``'H'`` and ``'G'``.
    """
    if rank == 0:
        print("Calculating thermochemical properties...")

    start_time = time.time()

    # Temperature and pressure reported with the results.
    # NOTE(review): these are labels only; they assume the Psi4 thermo
    # options were left at their defaults for the frequency job - confirm.
    T = 298.15  # K
    P = 1.0     # atm

    # Gather the thermochemistry produced by the preceding frequency job
    thermochemistry = {
        'ZPE': psi4.variable('ZPVE'),
        'H_thermal_correction': psi4.variable('ENTHALPY CORRECTION'),
        'G_thermal_correction': psi4.variable('GIBBS FREE ENERGY CORRECTION'),
        'H': psi4.variable('ENTHALPY'),
        'G': psi4.variable('GIBBS FREE ENERGY'),
    }

    end_time = time.time()

    # Process and output results on rank 0
    if rank == 0:
        print(f"Thermochemistry calculation completed in {end_time - start_time:.2f} seconds")

        # Save thermochemistry data to file
        with open(f'{args.output}/thermochemistry.txt', 'w') as f:
            f.write(f"Temperature: {T} K\n")
            f.write(f"Pressure: {P} atm\n\n")
            f.write(f"Electronic energy: {energy} Eh\n")
            f.write(f"Zero-point energy: {thermochemistry['ZPE']} Eh\n")
            f.write(f"Thermal correction to enthalpy: {thermochemistry['H_thermal_correction']} Eh\n")
            f.write(f"Thermal correction to Gibbs free energy: {thermochemistry['G_thermal_correction']} Eh\n\n")
            f.write(f"Enthalpy (H): {thermochemistry['H']} Eh\n")
            f.write(f"Gibbs free energy (G): {thermochemistry['G']} Eh\n")

        print(f"Enthalpy (H): {thermochemistry['H']} Eh")
        print(f"Gibbs free energy (G): {thermochemistry['G']} Eh")

    return thermochemistry

def analyze_electron_density():
    """Perform electron density analysis for SCF-type methods.

    Runs only when ``args.method`` (case-insensitive) is SCF/HF/RHF, DFT
    (treated as B3LYP) or one of B3LYP, PBE0, WB97X-D; for any other method
    the function returns without doing anything.

    The molecule is re-read on rank 0 and broadcast, an energy calculation is
    run to obtain a wavefunction, and the dipole moment, Mulliken charges and
    Mayer bond indices are requested through ``psi4.oeprop``. Rank 0 writes
    ``dipole.txt`` and ``mulliken.txt`` into the output directory; the bond
    indices are left in the Psi4 output file.

    Raises:
        OSError, ValueError, IndexError: On rank 0, if the molecule file
            cannot be read or parsed.
        RuntimeError: On every other rank when rank 0 failed to read the file.
    """
    # Resolve the method with the same aliases the energy step accepts
    requested = args.method.upper()
    if requested in ('SCF', 'HF', 'RHF'):
        method = 'SCF'
    elif requested == 'DFT':
        method = 'B3LYP'
    elif requested in ('B3LYP', 'PBE0', 'WB97X-D'):
        method = args.method
    else:
        return

    if rank == 0:
        print("Performing electron density analysis...")

    start_time = time.time()

    # Re-read the molecule on rank 0. A failure is recorded rather than
    # raised straight away so the other ranks do not hang in the broadcast.
    molecule_str = None
    read_exc = None
    if rank == 0:
        try:
            molecule_str = read_xyz_file(args.molecule)
        except (OSError, ValueError, IndexError) as exc:
            read_exc = exc

    molecule_str, read_failed = comm.bcast((molecule_str, read_exc is not None), root=0)
    if read_failed:
        if read_exc is not None:
            raise read_exc
        raise RuntimeError(f"Rank 0 could not read molecule file '{args.molecule}'")

    molecule = psi4.geometry(molecule_str)

    # Run SCF calculation and get wavefunction
    energy, wfn = psi4.energy(f'{method}/{args.basis}', return_wfn=True, molecule=molecule)

    # One-electron properties: dipole, Mulliken charges and Mayer bond
    # indices. The title fixes the name of the dipole variable read below.
    psi4.oeprop(wfn, "DIPOLE", "MULLIKEN_CHARGES", "MAYER_INDICES", title="SCF")

    # NOTE(review): assumes a Psi4 version that stores the dipole as a
    # 3-component array in atomic units under "SCF DIPOLE" - confirm against
    # the deployed Psi4 release.
    dipole = np.asarray(wfn.variable("SCF DIPOLE")).ravel()

    # Mulliken charges are stored on the wavefunction by oeprop
    mulliken = np.asarray(wfn.atomic_point_charges())

    end_time = time.time()

    # Process and output results on rank 0
    if rank == 0:
        print(f"Electron density analysis completed in {end_time - start_time:.2f} seconds")

        # Save dipole moment
        with open(f'{args.output}/dipole.txt', 'w') as f:
            f.write(f"Dipole moment (a.u.): [{dipole[0]}, {dipole[1]}, {dipole[2]}]\n")
            dipole_debye = [d * 2.541746 for d in dipole]  # Convert to Debye
            f.write(f"Dipole moment (Debye): [{dipole_debye[0]:.4f}, {dipole_debye[1]:.4f}, {dipole_debye[2]:.4f}]\n")
            f.write(f"Total dipole moment (Debye): {np.linalg.norm(dipole_debye):.4f}\n")

        # Save Mulliken charges
        with open(f'{args.output}/mulliken.txt', 'w') as f:
            f.write("Atom\tMulliken Charge\n")
            for i, charge in enumerate(mulliken):
                atom = molecule.symbol(i)
                f.write(f"{atom}{i+1}\t{charge:.6f}\n")

        print(f"Analysis results saved to {args.output}/")

def main():
    """Drive the complete workflow: energy, frequencies, thermochemistry, density.

    The steps run in that order on every rank; only rank 0 prints the
    closing summary with the total wall time.
    """
    workflow_started = time.time()

    # Step 1: single point energy (also yields the molecule object)
    molecule, energy = run_energy_calculation()

    # Step 2: vibrational frequencies
    run_frequency_calculation(molecule)

    # Step 3: thermochemistry
    calculate_thermochemistry(molecule, energy)

    # Step 4: electron density analysis
    analyze_electron_density()

    # Only rank 0 reports the summary
    if rank != 0:
        return

    elapsed = time.time() - workflow_started
    print(f"\nAll calculations completed in {elapsed:.2f} seconds")
    print(f"Results saved to {args.output}/")

# Script entry point: run the workflow only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
    # Explicitly shut down the MPI runtime once the workflow has returned
    MPI.Finalize()