# Replicate current behaviour for closed shells
Once that works we can move on to checking that open shells work.

* [ ] Use unrestricted method for closed shell

* [ ] Check that values match the restricted results, including energy contributions

* [ ] Try with open shell

* [ ] Change driver back to allow choice of restricted scf

* [ ] Insert new functions

* [ ] Update pyscf localizers to do unrestricted


We need to force the driver to always use the unrestricted method.

In [1]:
from nbed.driver import NbedDriver
from pathlib import Path

# Keyword arguments passed to each driver constructed in this notebook.
args = dict(
    geometry=str(Path("molecular_structures/cyclopentane.xyz").absolute()),
    basis="STO-3G",
    xc_functional="b3lyp",
    n_active_atoms=2,
    projector="both",
    localization="spade",
    convergence=1e-6,
    savefile=None,
    run_ccsd_emb=True,
    run_fci_emb=False,
    charge=0,
    spin=0,
)

  h5py.get_config().default_file_mode = 'a'


## Run restricted to get components

In [None]:
# Build a driver from the shared settings, then construct an unrestricted and a
# restricted Kohn-Sham object from its molecule to inspect their types.
# NOTE(review): `scf` is only imported in a later cell of this notebook, so this
# cell needs that import to have run first.
driver = NbedDriver(**args)


uks = scf.UKS(driver._build_mol())
# Bare expression in the middle of a cell: evaluated, but only the final
# expression of a notebook cell is displayed.
type(uks)
rks = scf.RKS(driver._build_mol())
# Final expression of the cell: is the restricted Kohn-Sham object an RHF instance?
isinstance(rks, scf.hf.RHF)


In [2]:
import logging
logger = logging.getLogger(__name__)
from nbed.localizers import Localizer
from typing import Optional, Tuple, Dict
import numpy as np
from pyscf import scf, dft, gto
from scipy import linalg
from nbed.exceptions import NbedConfigError

class OldSPADELocalizer(Localizer):
    """Restricted-style SPADE localizer.

    ``_localize`` produces a single set of localized occupied orbitals and
    reports ``None`` for the beta spin, so ``run`` weights the resulting
    density matrices by two.
    """

    def __init__(
        self,
        pyscf_scf: scf.hf.SCF,
        n_active_atoms: int,
        occ_cutoff: Optional[float] = 0.95,
        virt_cutoff: Optional[float] = 0.95,
        run_virtual_localization: Optional[bool] = False,
    ):
        """Pass every setting straight through to ``Localizer.__init__``.

        Args:
            pyscf_scf: PySCF mean-field object to localize (the notebook passes
                ``driver._global_ks``). Presumably stored by the base class as
                ``self._global_ks`` -- verify against ``Localizer``.
            n_active_atoms: Number of atoms in the active region.
            occ_cutoff: Forwarded unchanged to the base class.
            virt_cutoff: Forwarded unchanged to the base class.
            run_virtual_localization: Forwarded unchanged to the base class.
        """
        super().__init__(
            pyscf_scf,
            n_active_atoms,
            occ_cutoff=occ_cutoff,
            virt_cutoff=virt_cutoff,
            run_virtual_localization=run_virtual_localization,
        )

    def _localize(
        self,
    ) -> Tuple[
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray], None
    ]:
        """Localise orbitals using SPADE.

        Returns:
            A pair ``(alpha, beta)``. ``beta`` is always ``None`` here, and
            ``alpha`` is the tuple:

            active_MO_inds (np.array): 1D array of active occupied MO indices
            enviro_MO_inds (np.array): 1D array of environment occupied MO indices
            c_active (np.array): C matrix of localized occupied active MOs (columns define MOs)
            c_enviro (np.array): C matrix of localized occupied environment MOs
            c_loc_occ (np.array): full C matrix of localized occupied MOs
        """
        logger.info("Localising with SPADE.")
        n_occupied_orbitals = np.count_nonzero(self._global_ks.mo_occ)

        # Take the first n_occupied_orbitals columns as the occupied orbitals.
        occupied_orbitals = self._global_ks.mo_coeff[:, :n_occupied_orbitals]

        # Last entry of the AO-slice row of the final active atom, used below
        # as the number of leading AO rows that belong to the active region.
        n_act_aos = self._global_ks.mol.aoslice_by_atom()[self._n_active_atoms - 1][-1]
        logger.debug(f"{n_act_aos} active AOs.")

        ao_overlap = self._global_ks.get_ovlp()

        # Orbital rotation and partition into subsystems A and B:
        # transform the occupied orbitals with S^(1/2), then take the SVD of
        # the rows belonging to the active AOs.
        rotated_orbitals = (
            linalg.fractional_matrix_power(ao_overlap, 0.5) @ occupied_orbitals
        )
        _, sigma, right_vectors = linalg.svd(rotated_orbitals[:n_act_aos, :])

        logger.debug(f"Singular Values: {sigma}")

        # The active/environment split is placed at the largest drop between
        # consecutive singular values.
        # A single singular value has no gaps, which would make argmax fail.
        if len(sigma) == 1:
            n_act_mos = 1
        else:
            value_diffs = sigma[:-1] - sigma[1:]
            n_act_mos = np.argmax(value_diffs) + 1
        n_env_mos = n_occupied_orbitals - n_act_mos
        logger.debug(f"{n_act_mos} active MOs.")
        logger.debug(f"{n_env_mos} environment MOs.")

        # get active and enviro indices
        active_MO_inds = np.arange(n_act_mos)
        enviro_MO_inds = np.arange(n_act_mos, n_act_mos + n_env_mos)

        # Defining active and environment orbitals and density
        c_active = occupied_orbitals @ right_vectors.T[:, :n_act_mos]
        c_enviro = occupied_orbitals @ right_vectors.T[:, n_act_mos:]
        c_loc_occ = occupied_orbitals @ right_vectors.T

        # storing condition used to select env system
        self.enviro_selection_condition = sigma

        alpha = active_MO_inds, enviro_MO_inds, c_active, c_enviro, c_loc_occ
        # Only one set of orbitals is produced, so there is no beta result.
        beta = None

        return (alpha, beta)

    def run(self, sanity_check: bool = False) -> None:
        """Function that runs localization.

        Stores the localized orbitals, their indices and the active and
        environment density matrices as attributes on the instance.

        Args:
            sanity_check (bool): optional flag to check density matrices and electron number after orbital localization
                                 makes sense
        """
        alpha, beta = self._localize()

        (
            self.active_MO_inds,
            self.enviro_MO_inds,
            self.c_active,
            self.c_enviro,
            self._c_loc_occ,
        ) = alpha

        # Weight by two; this is halved again below when a beta result exists.
        self.dm_active = 2.0 * self.c_active @ self.c_active.T
        self.dm_enviro = 2.0 * self.c_enviro @ self.c_enviro.T

        self.beta_active_MO_inds = None
        self.beta_enviro_MO_inds = None
        self.beta_c_active = None
        self.beta_c_enviro = None
        self._beta_c_loc_occ = None
        self.beta_dm_active = None
        self.beta_dm_enviro = None

        if beta is not None:
            # Weight the DMs by 1 for unrestricted to combine them.
            self.dm_active *= 0.5
            self.dm_enviro *= 0.5

            (
                self.beta_active_MO_inds,
                self.beta_enviro_MO_inds,
                self.beta_c_active,
                self.beta_c_enviro,
                self._beta_c_loc_occ,
            ) = beta

            self.beta_dm_active = self.beta_c_active @ self.beta_c_active.T
            self.beta_dm_enviro = self.beta_c_enviro @ self.beta_c_enviro.T

        if sanity_check is True:
            self._check_values()

        if self._run_virtual_localization is True:
            # Only reported; execution continues without localized virtuals.
            logger.error("Virtual localization is not implemented.")
            # c_virtual = self._localize_virtual_orbs()
            # logger.error("Defualting to unlocalized virtual orbitals.")
            # c_virtual = self._global_ks.mo_coeff[:, self._global_ks.mo_occ < 2]
        else:
            logger.debug("Not localizing virtual orbitals.")
            # appends standard virtual orbitals from SCF calculation (NOT localized in any way)
            # c_virtual = self._global_ks.mo_coeff[:, self._global_ks.mo_occ < 2]

        # Unused
        # self.c_loc_occ_and_virt = np.hstack((self._c_loc_occ, c_virtual))

        logger.debug("Localization complete.")

In [8]:
import numpy as np
driver = NbedDriver(**args)

## Combine the C matrices and occupations into one to use restricted localizer
# Detect spin-resolved coefficients by their dimensionality: the recorded
# outputs in this notebook show (2, 35, 35) for the unrestricted case and
# (35, 35) for the restricted one. Testing ``shape[0] == 2`` would also match
# a restricted calculation that has exactly two basis functions.
if np.ndim(driver._global_ks.mo_coeff) == 3:
    # Average the two coefficient matrices and add the two occupation vectors.
    driver._global_ks.mo_coeff = np.sum(driver._global_ks.mo_coeff, axis=0) / 2
    driver._global_ks.mo_occ = np.sum(driver._global_ks.mo_occ, axis=0)

print(driver._global_ks.mo_coeff.shape)
old = OldSPADELocalizer(driver._global_ks, driver.n_active_atoms)
driver.localized_system = old
print(old.active_MO_inds)
print(old.enviro_MO_inds)

(35, 35)
[0 1 2 3 4]
[ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [3]:
from nbed.localizers import SPADELocalizer


class UnrestrictedDriver(NbedDriver):
    """``NbedDriver`` subclass that marks the SCF as unrestricted.

    ``__init__`` does not call ``NbedDriver.__init__``: it validates and stores
    the settings itself, sets ``_restricted_scf`` to ``False`` and does not run
    ``embed`` (that call is commented out), so the embedding steps can be
    executed manually.
    """

    def __init__(
        self,
        geometry: str,
        n_active_atoms: int,
        basis: str,
        xc_functional: str,
        projector: str,
        localization: Optional[str] = "spade",
        convergence: Optional[float] = 1e-6,
        charge: Optional[int] = 0,
        spin: Optional[int] = 0,
        mu_level_shift: Optional[float] = 1e6,
        run_ccsd_emb: Optional[bool] = False,
        run_fci_emb: Optional[bool] = False,
        run_virtual_localization: Optional[bool] = False,
        run_dft_in_dft: Optional[bool] = False,
        max_ram_memory: Optional[int] = 4000,
        pyscf_print_level: int = 1,
        savefile: Optional[Path] = None,
        unit: Optional[str] = "angstrom",
        occupied_threshold: Optional[float] = 0.95,
        virtual_threshold: Optional[float] = 0.95,
        init_huzinaga_rhf_with_mu: bool = False,
        max_hf_cycles: int = 50,
        max_dft_cycles: int = 50,
    ):
        """Initialise class.

        Every argument is stored on the instance under the same name; string
        settings (``basis``, ``xc_functional``, ``projector``,
        ``localization``) are stored lower-cased.

        Args:
            projector: One of 'mu', 'huzinaga' or 'both' (case-insensitive).
            localization: One of 'spade', 'ibo', 'boys' or 'pipek-mezey'
                (case-insensitive).
            init_huzinaga_rhf_with_mu: Currently unused because the ``embed``
                call that would consume it is commented out.

        Raises:
            NbedConfigError: If ``projector`` or ``localization`` is not one of
                the accepted values.
        """
        logger.debug("Initialising driver.")

        # The values are stored lower-cased, so compare them lower-cased too;
        # otherwise e.g. "SPADE" is rejected even though it would be
        # normalised to a valid value. Non-string input is left untouched and
        # fails the membership tests below.
        if isinstance(projector, str):
            projector = projector.lower()
        if isinstance(localization, str):
            localization = localization.lower()

        config_valid = True
        if projector not in ["mu", "huzinaga", "both"]:
            logger.error(
                "Invalid projector %s selected. Choose from 'mu', 'huzinaga' or 'both'.",
                projector,
            )
            config_valid = False

        if localization not in ["spade", "ibo", "boys", "pipek-mezey"]:
            logger.error(
                "Invalid localization method %s. Choose from 'ibo','boys','pipek-mezey' or 'spade'.",
                localization,
            )
            config_valid = False

        if not config_valid:
            logger.error("Invalid config.")
            raise NbedConfigError("Invalid config.")

        self.geometry = geometry
        self.n_active_atoms = n_active_atoms
        self.basis = basis.lower()
        self.xc_functional = xc_functional.lower()
        self.projector = projector
        self.localization = localization
        self.convergence = convergence
        self.charge = charge
        self.spin = spin
        self.mu_level_shift = mu_level_shift
        self.run_ccsd_emb = run_ccsd_emb
        self.run_fci_emb = run_fci_emb
        self.run_virtual_localization = run_virtual_localization
        self.run_dft_in_dft = run_dft_in_dft
        self.max_ram_memory = max_ram_memory
        self.pyscf_print_level = pyscf_print_level
        self.savefile = savefile
        self.unit = unit
        self.occupied_threshold = occupied_threshold
        self.virtual_threshold = virtual_threshold
        self.max_hf_cycles = max_hf_cycles
        self.max_dft_cycles = max_dft_cycles

        self._check_active_atoms()
        self.localized_system = None
        self.two_e_cross = None
        self._dft_potential = None

        # This flag is what distinguishes this driver: always unrestricted.
        self._restricted_scf = False

        # self.embed(init_huzinaga_rhf_with_mu=init_huzinaga_rhf_with_mu) # TODO uncomment.
        logger.debug("Driver initialisation complete.")


## If the C matrix is combined before localisation, we get the right DFT components

In [90]:
# Collapse the spin-resolved SCF data into one set before running the
# restricted-style localizer.
driver = UnrestrictedDriver(**args)

## Combine the C matrices and occupations into one to use restricted localizer
# UnrestrictedDriver sets _restricted_scf to False, so this branch is taken:
# the coefficient matrices are summed over axis 0 and halved, and the
# occupations are summed over axis 0.
if driver._restricted_scf is False:
    driver._global_ks.mo_coeff = np.sum(driver._global_ks.mo_coeff, axis=0)/2
    driver._global_ks.mo_occ = np.sum(driver._global_ks.mo_occ, axis=0)

print(driver._global_ks.mo_coeff.shape)
old = OldSPADELocalizer(driver._global_ks, driver.n_active_atoms)
# Install the localizer on the driver for the later embedding steps.
driver.localized_system = old
print(old.active_MO_inds)
print(old.enviro_MO_inds)

(35, 35)
[0 1 2 3 4]
[ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


## If the density matrices are combined after localisation, it also passes!

In [126]:
# Localize with the spin-resolved SPADELocalizer first, then fold the beta
# density matrices into the alpha ones.
driver = UnrestrictedDriver(**args)

## Here the SCF data is left spin-resolved; the density matrices are combined after localisation instead
print(driver._global_ks.mo_coeff.shape)
old = SPADELocalizer(driver._global_ks, driver.n_active_atoms)
driver.localized_system = old
print(old.active_MO_inds)
print(old.enviro_MO_inds)

# Add the beta density matrices into the alpha ones in place...
driver.localized_system.dm_active += driver.localized_system.beta_dm_active
driver.localized_system.dm_enviro += driver.localized_system.beta_dm_enviro
# ...then zero the beta matrices so their contribution is not counted twice.
driver.localized_system.beta_dm_active = np.zeros(driver.localized_system.dm_active.shape)
driver.localized_system.beta_dm_enviro = np.zeros(driver.localized_system.dm_enviro.shape)

(2, 35, 35)
occupancy=array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0.])
occupancy=array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0.])
[0 1 2 3 4]
[ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


# Localize separately

Create a driver to force unrestricted even for closed shells.

In [2]:
from nbed.localizers import SPADELocalizer
from nbed.driver import NbedDriver
# Build a standard driver, flip its flag to unrestricted, and localize each
# spin separately with SPADELocalizer.
driver = NbedDriver(**args)
# NOTE(review): the flag is changed after construction; whether _global_ks
# picks it up depends on when NbedDriver evaluates it -- confirm.
driver._restricted_scf = False
new = SPADELocalizer(driver._global_ks, driver.n_active_atoms)
# Print the alpha and beta index arrays in pairs so they can be compared.
print(new.active_MO_inds)
print(new.beta_active_MO_inds)
print(new.enviro_MO_inds)
print(new.beta_enviro_MO_inds)

[0 1 2 3 4]
[0 1 2 3 4]
[ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


# Mock Driver.embed()

In [3]:
import logging
logger = logging.getLogger(__name__)
from nbed.localizers import Localizer
from typing import Optional, Tuple, Dict
import numpy as np
from pyscf import scf, dft, gto

# Start of a step-by-step, cell-level re-enactment of Driver.embed().
# Stands in for the embed() keyword argument of the same name.
init_huzinaga_rhf_with_mu = False

# The string below is a bare expression at cell level and has no effect; it
# reads like the docstring of the method being mocked.
"""Generate embedded Hamiltonian.

Note run_mu_shift (bool) and run_huzinaga (bool) flags define which method to use (can be both)
This is done when object is initialized.
"""
logger.debug("Embedding molecule.")
# Localize through the driver and keep the result both as a notebook variable
# and on the driver itself.
localized_system = driver._localize()
driver.localized_system = localized_system

# logger.info(localized_system.active_MO_inds, localized_system.beta_active_MO_inds)
# logger.info(localized_system.enviro_MO_inds, localized_system.beta_enviro_MO_inds)

# Nuclear repulsion energy, used later for the classical energy.
e_nuc = driver._global_ks.energy_nuc()

# Run subsystem DFT (calls localized rks)
driver._subsystem_dft()


In [4]:

def energy_elec(mf, dm=None, h1e=None, vhf=None) -> Tuple[float, float]:
    '''Electronic energy of Unrestricted Hartree-Fock

    Note this function has side effects which cause mf.scf_summary updated.
    This version indexes the core Hamiltonian by spin, so a different
    one-electron operator can be supplied for each spin channel. A single 2D
    core Hamiltonian is also accepted and is used for both spins.

    Args:
        mf: Mean-field object providing ``make_rdm1``, ``get_hcore``,
            ``get_veff``, ``mol`` and a ``scf_summary`` dict.
        dm: Density matrix. A 2D array is split into two equal halves, one per
            spin. Defaults to ``mf.make_rdm1()``.
        h1e: Core Hamiltonian, either one 2D matrix or a pair indexed by spin.
            Defaults to ``mf.get_hcore()``.
        vhf: Effective potential indexed by spin. Defaults to
            ``mf.get_veff(mf.mol, dm)``.

    Returns:
        e_elec (float): Hartree-Fock electronic energy
        e_coul (float): 2-electron contribution to electronic energy
    '''
    if dm is None:
        dm = mf.make_rdm1()
    if h1e is None:
        h1e = mf.get_hcore()
    if isinstance(dm, np.ndarray) and dm.ndim == 2:
        dm = np.array((dm * 0.5, dm * 0.5))
    if vhf is None:
        vhf = mf.get_veff(mf.mol, dm)
    # A plain 2D core Hamiltonian applies to both spins; without this,
    # h1e[0] below would pick out a matrix row rather than a spin block.
    if np.ndim(h1e) == 2:
        h1e = (h1e, h1e)
    e1 = np.einsum('ij,ji->', h1e[0], dm[0])
    e1 += np.einsum('ij,ji->', h1e[1], dm[1])
    e_coul = (np.einsum('ij,ji->', vhf[0], dm[0]) +
              np.einsum('ij,ji->', vhf[1], dm[1])) * .5
    e_elec = (e1 + e_coul).real
    mf.scf_summary['e1'] = e1.real
    mf.scf_summary['e2'] = e_coul.real
    # `logger` is a standard-library logger here, so the format string must be
    # the first argument (passing `mf` first is the PySCF logger convention).
    logger.debug('E1 = %s  Ecoul = %s', e1, e_coul.real)
    return e_elec, e_coul


# Run a mu-shift embedded SCF by hand on the local Hartree-Fock object.
# NOTE(review): the recorded output of this cell is an AttributeError saying
# the NbedDriver object has no attribute '_dft_potential'.
print("Running embedded scf calculation.")

localized_scf = driver._init_local_hf()
# Replace the energy routine with the spin-indexed energy_elec defined above.
localized_scf.energy_elec = lambda *args: energy_elec(localized_scf, *args)
# modify hcore to embedded version
v_emb = (driver.mu_level_shift * driver._env_projector) + driver._dft_potential
# Capture the unmodified core Hamiltonian before get_hcore is overridden.
hcore_std = localized_scf.get_hcore()
# Duplicate the core Hamiltonian into a two-entry stack and add the embedding
# potential; the arguments passed to get_hcore are ignored.
localized_scf.get_hcore = lambda *args: np.array([hcore_std, hcore_std]) + v_emb

localized_scf.kernel()
print(
    f"Embedded scf energy MU_SHIFT: {localized_scf.e_tot}, converged: {localized_scf.converged}"
)

Running embedded scf calculation.


AttributeError: 'NbedDriver' object has no attribute '_dft_potential'

## Apply projector to each spin

In [5]:
# Evaluate the effective potential for a pair of density matrices.
# NOTE(review): the pair here is (dm_active, dm_enviro); the following cell
# pairs dm_active with beta_dm_active instead -- confirm which is intended.
g_act = driver._global_ks.get_veff(dm=[localized_system.dm_active, localized_system.dm_enviro])
# Displayed as the cell result.
g_act.shape

(2, 35, 35)

In [7]:
logger.debug("Getting global DFT potential to optimize embedded calc in.")

# Total density of the whole system. The localizer only provides beta density
# matrices in the unrestricted case (they are None otherwise), so they must be
# added when the SCF is NOT restricted.
total_dm = localized_system.dm_active + localized_system.dm_enviro
if not driver._restricted_scf:
    total_dm += localized_system.beta_dm_active + localized_system.beta_dm_enviro

g_act_and_env = driver._global_ks.get_veff(
    dm=total_dm,
)

# Potential of the active region alone, using both spins when unrestricted.
if driver._restricted_scf:
    g_act = driver._global_ks.get_veff(dm=localized_system.dm_active)
else:
    g_act = driver._global_ks.get_veff(dm=[localized_system.dm_active, localized_system.beta_dm_active])

# The embedding DFT potential is the difference between the two.
dft_potential = g_act_and_env - g_act
logger.info(f"DFT potential average {np.mean(dft_potential)}.")

# To add a projector, put it in this dict with a function
# if we want any more it's also time to turn them into a class
embeddings: Dict[str, callable] = {
    "huzinaga": driver._huzinaga_embed,
    "mu": driver._mu_embed,
}
# A single named projector narrows the dict; "both" is not a key, so both stay.
if driver.projector in embeddings:
    embeddings = {driver.projector: embeddings[driver.projector]}

# This is reverse so that huz can be initialised with mu
# ("mu" sorts after "huzinaga", so reverse order runs "mu" first).
for name in sorted(embeddings, reverse=True):
    logger.debug(f"Runnning embedding with {name} projector.")
    # Results for each projector are collected in driver._mu / driver._huzinaga.
    setattr(driver, "_" + name, {})
    result = getattr(driver, "_" + name)

    embedding_method: callable = embeddings[name]
    local_rhf = driver._init_local_hf()

    if init_huzinaga_rhf_with_mu and (name == "huzinaga"):
        logger.debug("Initializing huzinaga with mu-shift.")
        # seed huzinaga calc with mu result!
        result["v_emb"], result["scf"] = embedding_method(
            local_rhf, dft_potential, dmat_initial_guess=driver._mu["scf"].make_rdm1()
        )
    else:
        result["v_emb"], result["scf"] = embedding_method(local_rhf, dft_potential)


ValueError: operand has more dimensions than subscripts given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.

In [8]:

    from typing import Callable
    from pyscf.lib import StreamObject
    from nbed.scf import huzinaga_RHF, huzinaga_RKS
    def _huzinaga_embed(
        self, localized_scf: StreamObject, dmat_initial_guess=None
    ) -> Tuple[np.ndarray, StreamObject]:
        """Embed using Huzinaga projector.

        Runs the Huzinaga SCF routine matching the type of ``localized_scf``
        and writes the result back onto that object.

        Args:
            localized_scf (StreamObject): A PySCF scf method in the localized basis.
            dmat_initial_guess: Accepted but not used yet; the solver is
                always started with no initial guess (see the TODO below).

        Returns:
            np.ndarray: Matrix form of the embedding potential.
            StreamObject: The embedded scf object.

        Raises:
            TypeError: If ``localized_scf`` is neither an RKS nor an RHF object.
        """
        logger.debug("Starting Huzinaga embedding method.")
        if isinstance(localized_scf, dft.rks.RKS):
            huz_method: Callable = huzinaga_RKS
        elif isinstance(localized_scf, scf.hf.RHF):
            huz_method: Callable = huzinaga_RHF
        else:
            # Without this branch huz_method would be unbound and the call
            # below would fail with an unhelpful UnboundLocalError.
            raise TypeError(
                "Huzinaga embedding requires an RKS or RHF object, "
                f"got {type(localized_scf).__name__}."
            )

        # We need to run manual HF to update
        # Fock matrix with each cycle
        (
            c_active_embedded,
            mo_embedded_energy,
            dm_active_embedded,
            huzinaga_op_std,
            huz_scf_conv_flag,
        ) = huz_method(
            localized_scf,
            self._dft_potential,
            self.localized_system.dm_enviro,
            dm_conv_tol=1e-6,
            dm_initial_guess=None,
        )  # TODO: use dm_active_embedded (use mu answer to initialize!)
        print(f"{c_active_embedded=}")

        # write results to pyscf object
        logger.debug("Writing results to PySCF object.")
        # Capture the unmodified core Hamiltonian before overriding get_hcore.
        hcore_std = localized_scf.get_hcore()
        v_emb = huzinaga_op_std + self._dft_potential
        localized_scf.get_hcore = lambda *args: hcore_std + v_emb
        localized_scf.mo_coeff = c_active_embedded
        localized_scf.mo_occ = localized_scf.get_occ(
            mo_embedded_energy, c_active_embedded
        )
        localized_scf.mo_energy = mo_embedded_energy
        localized_scf.e_tot = localized_scf.energy_tot(dm=dm_active_embedded)
        # localized_scf.conv_check = huz_scf_conv_flag
        localized_scf.converged = huz_scf_conv_flag

        logger.info(f"Huzinaga scf energy: {localized_scf.e_tot}")
        return v_emb, localized_scf

# Bind the function to the driver. A plain function stored on an instance is
# not turned into a method, so `self` would otherwise receive the first
# argument of every call instead of the driver.
driver._huzinaga_embed = _huzinaga_embed.__get__(driver, type(driver))

In [9]:

    # NOTE(review): this indented fragment uses `name`, `result`, `local_rhf`
    # and `embedding_method`, so it appears to be the continuation of the
    # `for name in sorted(embeddings, reverse=True):` loop body from the
    # earlier cell; it cannot run as a standalone cell as written.
    result["mo_energies_emb_pre_del"] = local_rhf.mo_energy
    result["scf"] = driver._delete_environment(result["scf"], name)
    result["mo_energies_emb_post_del"] = local_rhf.mo_energy

    logger.info(f"V emb mean {name}: {np.mean(result['v_emb'])}")

    # calculate correction
    # Full contraction of the embedding potential with the active density
    # matrix; the subscripts assume both operands are 2D.
    result["correction"] = np.einsum(
        "ij,ij", result["v_emb"], localized_system.dm_active
    )
    # Embedded SCF energy plus the environment and cross terms, with the
    # embedding-potential correction subtracted.
    result["e_rhf"] = (
        result["scf"].e_tot
        + driver.e_env
        + driver.two_e_cross
        - result["correction"]
    )
    logger.info(f"RHF energy: {result['e_rhf']}")

    # classical energy
    result["classical_energy"] = (
        driver.e_env + driver.two_e_cross + e_nuc - result["correction"]
    )

    # Calculate ccsd or fci energy
    if driver.run_ccsd_emb is True:
        logger.debug("Performing CCSD-in-DFT embedding.")
        ccsd_emb, e_ccsd_corr = driver._run_emb_CCSD(
            result["scf"], frozen_orb_list=None
        )
        result["e_ccsd"] = (
            ccsd_emb.e_hf
            + e_ccsd_corr
            + driver.e_env
            + driver.two_e_cross
            - result["correction"]
        )
        logger.info(f"CCSD Energy {name}:\t{result['e_ccsd']}")

    if driver.run_fci_emb is True:
        logger.debug("Performing FCI-in-DFT embedding.")
        fci_emb = driver._run_emb_FCI(result["scf"], frozen_orb_list=None)
        result["e_fci"] = (
            (fci_emb.e_tot)
            + driver.e_env
            + driver.two_e_cross
            - result["correction"]
        )
        logger.info(f"FCI Energy {name}:\t{result['e_fci']}")

    if driver.run_dft_in_dft is True:
        # Re-run the embedding with DFT for the active region and keep its energy.
        did = driver.embed_dft_in_dft(driver._global_ks.xc, embedding_method)
        result["e_dft_in_dft"] = did["e_rks"]

# Publish the per-projector results on the driver: a (mu, huzinaga) tuple when
# both projectors were requested, otherwise the single requested result.
chosen_projector = driver.projector
if chosen_projector == "both":
    logger.warning(
        "Outputting both mu and huzinaga embedding results as tuple."
    )
    mu_result, huzinaga_result = driver._mu, driver._huzinaga
    driver.embedded_scf = (mu_result["scf"], huzinaga_result["scf"])
    driver.classical_energy = (
        mu_result["classical_energy"],
        huzinaga_result["classical_energy"],
    )
elif chosen_projector in ("mu", "huzinaga"):
    # The results live in driver._mu or driver._huzinaga respectively.
    single_result = getattr(driver, "_" + chosen_projector)
    driver.embedded_scf = single_result["scf"]
    driver.classical_energy = single_result["classical_energy"]

logger.info("Embedding complete.")

IndentationError: expected an indented block (388936446.py, line 55)