In [None]:
import psi4
from qcportal import PortalClient
from pprint import pprint as pp
from qcelemental.models import Molecule
from qcportal.singlepoint import SinglepointDataset, SinglepointDatasetEntry, QCSpecification

# need manybodydataset
from qcportal.manybody import ManybodyDataset, ManybodyDatasetEntry, ManybodyDatasetSpecification, ManybodySpecification

In [1]:
from setup_qcfractal import setup_qcarchive_qcfractal
import os

setup_qcarchive_qcfractal(
    QCF_BASE_FOLDER=os.path.join(os.getcwd(), "qcfractal"),
    start=False,
    reset=False,
    db_config={
        "name": None,
        "enable_security": "false",
        "allow_unauthenticated_read": None,
        "logfile": None,
        "loglevel": None,
        "service_frequency": 10,
        "max_active_services": None,
        "heartbeat_frequency": None,
        "log_access": None,
        "database": {
            "base_folder": None,
            "host": None,
            "port": 5432,
            "database_name": "qca",
            "username": None,
            "password": None,
            "own": None,
        },
        "api": {
            "host": None,
            "port": 7777,
            "secret_key": None,
            "jwt_secret_key": None,
        },
    },
    resources_config={
            "update_frequency": 5,
            "cores_per_worker": 8,
            "max_workers": 3,
            "memory_per_worker": 20,
    }
)

/home/amwalla3/gits/cybershuttle_demo/qcfractal
/home/amwalla3/gits/cybershuttle_demo/qcfractal/postgres

--------------------------------------------------------------------------------
Python executable:  /home/amwalla3/miniconda3/envs/p4_qcml/bin/python
QCFractal version:  0.59
QCFractal alembic revision:  d5988aa750ae
pg_ctl path:  /home/amwalla3/miniconda3/envs/p4_qcml/bin/pg_ctl
PostgreSQL server version:  PostgreSQL 17.4 on x86_64-conda-linux-gnu, compiled by x86_64-conda-linux-gnu-cc (conda-forge gcc 13.3.0-2) 13.3.0, 64-bit
--------------------------------------------------------------------------------


Displaying QCFractal configuration below
--------------------------------------------------------------------------------
access_log_keep: 0
allow_unauthenticated_read: true
api:
  extra_flask_options: null
  extra_waitress_options: null
  host: localhost
  jwt_access_token_expires: 3600
  jwt_refresh_token_expires: 86400
  jwt_secret_key: wubNZjdakq4fwaYDfUnWdNSgaYv1Abbrv8xD

In [None]:
!qcfractal-server --config=`pwd`/qcfractal/qcfractal_config.yaml start > qcfractal_server.log & disown

# NOTE kill server when finished by running:
#     ps aux | grep qcfractal-server | awk '{ print $2 }'
#     kill -9 <PID>

In [None]:
!qcfractal-compute-manager --config=`pwd`/qcfractal/resources.yml > qcfractal_compute.log & disown
# NOTE kill server when finished by running:;
#    ps aux | grep qcfractal-server | awk '{ print $2 }'
#    kill -9 <PID>

In [2]:
# Running a single job
client = PortalClient("http://localhost:7777", verify=False)
for rec in client.query_records():
    pp(rec)

mol = Molecule.from_data(
    """
     0 1
     O  -1.551007  -0.114520   0.000000
     H  -1.934259   0.762503   0.000000
     H  -0.599677   0.040712   0.000000
     --
     0 1
     O   1.350625   0.111469   0.000000
     H   1.680398  -0.373741  -0.758561
     H   1.680398  -0.373741   0.758561

     units angstrom
     no_reorient
     symmetry c1
"""
)

psi4.set_options(
    {"basis": "aug-cc-pvdz", "scf_type": "df", "e_convergence": 6, "freeze_core": True}
)

client.add_singlepoints(
    [mol],
    "psi4",
    driver="energy",
    method="b3lyp",
    basis="aug-cc-pvdz",
    keywords={"scf_type": "df", "e_convergence": 6, "freeze_core": True},
    tag="local",
)

for rec in client.query_records():
    pp(rec.dict)
    pp(rec.error)

ManybodyRecord(id=36, record_type='manybody', is_service=True, properties=None, extras={}, status=<RecordStatusEnum.waiting: 'waiting'>, manager_name=None, created_on=datetime.datetime(2025, 4, 24, 19, 15, 25, 538230, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), modified_on=datetime.datetime(2025, 4, 24, 19, 15, 25, 538230, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), owner_user=None, owner_group=None, compute_history_=None, task_=None, service_=None, comments_=None, native_files_=None, specification=ManybodySpecification(program='qcmanybody', levels={2: QCSpecification(program='psi4', driver=<SinglepointDriver.energy: 'energy'>, method='b3lyp', basis='6-31g*', keywords={'scf_type': 'df', 'd_convergence': 8}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.none: 'none'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>)

In [3]:
# Now create S22 Interaction Energy Dataset
from s22 import geoms

# geoms is a list of qcelemental Molecule objects that can be used to create a
# QCArchive dataset
print(len(geoms), geoms)

7 [Molecule(name='H4O2', formula='H4O2', hash='49ecd61'), Molecule(name='C2H4O4', formula='C2H4O4', hash='cb6a49d'), Molecule(name='C2H6N2O2', formula='C2H6N2O2', hash='b1ca3e5'), Molecule(name='C2H8', formula='C2H8', hash='67d7d29'), Molecule(name='C4H8', formula='C4H8', hash='47f28b2'), Molecule(name='C4H6', formula='C4H6', hash='2f8f067'), Molecule(name='C6H8O', formula='C6H8O', hash='22eeb90')]


In [4]:
# Create client dataset

ds_name = 'S22-multipoles'

try:
    ds = client.add_dataset("singlepoint", ds_name,
                            f"Dataset to contain {ds_name}")
    print(f"Added {ds_name} as dataset")
    # Insert entries into dataset
    entry_list = []
    for idx, mol in enumerate(geoms):
        extras = {
            "name": 'S22-' + str(idx),
            "idx": idx,
        }
        mol = Molecule.from_data(mol.dict(), extras=extras)
        ent = SinglepointDatasetEntry(name=extras['name'], molecule=mol)
        entry_list.append(ent)
    ds.add_entries(entry_list)
    print(f"Added {len(entry_list)} molecules to dataset")
except Exception:
    ds = client.get_dataset("singlepoint", ds_name)
    print(f"Found {ds_name} dataset, using this instead")
    print(ds)

Added S22-multipoles as dataset
Added 7 molecules to dataset


In [5]:
# Set the method and basis for lower requirements?
method, basis = "hf", "sto-3g"

# Set the QCSpecification (QM interaction energy in our case)
spec = QCSpecification(
    program="psi4",
    driver="energy",
    method=method,
    basis=basis,
    keywords={
        "d_convergence": 8,
        "dft_radial_points": 99,
        "dft_spherical_points": 590,
        "e_convergence": 10,
        "guess": "sad",
        "mbis_d_convergence": 9,
        "mbis_radial_points": 99,
        "mbis_spherical_points": 590,
        "scf_properties": ["mbis_charges", "MBIS_VOLUME_RATIOS"],
        "scf_type": "df",
    },
    protocols={"wavefunction": "orbitals_and_eigenvalues"},
)
ds.add_specification(name=f"psi4/{method}/{basis}", specification=spec)




InsertMetadata(error_description=None, errors=[], inserted_idx=[0], existing_idx=[])

In [6]:
# Run the computations
ds.submit()
print(f"Submitted {ds_name} dataset")

Submitted S22-multipoles dataset


In [7]:
# Check the status of the dataset - can repeatedly run this to see the progress
ds.status()




{'psi4/hf/sto-3g': {<RecordStatusEnum.waiting: 'waiting'>: 7}}

In [8]:
# Create client dataset

ds_name_mb = 'S22-manybody'

try:
    ds_mb = client.add_dataset("manybody", ds_name_mb,
                            f"Dataset to contain {ds_name_mb}")
    print(f"Added {ds_name_mb} as dataset")
except Exception:
    ds_mb = client.get_dataset("manybody", ds_name_mb)
    print(f"Found {ds_name_mb} dataset, using this instead")
    print(ds)

# Insert entries into dataset

entry_list = []
for idx, mol in enumerate(geoms):
    print(mol)
    ent = ManybodyDatasetEntry(name=f"S22-IE-{idx}", initial_molecule=mol)
    entry_list.append(ent)
ds_mb.add_entries(entry_list)
print(f"Added {len(entry_list)} molecules to dataset")

# Set the method and basis for lower requirements?
method, basis = "hf", "sto-3g"

# Set the QCSpecification (QM interaction energy in our case)

qc_spec_mb = QCSpecification(
    program="psi4",
    driver="energy",
    method=method,
    basis=basis,
    keywords={
        "d_convergence": 8,
        "scf_type": "df",
    },
)

spec_mb = ManybodySpecification(
    program='qcmanybody',
    bsse_correction=['cp', 'nocp'],
    levels={
        1: qc_spec_mb,
        2: qc_spec_mb,
    },
)
print("spec_mb", spec_mb)

ds_mb.add_specification(name=f"psi4/{method}/{basis}", specification=spec_mb)

# Run the computations
ds_mb.submit()
print(f"Submitted {ds_name} dataset")

# Check the status of the dataset - can repeatedly run this to see the progress
ds_mb.status()

Added S22-manybody as dataset
Molecule(name='H4O2', formula='H4O2', hash='49ecd61')
Molecule(name='C2H4O4', formula='C2H4O4', hash='cb6a49d')
Molecule(name='C2H6N2O2', formula='C2H6N2O2', hash='b1ca3e5')
Molecule(name='C2H8', formula='C2H8', hash='67d7d29')
Molecule(name='C4H8', formula='C4H8', hash='47f28b2')
Molecule(name='C4H6', formula='C4H6', hash='2f8f067')
Molecule(name='C6H8O', formula='C6H8O', hash='22eeb90')
Added 7 molecules to dataset
spec_mb program='qcmanybody' levels={1: QCSpecification(program='psi4', driver=<SinglepointDriver.energy: 'energy'>, method='hf', basis='sto-3g', keywords={'d_convergence': 8, 'scf_type': 'df'}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.none: 'none'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>)), 2: QCSpecification(program='psi4', driver=<SinglepointDriver.energy: 'energy'>, method='hf', basis='sto-3g', keywords={'d_

{'psi4/hf/sto-3g': {<RecordStatusEnum.waiting: 'waiting'>: 7}}

In [9]:
ds_mb.status()




{'psi4/hf/sto-3g': {<RecordStatusEnum.waiting: 'waiting'>: 7}}

In [10]:
# Want multiple levels of theory

methods = [
    'hf', 'pbe', 'b3lyp',
]
basis_sets = [
    '6-31g*'
]

for method in methods:
    for basis in basis_sets:
        # Set the QCSpecification (QM interaction energy in our case)
        qc_spec_mb = QCSpecification(
            program="psi4",
            driver="energy",
            method=method,
            basis=basis,
            keywords={
                "d_convergence": 8,
                "scf_type": "df",
            },
        )

        spec_mb = ManybodySpecification(
            program='qcmanybody',
            bsse_correction=['cp'],
            levels={
                1: qc_spec_mb,
                2: qc_spec_mb,
            },
        )
        print("spec_mb", spec_mb)

        ds_mb.add_specification(name=f"psi4/{method}/{basis}", specification=spec_mb)

        # Run the computations
        ds_mb.submit()
        print(f"Submitted {ds_name} dataset")
# Check the status of the dataset - can repeatedly run this to see the progress
ds_mb.status()

spec_mb program='qcmanybody' levels={1: QCSpecification(program='psi4', driver=<SinglepointDriver.energy: 'energy'>, method='hf', basis='6-31g*', keywords={'d_convergence': 8, 'scf_type': 'df'}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.none: 'none'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>)), 2: QCSpecification(program='psi4', driver=<SinglepointDriver.energy: 'energy'>, method='hf', basis='6-31g*', keywords={'d_convergence': 8, 'scf_type': 'df'}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.none: 'none'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>))} bsse_correction=[<BSSECorrectionEnum.cp: 'cp'>] keywords=ManybodyKeywords(return_total_data=False) protocols={}
Submitted S22-multipoles dataset
spec_mb program='qcmanybody' levels={1

{'psi4/b3lyp/6-31g*': {<RecordStatusEnum.waiting: 'waiting'>: 7},
 'psi4/hf/6-31g*': {<RecordStatusEnum.waiting: 'waiting'>: 7},
 'psi4/hf/sto-3g': {<RecordStatusEnum.waiting: 'waiting'>: 7},
 'psi4/pbe/6-31g*': {<RecordStatusEnum.waiting: 'waiting'>: 7}}

In [11]:
print(ds.status())
print(ds_mb.status())

{'psi4/hf/sto-3g': {<RecordStatusEnum.complete: 'complete'>: 7}}
{'psi4/b3lyp/6-31g*': {<RecordStatusEnum.waiting: 'waiting'>: 7}, 'psi4/hf/6-31g*': {<RecordStatusEnum.running: 'running'>: 7}, 'psi4/hf/sto-3g': {<RecordStatusEnum.running: 'running'>: 7}, 'psi4/pbe/6-31g*': {<RecordStatusEnum.running: 'running'>: 6, <RecordStatusEnum.waiting: 'waiting'>: 1}}


In [None]:
# Be careful with this for it can corrupt running status...
# !ps aux | grep qcfractal | awk '{ print $2 }' | xargs kill -9