### Sample script kindly provided by Pavan Behara
Source: https://github.com/openforcefield/qca-dataset-submission/pull/301#issuecomment-1195780165

In [1]:
import qcportal as ptl
from collections import Counter
# No server address is passed, so the client uses whatever default qcportal is configured with.
client = ptl.FractalClient()

In [2]:
%%time
ds = client.get_collection("Dataset", "RNA Single Point Dataset v1.0")

CPU times: user 28.2 ms, sys: 6.16 ms, total: 34.4 ms
Wall time: 206 ms


In [3]:
# For this specification the functional and the dispersion correction are evaluated
# together by one program, so the result is not split and a single dataframe comes back.
recs_wb97m = ds.get_records(
    method='wb97m-d3bj',
    basis='def2-tzvppd',
    program='psi4',
    keywords='wb97m-d3bj/def2-tzvppd',
)
n_wb97m_records = len(recs_wb97m)
print("No. of records: ", n_wb97m_records)

No. of records:  4489


In [4]:
# QCFractal splits a B3LYP-D3BJ calculation into two separately stored parts, so this
# query returns two dataframes instead of one:
#   recs_default[0] -> the d3bj dispersion calculation
#   recs_default[1] -> the b3lyp functional calculation
# Adding a property from the matching records of both parts gives the final value,
# e.g. the final energy of the b3lyp-d3bj calculation.
recs_default = ds.get_records(
    method='b3lyp-d3bj',
    basis='dzvp',
    program='psi4',
    keywords='default',
)
n_parts = len(recs_default)
n_d3bj_records = len(recs_default[0])
n_b3lyp_records = len(recs_default[1])
print("length of dataframe", n_parts, "\n", "No. of records in each: ",  n_d3bj_records, n_b3lyp_records)

length of dataframe 2 
 No. of records in each:  4489 4489


In [5]:
# Dump one record of the d3bj part as a dictionary to show which properties are available.
d3bj_correction_dataframe = recs_default[0]
# Take the record stored in the first row of the dataframe.
first_d3bj_record = d3bj_correction_dataframe.iloc[0].record
print(first_d3bj_record.dict())

{'id': '109521931', 'hash_index': None, 'procedure': 'single', 'program': 'dftd3', 'version': 1, 'protocols': {'wavefunction': <WavefunctionProtocolEnum.none: 'none'>}, 'extras': {'_qcfractal_tags': {'program': 'dftd3', 'keywords': None}, 'info': {'dashlevel': 'd3bj', 'dashparams': {'s6': 1.0, 's8': 1.9889, 'a1': 0.3981, 'a2': 4.4211}, 'fctldash': 'b3lyp-d3(bj)', 'dashparams_citation': ''}, 'local_keywords': {'dashlevel': 'd3bj', 'dashparams': {'s6': 1.0, 's8': 1.9889, 'a1': 0.3981, 'a2': 4.4211}, 'fctldash': 'b3lyp-d3(bj)', 'dashparams_citation': ''}, 'qcvars': {'CURRENT ENERGY': '-0.12241289', 'DISPERSION CORRECTION ENERGY': '-0.12241289', '2-BODY DISPERSION CORRECTION ENERGY': '-0.12241289', 'B3LYP-D3(BJ) DISPERSION CORRECTION ENERGY': '-0.12241289', 'CURRENT GRADIENT': array([[ 2.81302828e-04, -4.80197433e-04,  5.63011062e-04],
       [ 9.83013508e-05, -1.68913023e-04,  2.66803068e-04],
       [ 2.55541752e-04, -7.70356570e-04,  2.94138195e-04],
       [ 1.84664060e-05, -3.08778999

In [6]:
# Dump one record of the b3lyp part as a dictionary to show which properties are available.
b3lyp_dataframe = recs_default[1]
# Take the record stored in the first row of the dataframe.
first_b3lyp_record = b3lyp_dataframe.iloc[0].record
print(first_b3lyp_record.dict())

{'id': '109522131', 'hash_index': None, 'procedure': 'single', 'program': 'psi4', 'version': 1, 'protocols': {'wavefunction': <WavefunctionProtocolEnum.none: 'none'>}, 'extras': {'_qcfractal_tags': {'program': 'psi4', 'keywords': '34'}, 'qcvars': {'CURRENT DIPOLE X': 14.284629238711577, 'CURRENT DIPOLE Y': -6.273212669846057, 'CURRENT DIPOLE Z': 6.251830429771571, 'CURRENT ENERGY': -1782.5173009461919, 'CURRENT REFERENCE ENERGY': -1782.5173009461919, 'DFT FUNCTIONAL TOTAL ENERGY': -1782.5173009461928, 'DFT TOTAL ENERGY': -1782.5173009461928, 'DFT VV10 ENERGY': 0.0, 'DFT XC ENERGY': -189.75362775587155, 'GRID ELECTRONS ALPHA': 128.00005542252904, 'GRID ELECTRONS BETA': 128.00005542252904, 'GRID ELECTRONS TOTAL': 256.0001108450581, 'NUCLEAR REPULSION ENERGY': 3602.9164088388934, 'ONE-ELECTRON ENERGY': -9604.149126346882, 'PCM POLARIZATION ENERGY': 0.0, 'PE ENERGY': 0.0, 'SCF DIPOLE X': 14.284629238711577, 'SCF DIPOLE Y': -6.273212669846057, 'SCF DIPOLE Z': 6.251830429771571, 'SCF ITERATI

In [7]:
# Combine the two stored parts into the total B3LYP-D3BJ energy of every entry.
# recs_default[0] holds the d3bj dispersion records, recs_default[1] the b3lyp records.
d3bj_rows = recs_default[0]
b3lyp_rows = recs_default[1]

# The two parts are paired by row position, so both dataframes must list the same
# entries in the same order; fail loudly instead of silently adding energies that
# belong to different entries.
if not d3bj_rows.index.equals(b3lyp_rows.index):
    raise ValueError("d3bj and b3lyp dataframes do not list the same entries in the same order")

for i in range(len(b3lyp_rows)):
    # Look each row up once instead of repeating the .iloc call for every attribute.
    b3lyp_row = b3lyp_rows.iloc[i]
    d3bj_row = d3bj_rows.iloc[i]
    # Some calculations failed due to SCF convergence errors. Skip an entry unless BOTH
    # parts finished, because the energies of both records are read below.
    if b3lyp_row.record.status == 'COMPLETE' and d3bj_row.record.status == 'COMPLETE':
        total_energy = b3lyp_row.record.properties.return_energy + d3bj_row.record.properties.return_energy
        print("B3LYP + D3BJ energy of ", b3lyp_row.name, total_energy)

B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-0 -1782.639713836192
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-1 -1782.6302990214972
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-2 -1782.641790205362
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-3 -1782.62233649516
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-4 -1782.648962953126
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-5 -1782.6406730116719
B3LYP + D3BJ energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1.Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)n1-6 -1782.63958950459

In [8]:
# Similarly for the other QC specification: it comes back as a single dataframe,
# so each record already carries the full energy and nothing has to be added.
for entry_name, row in recs_wb97m.iterrows():
    record = row.record
    # Skip records that did not finish.
    if record.status != 'COMPLETE':
        continue
    print("wb97m-d3bj/def2-tzvppd energy of ", entry_name, record.properties.return_energy)

wb97m-d3bj/def2-tzvppd energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)[C@H]2O)c(=O)n1-0 -3697.0213274311614
wb97m-d3bj/def2-tzvppd energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)[C@H]2O)c(=O)n1-1 -3697.027289638834
wb97m-d3bj/def2-tzvppd energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)[C@H]2O)c(=O)n1-10 -3697.049497739711
wb97m-d3bj/def2-tzvppd energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3OP(=O)([O-])OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)[C@H]2O)c(=O)n1-12 -3697.1185314635636
wb97m-d3bj/def2-tzvppd energy of  Nc1ccn([C@@H]2O[C@H](CO)[C@@H](OP(=O)([O-])OC[C@H]3O[C@@H]