In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited project
# modules are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os
import shutil
import subprocess
from copy import deepcopy
from time import time

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdDistGeom import EmbedMultipleConfs

from linux_qm.qm.crest.crest import conformer_pipeline
from linux_qm.qm.orca.orca import OrcaDriver
from linux_qm.src.util import _load_smiles3D, _create_tmp_dir, draw3Dconfs, SetPositions


In [3]:
# Input molecule as SMILES; a smaller test case is kept for quick runs.
smi = 'COC1CN(C)C1'
# smi = 'CO'

# Alternative route using plain RDKit embedding instead of the pipeline:
# mol = _load_smiles3D(smi)
# EmbedMultipleConfs(mol, numConfs=3)

# Build the conformer ensemble with the project pipeline
# (imported from linux_qm.qm.crest.crest).
mol = conformer_pipeline(smi)
conformers = mol.GetConformers()

# Summarise the ensemble: how many conformers came back and the value of
# the 'energy' property stored on each of them.
print(f"Number of conformers generated: {len(conformers)}")
energies = [conformer.GetProp('energy') for conformer in conformers]
print("Energies:", energies)

normal termination of xtb
Note: The following floating-point exceptions are signalling: IEEE_UNDERFLOW_FLAG IEEE_DENORMAL


Number of conformers generated: 4
Energies: ['-23.275472730000001', '-23.27351354', '-23.273084489999999', '-23.272812779999999']


In [5]:
# Align every conformer of `mol` on the four-membered N-containing ring
# and display the ensemble.
query = Chem.MolFromSmarts('C1CCN1')
atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when the pattern is absent
# (for instance with the alternative input smi = 'CO'). An empty tuple
# would no longer pin the alignment to the ring atoms, so fail loudly.
if not atom_ids:
    raise ValueError("SMARTS 'C1CCN1' did not match mol; cannot align conformers on the ring")

AllChem.AlignMolConformers(mol, atomIds=atom_ids)
# autoalign=False: the conformers were already aligned explicitly above.
draw3Dconfs(mol, autoalign=False)

(3, 2, 6, 4)
num conformers 4


In [44]:
# Show the kernel's current working directory (`os` is already imported in
# the notebook's import cell, so it is not re-imported here).
# NOTE(review): the recorded output is a nested
# .orca_tmp/<uuid>/.orca_tmp/<uuid> path, which suggests an earlier run
# changed directory without restoring it — confirm in the ORCA driver.
os.getcwd()

'/home/ergot/linux_qm/notebooks/.orca_tmp/ad383fa1-ca2c-4edb-b6c6-25bbf7e7f286/.orca_tmp/59e6d026-c7fa-4314-a0cb-59597b561b93'

In [6]:
# Switch the root logger to DEBUG so the following cells print the
# generated ORCA input.
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)

In [7]:
# Create the ORCA driver (OrcaDriver is imported in the notebook's import
# cell together with the other linux_qm imports).
orca = OrcaDriver()

# Take a conformer from a deep copy of `mol`.
# Presumably this keeps the driver from modifying `mol` itself — confirm.
conf = deepcopy(mol).GetConformer()

# Cheap level of theory for a first smoke test of the driver.
orca.options['method'] = 'HF def2-SVP'
orca.single_point(conf)

[24-11-2023 11:13:20] [DEBUG] ORCA INPUT:
!HF def2-SVP   
%geom MaxIter 100 end
%pal nprocs 16 end
%maxcore 2000
*xyz 0 1
C            2.71478745          0.42885555          0.15407105
O            1.61467591         -0.39694175          0.44575731
C            0.56782222         -0.27297030         -0.47466669
C           -0.36187253          0.95002315         -0.33959467
N           -1.51499900          0.04688818         -0.36976609
C           -2.58121715          0.29507078          0.56449305
C           -0.66125767         -1.11992214         -0.13116503
H            2.43967275          1.48951657          0.18103374
H            3.12947671          0.19821003         -0.83505883
H            3.46298693          0.22811261          0.91812790
H            0.93022925         -0.41958709         -1.50069903
H           -0.36201874          1.68033908         -1.15006733
H           -0.22254032          1.45665962          0.62351419
H           -3.06194924          1.23790942   

In [8]:
# Geometry optimisation of the same `conf`, still with the 'HF def2-SVP'
# method set in the previous cell; the logged input shows the driver
# emitting '!HF def2-SVP OPT' for this call.
orca.geometry_optimization(conf)

[24-11-2023 11:13:32] [DEBUG] ORCA INPUT:
!HF def2-SVP OPT  
%geom MaxIter 100 end
%pal nprocs 16 end
%maxcore 2000
*xyz 0 1
C            2.71478745          0.42885555          0.15407105
O            1.61467591         -0.39694175          0.44575731
C            0.56782222         -0.27297030         -0.47466669
C           -0.36187253          0.95002315         -0.33959467
N           -1.51499900          0.04688818         -0.36976609
C           -2.58121715          0.29507078          0.56449305
C           -0.66125767         -1.11992214         -0.13116503
H            2.43967275          1.48951657          0.18103374
H            3.12947671          0.19821003         -0.83505883
H            3.46298693          0.22811261          0.91812790
H            0.93022925         -0.41958709         -1.50069903
H           -0.36201874          1.68033908         -1.15006733
H           -0.22254032          1.45665962          0.62351419
H           -3.06194924          1.23790942

In [9]:
%%time

logging.getLogger().setLevel(logging.INFO)
orca.options['solvent'] = 'THF'

conf = deepcopy(mol).GetConformer()

orca.options['method'] = 'M062X 6-311++G(d,p) NMR'
orca.geometry_optimization(conf)

[24-11-2023 11:18:50] [INFO] Identified logfile to be in ORCA format
[24-11-2023 11:18:51] [INFO] Success: True
[24-11-2023 11:18:51] [INFO] Num Iter: 9


CPU times: user 163 ms, sys: 31.8 ms, total: 195 ms
Wall time: 3min 54s


In [10]:
%%time

conf = deepcopy(mol).GetConformer()

orca.options['method'] = 'M062X 6-31G'
orca.geometry_optimization(conf)

orca.options['method'] = 'M062X 6-311++G(d,p) NMR'
orca.geometry_optimization(conf)

[24-11-2023 11:32:45] [INFO] Geometry optimization: M062X 6-31G
[24-11-2023 11:34:06] [INFO] Identified logfile to be in ORCA format
[24-11-2023 11:34:06] [INFO] Success: True
[24-11-2023 11:34:06] [INFO] Num Iter: 9
[24-11-2023 11:34:06] [INFO] Geometry optimization: M062X 6-311++G(d,p) NMR
[24-11-2023 11:37:09] [INFO] Identified logfile to be in ORCA format
[24-11-2023 11:37:09] [INFO] Success: True
[24-11-2023 11:37:09] [INFO] Num Iter: 7


CPU times: user 240 ms, sys: 49.3 ms, total: 289 ms
Wall time: 4min 24s


In [12]:
%%time

conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVDZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 12:56:25] [INFO] Geometry optimization
[24-11-2023 12:56:25] [INFO] Method: M062X cc-pVDZ NMR
[24-11-2023 12:58:38] [INFO] Identified logfile to be in ORCA format
[24-11-2023 12:58:38] [INFO] Success: True
[24-11-2023 12:58:38] [INFO] Num Iter: 9


CPU times: user 148 ms, sys: 18.2 ms, total: 166 ms
Wall time: 2min 12s


In [None]:
%%time
conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVTZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 01:01:12] [INFO] Geometry optimization
[24-11-2023 01:01:12] [INFO] Method: M062X cc-pVTZ NMR


In [None]:
%%time
conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X aug-cc-pVTZ NMR'
orca.geometry_optimization(conf)

In [None]:
%%time

conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVDZ'
orca.geometry_optimization(conf)

orca.options['method'] = 'M062X aug-cc-pVTZ OPT NMR'
orca.geometry_optimization(conf)

In [43]:
# Align the conformers of `mol` on the four-membered N-containing ring and
# draw only conformer 0.
query = Chem.MolFromSmarts('C1CCN1')
atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when the pattern is absent; an
# empty tuple would no longer pin the alignment to the ring atoms, so fail
# loudly instead.
if not atom_ids:
    raise ValueError("SMARTS 'C1CCN1' did not match mol; cannot align conformers on the ring")

AllChem.AlignMolConformers(mol, atomIds=atom_ids)
# autoalign=False: the conformers were already aligned explicitly above.
draw3Dconfs(mol, confIds=[0], autoalign=False)

(3, 2, 6, 4)
num conformers 4


In [52]:
# SCF convergence history of the parsed calculation.
# NOTE(review): `data` is not assigned in any cell shown in this notebook.
# It looks like a cclib parse result (the logs print "Identified logfile to
# be in ORCA format") — confirm where it comes from so the notebook
# survives Restart & Run All.
data.scfvalues

[array([[ 0.00000000e+00,  1.75780800e-01,  1.13529000e-03],
        [-7.83250489e-02,  1.44294260e-01,  9.00510000e-04],
        [-3.82209220e-02,  3.60672850e-01,  2.12364000e-03],
        [-5.95070839e-02,  3.45931600e-02,  3.53380000e-04],
        [-1.10323800e-02,  1.38156100e-02,  1.32650000e-04],
        [-2.43487796e-04,  4.96315000e-03,  5.04600000e-05],
        [-3.80466000e-05,  2.50000000e-04,  7.07300000e-03],
        [-5.39920000e-06,  1.49000000e-04,  3.84100000e-03],
        [-3.79500000e-07,  6.80000000e-05,  8.82000000e-04],
        [-4.23500000e-07,  1.50000000e-05,  6.69000000e-04],
        [-8.50000000e-09,  7.00000000e-06,  7.70000000e-05],
        [-5.17330000e-09,  3.99680000e-15,  1.17560000e-17]]),
 array([[-3.27054152e+02,  2.26400000e-03,  6.88800000e-03],
        [-3.97856500e-04,  4.46000000e-04,  5.24900000e-03],
        [-3.05938000e-05,  2.93000000e-04,  2.26000000e-03],
        [-1.46754000e-05,  6.50000000e-05,  7.32000000e-04],
        [-2.08900000e-

In [51]:
# SCF convergence targets; the recorded output has one row of three
# thresholds per entry.
# NOTE(review): `data` is not assigned in any cell shown here — confirm its source.
data.scftargets

array([[1.e-08, 1.e-07, 5.e-09],
       [1.e-08, 1.e-07, 5.e-09],
       [1.e-08, 1.e-07, 5.e-09],
       [1.e-08, 1.e-07, 5.e-09],
       [1.e-08, 1.e-07, 5.e-09]])

In [36]:
# Isotropic NMR shielding per atom: element symbol, atom index, value.
# NOTE(review): `data` is not assigned in any cell shown here; the atom
# indices are assumed to follow the atom order of `mol` — confirm.
for atom_idx, tensor in data.nmrtensors.items():
    symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
    print(symbol, atom_idx, tensor['isotropic'])

C 0 125.508
O 1 275.301
C 2 110.446
C 3 114.12
N 4 223.565
C 5 133.679
C 6 115.657
H 7 28.766
H 8 28.318
H 9 28.777
H 10 27.965
H 11 28.106
H 12 29.335
H 13 29.221
H 14 30.004
H 15 29.269
H 16 29.104
H 17 28.096


In [15]:
# List every attribute available on the parsed-result object.
# NOTE(review): `data` is not assigned in any cell shown here — confirm its source.
dir(data)

['OPT_DONE',
 'OPT_NEW',
 'OPT_UNCONVERGED',
 'OPT_UNKNOWN',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_attrlist',
 '_dictsofarrays',
 '_intarrays',
 '_listsofarrays',
 'arrayify',
 'atomcharges',
 'atomcoords',
 'atommasses',
 'atomnos',
 'charge',
 'check_values',
 'closed_shell',
 'converged_geometries',
 'coreelectrons',
 'geotargets',
 'geovalues',
 'getattributes',
 'grads',
 'homos',
 'listify',
 'metadata',
 'moenergies',
 'moments',
 'mosyms',
 'mult',
 'natom',
 'nbasis',
 'nelectrons',
 'new_geometries',
 'nmo',
 'nmrtensors',
 'optdone',
 'scfenergies',
 'scftargets',
 'scfvalues',
 'setattributes',
 'typecheck',
 'unconverged_geometr

In [16]:
# First entry of the 'cpu_time' list stored in the parsed metadata.
print(data.metadata['cpu_time'][0])

0:01:17.614000


In [17]:
# Number of entries in `converged_geometries` of the parsed result.
len(data.converged_geometries)

5

In [18]:
# List every attribute available on the parsed-result object.
# NOTE(review): this repeats an earlier `dir(data)` cell; one of the two
# can probably be dropped.
dir(data)

['OPT_DONE',
 'OPT_NEW',
 'OPT_UNCONVERGED',
 'OPT_UNKNOWN',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_attrlist',
 '_dictsofarrays',
 '_intarrays',
 '_listsofarrays',
 'arrayify',
 'atomcharges',
 'atomcoords',
 'atommasses',
 'atomnos',
 'charge',
 'check_values',
 'closed_shell',
 'converged_geometries',
 'coreelectrons',
 'geotargets',
 'geovalues',
 'getattributes',
 'grads',
 'homos',
 'listify',
 'metadata',
 'moenergies',
 'moments',
 'mosyms',
 'mult',
 'natom',
 'nbasis',
 'nelectrons',
 'new_geometries',
 'nmo',
 'nmrtensors',
 'optdone',
 'scfenergies',
 'scftargets',
 'scfvalues',
 'setattributes',
 'typecheck',
 'unconverged_geometr

In [19]:
# Optimisation-finished flag reported by the parsed result.
data.optdone

True

In [20]:
# Full metadata dictionary of the parsed run (package, version, basis set,
# warnings, input file contents, ...).
# NOTE(review): the recorded output includes an ORCA warning that Minnesota
# functionals need at least DEFGRID3 — worth acting on in the method strings.
data.metadata

{'package': 'ORCA',
 'methods': ['DFT', 'DFT', 'DFT', 'DFT', 'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': '6-311++G(d,p)',
  'Minnesota functionals are quite sensitive to the integration grid.see SE Wheeler, KN Houk, JCTC 2010, 6, 395,N Mardirossian, M Head-Gordon, JCTC 2016, 12, 4303.DEFGRID3 seems to be a minimum grid for reliable results with these functionals!Please increase the integration grid!'],
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'task.inp',
 'input_file_contents': '!M062X 6-311++G(d,p) OPT NMR\n%geom MaxIter 100 end\n%cpcm\n  smd true\n  SMDsolvent "THF"\n  end\n%pal nprocs 16 end\n*xyz 0 1\nC           -2.73166000         -0.42891300          0.06568100\nO           -1.69258800          0.47020300          0.38413500\nC           -0.58496100          0.29469900         -0.44442300\nC            0.31844600         -0.93709600         -0.21753700\nN         

In [21]:
# Coordinate arrays stored under `new_geometries`; the recorded output
# shows one block of x, y, z rows per geometry.
# NOTE(review): this dumps the whole array — consider showing only a slice.
data.new_geometries

array([[[-2.73166 , -0.428913,  0.065681],
        [-1.692588,  0.470203,  0.384135],
        [-0.584961,  0.294699, -0.444423],
        [ 0.318446, -0.937096, -0.217537],
        [ 1.481163, -0.047712, -0.386694],
        [ 2.63364 , -0.299075,  0.451037],
        [ 0.645131,  1.118235, -0.052189],
        [-3.11054 , -0.260819, -0.952362],
        [-3.541348, -0.262969,  0.778925],
        [-2.400831, -1.474237,  0.143859],
        [-0.856075,  0.412823, -1.502824],
        [ 0.270658, -1.784797, -0.907525],
        [ 0.201829, -1.289001,  0.821651],
        [ 3.376249,  0.492224,  0.308155],
        [ 2.374666, -0.342195,  1.523777],
        [ 3.093524, -1.251502,  0.169314],
        [ 0.615582,  1.32106 ,  1.030892],
        [ 0.87263 ,  2.037938, -0.598603]],

       [[-2.732772, -0.433447,  0.065903],
        [-1.697911,  0.474342,  0.37912 ],
        [-0.585376,  0.300297, -0.445689],
        [ 0.315233, -0.934156, -0.217946],
        [ 1.481057, -0.046769, -0.385799],
        [

In [22]:
# HOMO and LUMO energies from the first set of orbital energies: the HOMO
# sits at index data.homos[0] and the LUMO is the next orbital up.
# NOTE(review): `data` is not assigned in any cell shown here; cclib
# documents orbital energies in eV — confirm for the installed version.
orbital_energies = data.moenergies[0]
homo_index = data.homos[0]
homo = orbital_energies[homo_index]
lumo = orbital_energies[homo_index + 1]
homo, lumo

(-7.7628095042939, 0.14092776317395)

In [23]:
# First entry of the 'cpu_time' list stored in the parsed metadata.
# NOTE(review): this repeats an earlier cell that prints the same value.
print(data.metadata['cpu_time'][0])

0:01:17.614000


In [24]:
# Atom count and molecular-orbital count of the parsed calculation.
data.natom, data.nmo

(18, 231)

In [25]:
# Full metadata dictionary of the parsed run.
# NOTE(review): this repeats an earlier `data.metadata` cell; one of the
# two can probably be dropped.
data.metadata

{'package': 'ORCA',
 'methods': ['DFT', 'DFT', 'DFT', 'DFT', 'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': '6-311++G(d,p)',
  'Minnesota functionals are quite sensitive to the integration grid.see SE Wheeler, KN Houk, JCTC 2010, 6, 395,N Mardirossian, M Head-Gordon, JCTC 2016, 12, 4303.DEFGRID3 seems to be a minimum grid for reliable results with these functionals!Please increase the integration grid!'],
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'task.inp',
 'input_file_contents': '!M062X 6-311++G(d,p) OPT NMR\n%geom MaxIter 100 end\n%cpcm\n  smd true\n  SMDsolvent "THF"\n  end\n%pal nprocs 16 end\n*xyz 0 1\nC           -2.73166000         -0.42891300          0.06568100\nO           -1.69258800          0.47020300          0.38413500\nC           -0.58496100          0.29469900         -0.44442300\nC            0.31844600         -0.93709600         -0.21753700\nN         

In [26]:
# SCF energies stored on the parsed result; the recorded output has five
# values that decrease monotonically.
# NOTE(review): cclib documents these in eV — confirm for the installed version.
data.scfenergies

array([-8898.73503472, -8898.73716429, -8898.73758198, -8898.73766933,
       -8898.73770797])