In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# (e.g. the linux_qm package used below) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os
import shutil
import subprocess
from copy import deepcopy
from time import time

import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdDistGeom import EmbedMultipleConfs

from linux_qm.qm.crest.crest import conformer_pipeline
from linux_qm.qm.orca.orca import OrcaDriver
from linux_qm.src.util import load_smiles3D, _create_tmp_dir, draw3Dconfs, SetPositions

In [3]:
# Input molecule as a SMILES string ('CO' was left by the author as a smaller alternative).
smi = 'COC1CN(C)C1'

# Build the conformer ensemble with the project's pipeline. A direct RDKit route
# (load_smiles3D(smi) followed by EmbedMultipleConfs(mol, numConfs=3)) was the
# alternative the author had commented out here.
mol = conformer_pipeline(smi)
conformers = mol.GetConformers()

# Report the ensemble size and the 'energy' property stored on each conformer.
print(f"Number of conformers generated: {len(conformers)}")
print("Energies:", [conformer.GetProp('energy') for conformer in conformers])

[30-11-2023 02:14:24] [INFO] Conformer generation pipeline


Number of conformers generated: 4
Energies: ['-23.275472740000001', '-23.273513579999999', '-23.273084740000002', '-23.27281279']


In [4]:
# Align all conformers of `mol` on the four-membered C-C-C-N ring and draw them.
query = Chem.MolFromSmarts('C1CCN1')
atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when nothing matches, and per the RDKit
# docs an empty atomIds makes AlignMolConformers align on all atoms instead.
# Fail loudly rather than silently aligning on something other than the ring.
assert atom_ids, "SMARTS 'C1CCN1' did not match mol; cannot align on the ring atoms"

AllChem.AlignMolConformers(mol, atomIds=atom_ids)
# autoalign=False: the conformers were already aligned explicitly above.
draw3Dconfs(mol, autoalign=False)

(3, 2, 6, 4)
num conformers 4


In [5]:
# Show the kernel's working directory (`os` is already imported in the import cell).
# NOTE(review): the recorded output is an absolute local path; consider clearing it before sharing.
os.getcwd()

'/home/ergot/linux_qm/notebooks'

In [6]:
# Raise root-logger verbosity to DEBUG so the driver's debug messages
# (e.g. the generated ORCA input in the recorded output) are shown.
logging.root.setLevel(logging.DEBUG)

In [7]:
# Create the ORCA driver and run a two-step workflow on one conformer:
# an XTB2 geometry optimisation followed by an HF-3c single point.
# (OrcaDriver is imported in the notebook's import cell.)
orca = OrcaDriver()

# Take the conformer from a deep copy of `mol` -- presumably so the optimisation
# does not touch the conformers stored on `mol` itself.
conf = deepcopy(mol).GetConformer()

# Step 1: geometry optimisation with XTB2.
orca.options['method'] = 'XTB2'
orca.geometry_optimization(conf)

# Step 2: HF-3c single point. The options set here stay on `orca` for later cells.
# calc_npa=True -- the atomcharges printed further down include an 'npa' entry.
orca.options['n_jobs'] = 1
orca.options['method'] = 'HF-3c'
data = orca.single_point(conf, calc_npa=True)

[30-11-2023 02:14:39] [INFO] Method: XTB2
[30-11-2023 02:14:39] [DEBUG] ORCA INPUT:
!XTB2 OPT  
%geom MaxIter 100 end
%maxcore 2000
*xyz 0 1
C            2.71484785          0.42880002         -0.15402688
O            1.61469509         -0.39692753         -0.44575527
C            0.56782242         -0.27291115          0.47464054
C           -0.66124411         -1.11990244          0.13118774
N           -1.51499908          0.04690021          0.36976656
C           -2.58128705          0.29500301         -0.56443383
C           -0.36189456          0.95005799          0.33948174
H            3.12947886          0.19812896          0.83511982
H            2.43980366          1.48947819         -0.18100250
H            3.46306590          0.22800678         -0.91805067
H            0.93021308         -0.41945683          1.50068874
H           -0.88479859         -1.96629005          0.78047707
H           -0.62918538         -1.44665820         -0.91402267
H           -3.31308480    

In [8]:
# Name of the input file recorded in the metadata of the single-point result `data`.
data.metadata['input_file_name']

'task.inp'

In [65]:
# Print each charge scheme in data.atomcharges (name on one line, per-atom values on the next).
for scheme, charges in data.atomcharges.items():
    print(scheme, charges, sep='\n')

mulliken
[-0.170523 -0.437285  0.066211 -0.152177 -0.323495 -0.258984 -0.142196
  0.12537   0.142833  0.124913  0.131895  0.126081  0.12645   0.132145
  0.111749  0.131234  0.135055  0.130726]
lowdin
[-0.076184 -0.319441  0.080399 -0.08962  -0.215448 -0.155909 -0.076887
  0.074517  0.089711  0.073961  0.074574  0.074539  0.075737  0.083163
  0.064268  0.082471  0.082129  0.07802 ]
npa
[-0.088416 -0.402427  0.1292   -0.112921 -0.272649 -0.207107 -0.101906
  0.091123  0.107136  0.090399  0.085854  0.091763  0.094222  0.106272
  0.08669   0.105488  0.101569  0.095708]


In [12]:
# Optimise `conf` again using whatever options are currently set on `orca`.
# NOTE(review): no method is chosen in this cell, so the result depends on state left by
# earlier cells (the recorded log shows HF-3c). Set orca.options['method'] explicitly
# if this cell must be reproducible on its own.
orca.geometry_optimization(conf)

[29-11-2023 04:35:13] [INFO] Method: HF-3c
[29-11-2023 04:35:13] [DEBUG] ORCA INPUT:
!HF-3c OPT  
%geom MaxIter 100 end
%maxcore 2000
*xyz 0 1
C            2.71480274          0.42884082          0.15406060
O            1.61467957         -0.39693722          0.44575767
C            0.56782226         -0.27295524         -0.47466078
C           -0.36187849          0.95003183         -0.33956833
N           -1.51499942          0.04689095         -0.36976641
C           -2.58123322          0.29505378          0.56448005
C           -0.66125414         -1.11991726         -0.13117159
H            3.12947891          0.19818606         -0.83507233
H            3.46300549          0.22808620          0.91811094
H            2.43970687          1.48950658          0.18102358
H            0.93022660         -0.41955453         -1.50069652
H           -0.36202347          1.68036633         -1.15002437
H           -0.22255383          1.45664687          0.62355310
H           -3.31304459  

<cclib.parser.data.ccData_optdone_bool at 0x7f8ee81c5ea0>

In [9]:
%%time

logging.getLogger().setLevel(logging.INFO)
orca.options['solvent'] = 'THF'

conf = deepcopy(mol).GetConformer()

orca.options['method'] = 'M062X 6-311++G(d,p) NMR'
orca.geometry_optimization(conf)

[24-11-2023 02:32:03] [INFO] Geometry optimization
[24-11-2023 02:32:03] [INFO] Method: M062X 6-311++G(d,p) NMR
[24-11-2023 02:34:15] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:34:15] [INFO] Success: True
[24-11-2023 02:34:15] [INFO] Num Iter: 9
[24-11-2023 02:34:15] [INFO] Elapsed Time: 131.8s


CPU times: user 126 ms, sys: 61.7 ms, total: 187 ms
Wall time: 2min 11s


In [10]:
%%time

conf = deepcopy(mol).GetConformer()

orca.options['method'] = 'M062X 6-31G'
orca.geometry_optimization(conf)

orca.options['method'] = 'M062X 6-311++G(d,p) NMR'
orca.geometry_optimization(conf)

[24-11-2023 02:34:15] [INFO] Geometry optimization
[24-11-2023 02:34:15] [INFO] Method: M062X 6-31G
[24-11-2023 02:35:07] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:35:07] [INFO] Success: True
[24-11-2023 02:35:07] [INFO] Num Iter: 9
[24-11-2023 02:35:07] [INFO] Elapsed Time: 52.0s
[24-11-2023 02:35:07] [INFO] Geometry optimization
[24-11-2023 02:35:07] [INFO] Method: M062X 6-311++G(d,p) NMR
[24-11-2023 02:36:48] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:36:48] [INFO] Success: True
[24-11-2023 02:36:48] [INFO] Num Iter: 7
[24-11-2023 02:36:48] [INFO] Elapsed Time: 101.1s


CPU times: user 243 ms, sys: 48.6 ms, total: 291 ms
Wall time: 2min 33s


In [11]:
%%time

conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVDZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 02:36:48] [INFO] Geometry optimization
[24-11-2023 02:36:48] [INFO] Method: M062X cc-pVDZ NMR
[24-11-2023 02:38:10] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:38:10] [INFO] Success: True
[24-11-2023 02:38:10] [INFO] Num Iter: 9
[24-11-2023 02:38:10] [INFO] Elapsed Time: 82.3s


CPU times: user 127 ms, sys: 48.6 ms, total: 175 ms
Wall time: 1min 22s


In [12]:
%%time
conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVTZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 02:38:10] [INFO] Geometry optimization
[24-11-2023 02:38:10] [INFO] Method: M062X cc-pVTZ NMR
[24-11-2023 02:41:28] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:41:28] [INFO] Success: True
[24-11-2023 02:41:28] [INFO] Num Iter: 9
[24-11-2023 02:41:28] [INFO] Elapsed Time: 198.2s


CPU times: user 175 ms, sys: 49.4 ms, total: 225 ms
Wall time: 3min 18s


In [13]:
%%time
conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X aug-cc-pVTZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 02:41:28] [INFO] Geometry optimization
[24-11-2023 02:41:28] [INFO] Method: M062X aug-cc-pVTZ NMR
[24-11-2023 02:50:45] [INFO] Identified logfile to be in ORCA format
[24-11-2023 02:50:45] [INFO] Success: True
[24-11-2023 02:50:45] [INFO] Num Iter: 9
[24-11-2023 02:50:45] [INFO] Elapsed Time: 556.3s


CPU times: user 195 ms, sys: 80.9 ms, total: 276 ms
Wall time: 9min 16s


In [17]:
%%time

conf = deepcopy(mol).GetConformer()
orca.options['method'] = 'M062X cc-pVDZ'
orca.geometry_optimization(conf)

orca.options['method'] = 'M062X aug-cc-pVTZ NMR'
orca.geometry_optimization(conf)

[24-11-2023 03:03:21] [INFO] Geometry optimization
[24-11-2023 03:03:21] [INFO] Method: M062X cc-pVDZ
[24-11-2023 03:04:42] [INFO] Identified logfile to be in ORCA format
[24-11-2023 03:04:42] [INFO] Success: True
[24-11-2023 03:04:42] [INFO] Num Iter: 9
[24-11-2023 03:04:42] [INFO] Elapsed Time: 80.5s
[24-11-2023 03:04:42] [INFO] Geometry optimization
[24-11-2023 03:04:42] [INFO] Method: M062X aug-cc-pVTZ NMR
[24-11-2023 03:10:10] [INFO] Identified logfile to be in ORCA format
[24-11-2023 03:10:10] [INFO] Success: True
[24-11-2023 03:10:10] [INFO] Num Iter: 5
[24-11-2023 03:10:10] [INFO] Elapsed Time: 328.7s


CPU times: user 234 ms, sys: 122 ms, total: 356 ms
Wall time: 6min 49s


In [15]:
# Align the conformers of `mol` on the four-membered C-C-C-N ring and draw conformer 0 only.
query = Chem.MolFromSmarts('C1CCN1')
atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when nothing matches, and per the RDKit
# docs an empty atomIds makes AlignMolConformers align on all atoms instead.
# Fail loudly rather than silently aligning on something other than the ring.
assert atom_ids, "SMARTS 'C1CCN1' did not match mol; cannot align on the ring atoms"

AllChem.AlignMolConformers(mol, atomIds=atom_ids)
# autoalign=False: the conformers were already aligned explicitly above.
draw3Dconfs(mol, confIds=[0], autoalign=False)

(3, 2, 6, 4)
num conformers 4


In [16]:
# Inspect the SCF convergence values on the parsed result (cclib documents `scfvalues`
# as the convergence values per SCF cycle; presumably `data` is a cclib ccData -- confirm).
# NOTE(review): the recorded output is a NameError because `data` did not exist in that
# kernel session; it is only assigned by the orca.single_point(...) cell, so run that first.
data.scfvalues

NameError: name 'data' is not defined

In [None]:
# SCF convergence targets of the parsed result (cclib attribute `scftargets`;
# presumably `data` is a cclib ccData -- confirm).
data.scftargets

In [None]:
# Isotropic NMR shielding per atom: element symbol, atom index, isotropic value.
# NOTE(review): the only visible assignment of `data` is an HF-3c single point with no
# NMR keyword, so `nmrtensors` may be missing unless `data` comes from an NMR run -- confirm.
for atom_idx, tensor in data.nmrtensors.items():
    element = mol.GetAtomWithIdx(atom_idx).GetSymbol()
    print(element, atom_idx, tensor['isotropic'])

In [None]:
# Exploration aid: list every attribute available on the parsed result object.
dir(data)

In [None]:
# First entry of the 'cpu_time' list stored in the result's metadata.
print(data.metadata['cpu_time'][0])

In [None]:
# Number of entries in `converged_geometries` on the parsed result.
# NOTE(review): presumably only meaningful when `data` comes from a geometry optimisation -- confirm.
len(data.converged_geometries)

In [None]:
# Exploration aid: list every attribute on the parsed result object.
# NOTE(review): this repeats an identical dir(data) cell earlier in the notebook.
dir(data)

In [None]:
# Optimisation-finished flag of the parsed result (cclib attribute `optdone`;
# presumably `data` is a cclib ccData -- confirm).
data.optdone

In [None]:
# Full metadata mapping of the parsed result (other cells read its
# 'input_file_name' and 'cpu_time' entries).
data.metadata

In [None]:
# `new_geometries` of the parsed result.
# NOTE(review): presumably the starting geometries of optimisation runs as exposed by cclib -- confirm.
data.new_geometries

In [None]:
# Frontier-orbital energies from the first moenergies array:
# the HOMO sits at index data.homos[0], the LUMO is the next orbital up.
homo_idx = data.homos[0]
homo = data.moenergies[0][homo_idx]
lumo = data.moenergies[0][homo_idx + 1]
homo, lumo

In [None]:
# First entry of the 'cpu_time' list stored in the result's metadata.
# NOTE(review): this repeats an identical cell earlier in the notebook.
print(data.metadata['cpu_time'][0])

In [None]:
# Atom count and molecular-orbital count of the parsed result, shown as a tuple
# (cclib attributes `natom` and `nmo`; presumably `data` is a cclib ccData -- confirm).
data.natom, data.nmo

In [None]:
# Full metadata mapping of the parsed result.
# NOTE(review): this repeats an identical data.metadata cell earlier in the notebook.
data.metadata

In [None]:
# SCF energies stored on the parsed result (cclib attribute `scfenergies`).
# NOTE(review): the unit depends on the cclib version -- confirm before comparing values.
data.scfenergies