In [114]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [115]:
import os
import shutil
import subprocess
from copy import deepcopy
from time import time
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdDistGeom import EmbedMultipleConfs
from linux_qm.src.util import _load_smiles3D, _create_tmp_dir, draw3Dconfs, SetPositions
from linux_qm.qm.crest.crest import conformer_pipeline


ORCA_TMP = '.orca_tmp'

In [129]:
# Molecule under study, given as a SMILES string.
smi = 'COC1CN(C)C1'

# Build the conformer ensemble with the project's conformer pipeline
# (imported from linux_qm.qm.crest.crest).
mol = conformer_pipeline(smi)
conformers = mol.GetConformers()

# Summarise the ensemble: how many conformers came back and the 'energy'
# property stored on each of them.
print("Number of conformers generated:", len(conformers))
print("Energies:", [member.GetProp('energy') for member in conformers])

normal termination of xtb
Note: The following floating-point exceptions are signalling: IEEE_INVALID_FLAG IEEE_UNDERFLOW_FLAG IEEE_DENORMAL


Number of conformers generated: 4
Energies: ['-23.275472740000001', '-23.27351354', '-23.27308459', '-23.27281279']


In [137]:
# Overlay all conformers on the four-membered C-C-C-N ring before drawing them.
query = Chem.MolFromSmarts('C1CCN1')
atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when the pattern is absent, and an
# empty atomIds makes AlignMolConformers fall back to aligning on every atom.
# Fail loudly instead of silently showing a differently-aligned overlay.
assert atom_ids, "ring pattern 'C1CCN1' not found in mol; cannot align on the ring"

AllChem.AlignMolConformers(mol, atomIds=atom_ids)
draw3Dconfs(mol, autoalign=False)

(3, 2, 6, 4)
num conformers 4


In [132]:
### QM calculation by ORCA for each conformer

def gen_input(conf, options: dict):
    """Assemble the text of an ORCA input file for one conformer.

    Args:
        conf: conformer whose coordinates are appended via ``gen_xyz_block``.
        options: job settings. ``'method'`` is required and becomes the
            ``!`` keyword line. Optional keys: ``'solvent'`` (adds a
            ``%cpcm`` block with SMD when truthy), ``'geom_maxiter'``
            (default 100) and ``'n_jobs'`` (default 8, written as
            ``%pal nprocs``). Other keys are ignored.

    Returns:
        The complete input file content as a single string.

    Raises:
        KeyError: if ``options`` has no ``'method'`` entry.
    """
    method = options['method']
    solvent = options.get('solvent')
    max_iter = options.get('geom_maxiter', 100)
    nprocs = options.get('n_jobs', 8)

    # Collect the sections in file order and join them once at the end.
    sections = [
        f"!{method}\n",
        f"%geom MaxIter {max_iter} end\n",
    ]

    if solvent:
        # Implicit solvation is only requested when a solvent name is given.
        sections.append(
            "%cpcm\n"
            "  smd true\n"
            f"  SMDsolvent \"{solvent}\"\n"
            "  end\n"
        )

    sections.append(f"%pal nprocs {nprocs} end\n")
    sections.append(gen_xyz_block(conf))
    return "".join(sections)

def gen_xyz_block(conf, charge=0, multiplicity=1):
    """Render a conformer as an ORCA ``*xyz`` coordinate block.

    Args:
        conf: conformer providing ``GetOwningMol()``, ``GetNumAtoms()`` and
            ``GetAtomPosition(i)`` (the latter must unpack into x, y, z).
        charge: total charge written on the ``*xyz`` header line. Defaults to
            0, which reproduces the previously hard-coded neutral header;
            pass the real charge for ionic species.
        multiplicity: spin multiplicity written on the header line. Defaults
            to 1 (the previously hard-coded value).

    Returns:
        The block as a string: header line, one fixed-width line per atom
        (symbol padded to 3 characters, coordinates as ``20.8f``), and a
        closing ``*`` line.

    Raises:
        ValueError: if ``multiplicity`` is smaller than 1.
    """
    if multiplicity < 1:
        raise ValueError(f"multiplicity must be >= 1, got {multiplicity}")

    mol = conf.GetOwningMol()

    # Build the lines in a list and join once instead of growing a string.
    lines = [f"*xyz {charge} {multiplicity}\n"]
    for i in range(conf.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(i).GetSymbol()
        x, y, z = conf.GetAtomPosition(i)
        lines.append(f"{symbol:3}{x:20.8f}{y:20.8f}{z:20.8f}\n")
    lines.append("*\n")
    return "".join(lines)

def write_input(content, fname='task.inp'):
    """Write ORCA input text to a file and return the file name.

    Args:
        content: full text of the input file.
        fname: destination path. Defaults to ``'task.inp'`` (relative to the
            current working directory), which is the name that was
            previously hard-coded.

    Returns:
        ``fname``, unchanged, so the caller can pass it straight to ORCA.
    """
    with open(fname, 'w') as f:
        f.write(content)
    return fname

def write_output(content, fname='output'):
    """Write captured program output to a file and return the file name.

    Args:
        content: text to store (the notebook passes ORCA's stdout).
        fname: destination path. Defaults to ``'output'`` (relative to the
            current working directory), which is the name that was
            previously hard-coded.

    Returns:
        ``fname``, unchanged, so the caller can hand it to a log parser.
    """
    with open(fname, 'w') as f:
        f.write(content)
    return fname

# Example job settings, used here only to preview the generated ORCA input.
options = {
    'method': 'B3LYP DEF2-SVP OPT',
    'solvent': 'THF',
    # NOTE(review): gen_input() only reads 'method', 'solvent', 'geom_maxiter'
    # and 'n_jobs', so this entry currently has no effect on the input file.
    'nmr_atoms': [0],
    'geom_maxiter': 100,
    'n_jobs': 22,
}

# `conf` is a notebook-level name; the later ad-hoc ORCA run cell reads it too.
conf = mol.GetConformer()
print(gen_input(conf, options))

!B3LYP DEF2-SVP OPT
%geom MaxIter 100 end
%cpcm
  smd true
  SMDsolvent "THF"
  end
%pal nprocs 22 end
*xyz 0 1
C            2.71482321         -0.42882321         -0.15404434
O            1.61468863          0.39693482         -0.44575483
C            0.56782227          0.27293487          0.47465041
C           -0.66124967          1.11991019          0.13117805
N           -1.51499863         -0.04689599          0.36976613
C           -2.58126036         -0.29502984         -0.56445599
C           -0.36188508         -0.95004435          0.33952498
H            3.46303386         -0.22805144         -0.91808164
H            2.43974837         -1.48949382         -0.18101531
H            3.12947868         -0.19816379          0.83509548
H            0.93021754          0.41950822          1.50069302
H           -0.62918764          1.44665467         -0.91403531
H           -0.88481570          1.96630277          0.78045681
H           -2.23233154         -0.34932409         -1.6

In [133]:
def set_orca_env():
    """Put the ORCA, OpenMPI, system-library and NBO6 locations on the environment.

    Prepends the install directories to ``PATH`` / ``LD_LIBRARY_PATH`` of the
    current process and sets ``NBOEXE`` / ``GENEXE`` for NBO6.

    The function is idempotent: calling it again does not add duplicate
    entries. Empty entries are never written, so an unset variable does not
    leave a trailing ``:`` behind (an empty ``LD_LIBRARY_PATH`` entry is
    commonly treated as the current directory) and an unset ``PATH`` no longer
    ends up containing the literal text ``None``.
    """
    ORCA_PATH = "/opt/orca-5.0.3"
    OPENMPI_PATH = "/opt/openmpi-4.1.1"
    SYSTEM_LIB_PATH = "/usr/lib/x86_64-linux-gnu"  # Intel MKL lives here
    NBOBIN = '/opt/NBO6'

    def _prepend(var, entry):
        # Put `entry` first, dropping empty fields and any earlier copy of it.
        existing = os.environ.get(var, '')
        kept = [p for p in existing.split(':') if p and p != entry]
        os.environ[var] = ':'.join([entry] + kept)

    # Same precedence as before: the entry prepended last is searched first.
    _prepend('PATH', ORCA_PATH)
    _prepend('LD_LIBRARY_PATH', ORCA_PATH)

    _prepend('PATH', f"{OPENMPI_PATH}/bin")
    _prepend('LD_LIBRARY_PATH', f"{OPENMPI_PATH}/lib")

    _prepend('LD_LIBRARY_PATH', SYSTEM_LIB_PATH)

    # NBO6 executables are located through dedicated variables plus PATH.
    os.environ['NBOEXE'] = f"{NBOBIN}/nbo6.exe"
    os.environ['GENEXE'] = f"{NBOBIN}/gennbo6.exe"
    _prepend('PATH', NBOBIN)

    # Alternative OpenMPI location kept for reference (bundled inside the
    # ORCA directory): /opt/orca-5.0.3/openmpi-4.1.1/{bin,lib}

# Configure the environment only when PATH does not mention 'orca' yet, so
# re-running this cell does not prepend the same directories again.
# NOTE(review): this is a substring test on the whole PATH string, so any
# unrelated entry containing 'orca' also suppresses the setup; it raises
# KeyError if PATH is not set at all.
if 'orca' not in os.environ['PATH']:
    set_orca_env()

In [134]:
os.environ['LD_LIBRARY_PATH']

'/usr/lib/x86_64-linux-gnu:/opt/openmpi-4.1.1/lib:/opt/orca-5.0.3:'

In [135]:
import cclib

In [122]:
# Ad-hoc ORCA run for the single conformer held in `conf`, executed inside a
# scratch directory that is removed afterwards.
saved_work_dir = os.getcwd()
tmp_path = _create_tmp_dir(ORCA_TMP)

data = None

options = {
    'method': 'B3LYP  6-31G(d) OPT',
    # 'method': 'B3LYP  6-31++G(d,p) OPT',
    'solvent': None,
    'geom_maxiter': 100,
    'n_jobs': 16,
}

try:
    os.chdir(tmp_path)

    input_str = gen_input(conf, options)
    fname = write_input(input_str)

    res = subprocess.run(
        ['/opt/orca-5.0.3/orca', fname, '--use-hwthread-cpus'],
        capture_output=True,
        text=True
    )

    fname = write_output(res.stdout)

    # parse ORCA output
    data = cclib.io.ccread(fname)
finally:
    # Restore the working directory and drop the scratch directory even when
    # ORCA or the parser raises; otherwise the kernel stays inside tmp_path.
    os.chdir(saved_work_dir)
    shutil.rmtree(tmp_path)

# ccread yields None when it cannot identify the log, so guard before
# touching .metadata.
if data is None or not data.metadata.get('success', False):
    print('Failed')

[22-11-2023 03:46:53] [INFO] Identified logfile to be in ORCA format


In [136]:
def update_conformer(conf, cclib_data):
    """Copy the final geometry and final SCF energy of a parsed job onto ``conf``.

    Args:
        conf: conformer that is modified in place (coordinates and the
            ``'energy'`` double property).
        cclib_data: parsed result exposing ``atomcoords`` and ``scfenergies``;
            only the last entry of each is used.
    """
    last_geometry = cclib_data.atomcoords[-1]
    SetPositions(conf, last_geometry)

    # NOTE(review): the value is stored exactly as the parser reports it
    # (presumably eV for cclib) — confirm the unit matches other 'energy'
    # properties already set on the conformers.
    last_energy = cclib_data.scfenergies[-1]
    conf.SetDoubleProp('energy', last_energy)


def run_orca(conf, options):
    """Run ORCA on one conformer in a scratch directory and parse the log.

    The input is generated with ``gen_input``, ORCA is executed inside a
    temporary directory created by ``_create_tmp_dir(ORCA_TMP)``, and its
    stdout is written to disk and parsed with ``cclib.io.ccread``. The
    previous working directory is restored and the scratch directory removed
    even if any of these steps raises.

    Side effects on ``conf``: the boolean property ``'success'`` is always
    set; on success the coordinates and ``'energy'`` are updated through
    ``update_conformer``.

    Args:
        conf: conformer to compute; modified in place as described above.
        options: job settings forwarded to ``gen_input``.

    Returns:
        ``(data, output)`` where ``data`` is the parsed cclib object (``None``
        when the log could not be identified) and ``output`` is ORCA's raw
        stdout.
    """
    # save current dir
    saved_work_dir = os.getcwd()
    tmp_path = _create_tmp_dir(ORCA_TMP)

    data = None
    output = ''
    stderr = ''
    try:
        os.chdir(tmp_path)

        fname = write_input(gen_input(conf, options))

        result = subprocess.run(
            ['/opt/orca-5.0.3/orca', fname, '--use-hwthread-cpus'],
            capture_output=True,
            text=True
        )
        output = result.stdout
        stderr = result.stderr

        fname = write_output(output)

        # parse ORCA output
        data = cclib.io.ccread(fname)
    finally:
        # Always restore: without this, a failure leaves the kernel inside
        # the scratch directory and the directory itself behind.
        os.chdir(saved_work_dir)
        shutil.rmtree(tmp_path)

    # ccread yields None when it cannot identify the log (e.g. ORCA never
    # started), which must count as a failed job rather than crash here.
    success = data is not None and bool(data.metadata.get('success', False))
    conf.SetBoolProp('success', success)

    # DEV
    print('Success:', success)

    # update conformer
    if success:
        update_conformer(conf, data)
        # DEV
        print('Elapsed time:', data.metadata['cpu_time'][0])
        print('Num iter:', len(data.atomcoords))
    else:
        print(output)
        if stderr:
            # Launch problems (missing binary deps, MPI errors) land here.
            print(stderr)

    return data, output

# Production settings: geometry optimisation plus NMR in THF.
options = {
    'method': 'M062X 6-31++G(d,p) OPT NMR',
    'solvent': 'THF',
    'geom_maxiter': 100,
    'n_jobs': 16,
}

# Run every conformer of `mol` through ORCA. run_orca() updates each conformer
# in place on success, so `mol` holds the ORCA geometries afterwards.
# `data` and `output` are overwritten on each pass and end up holding only the
# last conformer's result.
# conf = deepcopy(mol).GetConformer()
for conf in mol.GetConformers():
    print(f'======= Conformer {conf.GetId()} ======= ')
    data, output = run_orca(conf, options)
# single_point(conf, options)



[22-11-2023 03:56:28] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:01:54.044000
Num iter: 9


[22-11-2023 03:58:27] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:01:58.548000
Num iter: 9


[22-11-2023 04:00:32] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:02:05.387000
Num iter: 10


[22-11-2023 04:02:35] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:02:03.034000
Num iter: 10


In [113]:
# Same loop as the OPT+NMR cell, but the method line has no OPT keyword, so
# this requests NMR on the current geometries only.
options = {
    'method': 'M062X 6-31++G(d,p) NMR',
    'solvent': 'THF',
    'geom_maxiter': 100,
    'n_jobs': 16,
}

# `data` and `output` keep only the result of the last conformer processed.
# conf = deepcopy(mol).GetConformer()
for conf in mol.GetConformers():
    print(f'======= Conformer {conf.GetId()} ======= ')
    data, output = run_orca(conf, options)



[22-11-2023 03:45:37] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:00:11.146000
Num iter: 1


[22-11-2023 03:45:48] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:00:10.834000
Num iter: 1


In [126]:
# Overlay all conformers on the four-membered C-C-C-N ring before drawing them.
query = Chem.MolFromSmarts('C1CCN1')

atom_ids = mol.GetSubstructMatch(query)
print(atom_ids)

# GetSubstructMatch returns an empty tuple when the pattern is absent, and an
# empty atomIds makes AlignMolConformers fall back to aligning on every atom.
# Fail loudly instead of silently showing a differently-aligned overlay.
assert atom_ids, "ring pattern 'C1CCN1' not found in mol; cannot align on the ring"
AllChem.AlignMolConformers(mol, atomIds=atom_ids)

draw3Dconfs(mol, autoalign=False)

(3, 2, 5, 4)
num conformers 2


In [91]:
# Time a two-stage optimisation (smaller basis first, then diffuse functions)
# on a copy of the molecule so the conformers in `mol` are left untouched.
conf = deepcopy(mol).GetConformer()

start = time()

# `optimize_geom` is not defined anywhere in this notebook and raised
# NameError on a fresh kernel; run_orca() is the function that performs the
# optimisation and prints the Success / Elapsed time / Num iter report.
options['method'] = 'B3LYP  6-31G(d,p) OPT'
run_orca(conf, options)

options['method'] = 'B3LYP  6-31++G(d,p) OPT'
run_orca(conf, options)

print(f'Total Elapsed: {time() - start:.2f}s')

[22-11-2023 03:25:56] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:00:41.805000
Num iter: 7


[22-11-2023 03:26:37] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:00:40.600000
Num iter: 5
Total Elapsed: 82.50s


In [51]:
# Time a single-stage optimisation with the larger basis, on a copy of the
# molecule so the conformers in `mol` are left untouched.
conf = deepcopy(mol).GetConformer()

start = time()

# `optimize_geom` is not defined anywhere in this notebook and raised
# NameError on a fresh kernel; run_orca() has the same (data, output) return.
options['method'] = 'B3LYP  6-31++G(d,p) OPT'
data, output = run_orca(conf, options)

print(f'Total Elapsed: {time() - start:.2f}s')

[22-11-2023 01:20:11] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:01:16.839000
Num iter: 10
Elapsed: 76.90s


In [102]:
# Time an M06-2X optimisation + NMR job in THF, on a copy of the molecule so
# the conformers in `mol` are left untouched.
conf = deepcopy(mol).GetConformer()

start = time()

# `optimize_geom` is not defined anywhere in this notebook and raised
# NameError on a fresh kernel; run_orca() has the same (data, output) return.
options['method'] = "M062X 6-311++G(d,p) OPT NMR"
options['solvent'] = 'THF'
data, output = run_orca(conf, options)

print(f'Total Elapsed: {time() - start:.2f}s')

[22-11-2023 03:31:15] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:01:49.349000
Num iter: 10
Total Elapsed: 109.42s


In [105]:
print(output)


                                 *****************
                                 * O   R   C   A *
                                 *****************

                                            #,                                       
                                            ###                                      
                                            ####                                     
                                            #####                                    
                                            ######                                   
                                           ########,                                 
                                     ,,################,,,,,                         
                               ,,#################################,,                 
                          ,,##########################################,,             
                       ,#########################################, ''##

In [89]:
# Frequency job without implicit solvent on the current `conf`.
# NOTE(review): `single_point` is not defined in the visible part of this
# notebook, so this cell fails with NameError on a fresh kernel. run_orca()
# looks like the closest surviving equivalent — confirm before swapping it in.
options['method'] = "M062X 6-311++G(d,p) FREQ"
options['solvent'] = None
data, output = single_point(conf, options)

[22-11-2023 01:58:40] [INFO] Identified logfile to be in ORCA format


Success: True
Elapsed time: 0:01:19.202000


In [82]:
print(output)


                                 *****************
                                 * O   R   C   A *
                                 *****************

                                            #,                                       
                                            ###                                      
                                            ####                                     
                                            #####                                    
                                            ######                                   
                                           ########,                                 
                                     ,,################,,,,,                         
                               ,,#################################,,                 
                          ,,##########################################,,             
                       ,#########################################, ''##

In [78]:
data.nmrtensors

{0: {'diamagnetic': array([[255.813,   6.143,  -5.462],
         [ 12.812, 240.694,  -1.564],
         [ -2.121,  -1.115, 235.182]]),
  'paramagnetic': array([[ -98.673,   30.719,   -7.666],
         [  26.467, -127.735,   -7.416],
         [ -11.481,   -8.014, -127.091]]),
  'total': array([[157.14 ,  36.862, -13.128],
         [ 39.28 , 112.959,  -8.98 ],
         [-13.602,  -9.129, 108.091]]),
  'isotropic': 126.063}}

In [48]:
dir(data)

['OPT_DONE',
 'OPT_NEW',
 'OPT_UNCONVERGED',
 'OPT_UNKNOWN',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_attrlist',
 '_dictsofarrays',
 '_intarrays',
 '_listsofarrays',
 'arrayify',
 'atomcharges',
 'atomcoords',
 'atommasses',
 'atomnos',
 'charge',
 'check_values',
 'closed_shell',
 'converged_geometries',
 'coreelectrons',
 'getattributes',
 'homos',
 'listify',
 'metadata',
 'moenergies',
 'moments',
 'mosyms',
 'mult',
 'natom',
 'nbasis',
 'nelectrons',
 'new_geometries',
 'nmo',
 'nmrtensors',
 'scfenergies',
 'scftargets',
 'scfvalues',
 'setattributes',
 'typecheck',
 'unconverged_geometries',
 'unknown_geometries',
 'write',
 'writecml',

In [22]:
print(data.metadata['cpu_time'][0])

0:00:47.251000


In [23]:
len(data.converged_geometries)

10

In [24]:
dir(data)

['OPT_DONE',
 'OPT_NEW',
 'OPT_UNCONVERGED',
 'OPT_UNKNOWN',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_attrlist',
 '_dictsofarrays',
 '_intarrays',
 '_listsofarrays',
 'arrayify',
 'atomcharges',
 'atomcoords',
 'atommasses',
 'atomnos',
 'charge',
 'check_values',
 'closed_shell',
 'converged_geometries',
 'coreelectrons',
 'geotargets',
 'geovalues',
 'getattributes',
 'grads',
 'homos',
 'listify',
 'metadata',
 'moenergies',
 'moments',
 'mosyms',
 'mult',
 'natom',
 'nbasis',
 'nelectrons',
 'new_geometries',
 'nmo',
 'optdone',
 'scfenergies',
 'scftargets',
 'scfvalues',
 'setattributes',
 'typecheck',
 'unconverged_geometries',
 'unknown

In [25]:
data.optdone

True

In [26]:
data.metadata

{'package': 'ORCA',
 'methods': ['DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': '6-31G(d)',
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'task.inp',
 'input_file_contents': '!B3LYP  6-31G(d) OPT\n%geom MaxIter 100 end\n%pal nprocs 16 end\n*xyz 0 1\nC            2.33418693          0.28724210         -0.03479032\nO            1.21564057         -0.49474602          0.30438599\nC            0.06488430         -0.16347045         -0.41939719\nC           -0.73539125          1.07536686          0.03878348\nN           -1.94835778          0.25290766          0.06363013\nC           -1.16881486         -0.98547401         -0.02625754\nH            2.58403110          0.17760805         -1.09732236\nH            3.16045873         -0.07607399          0.57278208\nH            2.16242903          1.34862

In [27]:
data.new_geometries

array([[[ 2.334187e+00,  2.872420e-01, -3.479000e-02],
        [ 1.215641e+00, -4.947460e-01,  3.043860e-01],
        [ 6.488400e-02, -1.634700e-01, -4.193970e-01],
        [-7.353910e-01,  1.075367e+00,  3.878300e-02],
        [-1.948358e+00,  2.529080e-01,  6.363000e-02],
        [-1.168815e+00, -9.854740e-01, -2.625800e-02],
        [ 2.584031e+00,  1.776080e-01, -1.097322e+00],
        [ 3.160459e+00, -7.607400e-02,  5.727820e-01],
        [ 2.162429e+00,  1.348625e+00,  1.781310e-01],
        [ 2.686050e-01, -1.631790e-01, -1.498129e+00],
        [-8.048940e-01,  1.910925e+00, -6.601590e-01],
        [-3.978760e-01,  1.436237e+00,  1.016349e+00],
        [-2.530743e+00,  3.492550e-01,  8.877150e-01],
        [-1.004265e+00, -1.509676e+00,  9.195460e-01],
        [-1.555893e+00, -1.680968e+00, -7.717810e-01]],

       [[ 2.341314e+00,  2.844670e-01, -3.648000e-02],
        [ 1.216987e+00, -5.033860e-01,  2.997270e-01],
        [ 6.215100e-02, -1.630780e-01, -4.218010e-01],
        

In [28]:
# HOMO / LUMO energies from the parsed result.
# data.homos[0] is used as the index of the highest occupied orbital in the
# first entry of data.moenergies; the LUMO is taken as the next orbital up.
# NOTE(review): index 0 presumably selects the only (restricted) spin channel
# and the values are presumably in eV, per cclib conventions — confirm.
homo = data.moenergies[0][data.homos[0]]
lumo = data.moenergies[0][data.homos[0] + 1]
homo, lumo

(-5.9764364985315, 2.1235220665319)

In [29]:
print(data.metadata['cpu_time'][0])

0:00:47.251000


In [30]:
data.natom, data.nmo

(15, 102)

In [31]:
data.metadata

{'package': 'ORCA',
 'methods': ['DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': '6-31G(d)',
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'task.inp',
 'input_file_contents': '!B3LYP  6-31G(d) OPT\n%geom MaxIter 100 end\n%pal nprocs 16 end\n*xyz 0 1\nC            2.33418693          0.28724210         -0.03479032\nO            1.21564057         -0.49474602          0.30438599\nC            0.06488430         -0.16347045         -0.41939719\nC           -0.73539125          1.07536686          0.03878348\nN           -1.94835778          0.25290766          0.06363013\nC           -1.16881486         -0.98547401         -0.02625754\nH            2.58403110          0.17760805         -1.09732236\nH            3.16045873         -0.07607399          0.57278208\nH            2.16242903          1.34862

In [32]:
data.scfenergies

array([-7825.27525831, -7825.30191975, -7825.30649044, -7825.3097901 ,
       -7825.31161489, -7825.31265192, -7825.3131273 , -7825.31332186,
       -7825.31339288, -7825.31340513])