In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import shutil
import subprocess
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdDistGeom import EmbedMultipleConfs
from linux_qm.src.util import _load_smiles3D, _create_tmp_dir, draw3Dconfs, SetPositions
from linux_qm.qm.crest.crest import conformer_pipeline


# Location handed to _create_tmp_dir() by the ORCA run cell when it sets up
# its scratch directory.
ORCA_TMP = '/tmp/.orca_tmp'

In [3]:
# Molecule under study as a SMILES string (3-methoxyazetidine).
# A smaller alternative for quick tests is 'CO'.
smi = 'COC1CNC1'

# Build the conformer ensemble with the project's conformer_pipeline helper
# (imported from linux_qm.qm.crest.crest).
# RDKit-only alternative that was tried earlier:
#   mol = _load_smiles3D(smi)
#   EmbedMultipleConfs(mol, numConfs=3)
mol = conformer_pipeline(smi)

# Collect the conformers stored on the molecule and report how many exist.
conformers = mol.GetConformers()
print("Number of conformers generated: {}".format(len(conformers)))

normal termination of xtb
Note: The following floating-point exceptions are signalling: IEEE_INVALID_FLAG IEEE_UNDERFLOW_FLAG IEEE_DENORMAL


Number of conformers generated: 2


In [4]:
# Display the conformers of `mol` with the project helper draw3Dconfs
# (presumably an interactive 3D view — confirm in linux_qm.src.util).
draw3Dconfs(mol)

num conformers 2


In [18]:
### QM calculation with ORCA
# Job settings that gen_input() turns into ORCA input directives:
#   method       -> the "!" keyword line
#   solvent      -> SMD solvent name inside the %cpcm block
#   geom_maxiter -> "%geom MaxIter"
#   n_jobs       -> "%pal nprocs"
options = dict(
    method='B3LYP def2-TZVP OPT',
    solvent='THF',
    geom_maxiter=100,
    n_jobs=22,
)

def gen_input(conf, options: dict) -> str:
    """Build the text of an ORCA input file for a single conformer.

    Args:
        conf: Conformer supplying the atoms and coordinates; see
            gen_xyz_block() for the methods it must provide.
        options: Job settings.
            Required keys: 'method' (written after '!'), 'geom_maxiter'
            and 'n_jobs'.
            Optional keys: 'solvent' (missing or falsy -> no %cpcm block),
            'charge' (default 0) and 'multiplicity' (default 1).

    Returns:
        The complete input file contents as one string.

    Raises:
        KeyError: If one of the required keys is missing from ``options``.
    """
    method = options['method']
    geom_maxiter, n_jobs = options['geom_maxiter'], options['n_jobs']
    # The solvent block is optional, so a missing key must not raise.
    solvent = options.get('solvent')

    sections = [f"!{method}\n"]
    if solvent:
        sections.append(
            "%cpcm\n"
            "  smd true\n"
            f"  SMDsolvent \"{solvent}\"\n"
            "  end\n"
        )
    # NOTE: a verbose "%output ... PrintLevel Huge ... end" block was tried at
    # this position earlier and is intentionally not emitted.
    sections.append(f"%geom MaxIter {geom_maxiter} end\n")
    sections.append(f"%pal nprocs {n_jobs} end\n")
    sections.append(
        gen_xyz_block(
            conf,
            charge=options.get('charge', 0),
            multiplicity=options.get('multiplicity', 1),
        )
    )
    return "".join(sections)

def gen_xyz_block(conf, charge: int = 0, multiplicity: int = 1) -> str:
    """Format the coordinates of a conformer as an ORCA ``*xyz`` block.

    Args:
        conf: Object providing GetOwningMol(), GetNumAtoms() and
            GetAtomPosition(i); the owning molecule must provide
            GetAtomWithIdx(i).GetSymbol(). Each position must unpack into
            three numbers (x, y, z).
        charge: Total charge written on the ``*xyz`` header line.
        multiplicity: Spin multiplicity written on the ``*xyz`` header line.

    Returns:
        The block text: header line, one line per atom, closing ``*`` line.
    """
    mol = conf.GetOwningMol()
    lines = [f"*xyz {charge} {multiplicity}\n"]
    for i in range(conf.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(i).GetSymbol()
        x, y, z = conf.GetAtomPosition(i)
        # Symbol padded to 3 characters, each coordinate to 20 with 8 decimals.
        lines.append(f"{symbol:3}{x:20.8f}{y:20.8f}{z:20.8f}\n")
    lines.append("*\n")
    return "".join(lines)


# Take one conformer of `mol` (GetConformer() is called without an id, so
# presumably the default/first one — confirm against the RDKit docs) and
# preview the ORCA input generated for it. The ORCA run cell further down
# reuses this same `conf`, so only this single conformer is calculated.
conf = mol.GetConformer()
print(gen_input(conf, options))

!B3LYP def2-TZVP OPT
%cpcm
  smd true
  SMDsolvent "THF"
  end
%geom MaxIter 100 end
%pal nprocs 22 end
*xyz 0 1
C            2.33416950          0.28725986          0.03476549
O            1.21564089         -0.49477460         -0.30436198
C            0.06488140         -0.16348648          0.41940967
C           -1.16882544         -0.98546954          0.02625142
N           -1.94834967          0.25292588         -0.06365260
C           -0.73536909          1.07536838         -0.03876436
H            2.16241461          1.34861577         -0.17829044
H            3.16046679         -0.07612526         -0.57273056
H            2.58397142          0.17775524          1.09732023
H            0.26859209         -0.16320648          1.49814406
H           -1.55592664         -1.68095137          0.77177471
H           -1.00427617         -1.50967970         -0.91954727
H           -2.53069235          0.34928233         -0.88776656
H           -0.39783558          1.43626129         -1.

In [6]:
def set_orca_env(orca_path: str = "/opt/orca-5.0.3",
                 openmpi_path: str = "/opt/openmpi-4.1.1") -> None:
    """Put the ORCA and OpenMPI directories at the front of the search paths.

    Updates ``PATH`` and ``LD_LIBRARY_PATH`` in ``os.environ`` so that, in
    order, ``<openmpi_path>/bin`` (resp. ``/lib``) and ``orca_path`` come
    first. Calling the function again does not add duplicate entries.

    Args:
        orca_path: ORCA installation directory.
        openmpi_path: OpenMPI installation prefix containing ``bin`` and
            ``lib``. NOTE(review): an earlier note in this notebook mentions
            "/opt/orca-5.0.3/openmpi-4.1.1" as the real OpenMPI location on
            this machine — pass it here if the default does not exist.
    """
    def _prepend(var: str, directory: str) -> None:
        current = os.environ.get(var, '')
        # Keep the existing entries untouched except for earlier copies of
        # `directory`, so repeated calls stay idempotent.
        rest = [p for p in current.split(':') if p != directory] if current else []
        # Joining only real entries avoids a trailing ':' when the variable
        # was empty/unset; an empty entry is read as the current directory.
        os.environ[var] = ':'.join([directory] + rest)

    _prepend('PATH', orca_path)
    _prepend('LD_LIBRARY_PATH', orca_path)
    _prepend('PATH', f"{openmpi_path}/bin")
    _prepend('LD_LIBRARY_PATH', f"{openmpi_path}/lib")

# Configure the environment only when no PATH entry mentions 'orca' yet.
# .get() keeps this cell from raising KeyError when PATH itself is unset.
if 'orca' not in os.environ.get('PATH', ''):
    set_orca_env()

In [7]:
# Show the library search path to check the result of set_orca_env().
# Raises KeyError if LD_LIBRARY_PATH is not set at all.
os.environ['LD_LIBRARY_PATH']

'/opt/openmpi-4.1.1/lib:/opt/orca-5.0.3:'

In [27]:
import cclib

# Parsed ORCA result; stays None when the run or the parsing fails.
data = None

# ORCA is run from inside a scratch directory; remember where we came from.
saved_work_dir = os.getcwd()
tmp_path = _create_tmp_dir(ORCA_TMP)
os.chdir(tmp_path)

try:
    # Write the input file for the conformer selected earlier (`conf`).
    with open('input.inp', 'w') as f:
        inp = gen_input(conf, options)
        f.write(inp)

    res = subprocess.run(
        ['/opt/orca-5.0.3/orca', 'input.inp', '--use-hwthread-cpus'],
        capture_output=True,
        text=True
    )

    # ORCA's stdout is saved to a file so cclib can parse it.
    with open('output', 'w') as f:
        f.write(res.stdout)

    # Surface a failed run instead of silently parsing a partial log.
    if res.returncode != 0:
        print(f"ORCA exited with return code {res.returncode}")
        if res.stderr:
            print(res.stderr)

    # parse ORCA output
    data = cclib.io.ccread('output')

except Exception as e:
    print(e)

finally:
    # Always restore the working directory and remove the scratch files,
    # even when writing the input or running ORCA raised.
    os.chdir(saved_work_dir)
    shutil.rmtree(tmp_path)

if data is not None:
    print(data.metadata['cpu_time'][0])
    print('geom iter:', len(data.converged_geometries))
    print('converged:', data.optdone)
    

[21-11-2023 07:53:46] [INFO] Identified logfile to be in ORCA format


0:01:51.109000
geom iter: 10
converged: True


In [28]:
# First entry of metadata['cpu_time'] for the parsed job; the same value is
# already printed by the ORCA run cell.
print(data.metadata['cpu_time'][0])

0:01:51.109000


In [29]:
# Number of entries in converged_geometries (reported as 'geom iter' by the
# ORCA run cell).
len(data.converged_geometries)

10

In [30]:
# Exploration aid: list every attribute available on the parsed result object.
dir(data)

['OPT_DONE',
 'OPT_NEW',
 'OPT_UNCONVERGED',
 'OPT_UNKNOWN',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_attrlist',
 '_dictsofarrays',
 '_intarrays',
 '_listsofarrays',
 'arrayify',
 'atomcharges',
 'atomcoords',
 'atommasses',
 'atomnos',
 'charge',
 'check_values',
 'closed_shell',
 'converged_geometries',
 'coreelectrons',
 'geotargets',
 'geovalues',
 'getattributes',
 'grads',
 'homos',
 'listify',
 'metadata',
 'moenergies',
 'moments',
 'mosyms',
 'mult',
 'natom',
 'nbasis',
 'nelectrons',
 'new_geometries',
 'nmo',
 'optdone',
 'scfenergies',
 'scftargets',
 'scfvalues',
 'setattributes',
 'typecheck',
 'unconverged_geometries',
 'unknown

In [31]:
# Job-level metadata stored on the parsed result (package, version, basis set,
# echoed input file, ...).
data.metadata

{'package': 'ORCA',
 'methods': ['DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': 'def2-TZVP',
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'input.inp',
 'input_file_contents': '!B3LYP def2-TZVP OPT\n%cpcm\n  smd true\n  SMDsolvent "THF"\n  end\n%geom MaxIter 100 end\n%pal nprocs 22 end\n*xyz 0 1\nC            2.33416950          0.28725986          0.03476549\nO            1.21564089         -0.49477460         -0.30436198\nC            0.06488140         -0.16348648          0.41940967\nC           -1.16882544         -0.98546954          0.02625142\nN           -1.94834967          0.25292588         -0.06365260\nC           -0.73536909          1.07536838         -0.03876436\nH            2.16241461          1.34861577         -0.17829044\nH            3.16046679         -0.07612526         -0.57

In [11]:
# Display the new_geometries array of the parsed result; the saved output shows
# one block of x, y, z rows (one row per atom) for each stored geometry.
data.new_geometries

array([[[ 2.334169,  0.28726 ,  0.034765],
        [ 1.215641, -0.494775, -0.304362],
        [ 0.064881, -0.163486,  0.41941 ],
        [-1.168825, -0.98547 ,  0.026251],
        [-1.94835 ,  0.252926, -0.063653],
        [-0.735369,  1.075368, -0.038764],
        [ 2.162415,  1.348616, -0.17829 ],
        [ 3.160467, -0.076125, -0.572731],
        [ 2.583971,  0.177755,  1.09732 ],
        [ 0.268592, -0.163206,  1.498144],
        [-1.555927, -1.680951,  0.771775],
        [-1.004276, -1.50968 , -0.919547],
        [-2.530692,  0.349282, -0.887767],
        [-0.397836,  1.436261, -1.016314],
        [-0.804876,  1.910907,  0.6602  ]],

       [[ 2.340629,  0.28278 ,  0.035309],
        [ 1.213674, -0.498957, -0.300802],
        [ 0.060503, -0.162887,  0.419456],
        [-1.176965, -0.991363,  0.026361],
        [-1.966482,  0.254251, -0.059411],
        [-0.744593,  1.0843  , -0.039159],
        [ 2.177959,  1.356732, -0.180111],
        [ 3.181933, -0.079484, -0.574525],
        [

In [12]:
# HOMO and LUMO energies: data.homos[0] is used as the index of the highest
# occupied orbital, so the two-element slice starting there yields the HOMO
# followed by the next orbital (LUMO) from the first set in moenergies.
# Units are presumably eV (cclib convention) — TODO confirm.
data.moenergies[0][data.homos[0]:data.homos[0]+2]

array([-6.02993408,  1.80555703])

In [13]:
# NOTE(review): duplicate of an earlier cell. The saved output here
# (0:01:02.219000) differs from the 0:01:51.109000 saved for the same
# expression above, so it comes from a different run — restart the kernel and
# run all cells to bring the outputs back in sync.
print(data.metadata['cpu_time'][0])

0:01:02.219000


In [14]:
# The natom and nmo attributes of the parsed result, shown as a tuple.
data.natom, data.nmo

(15, 129)

In [15]:
# NOTE(review): duplicate of an earlier cell. The saved output reports
# basis_set 'def2-SVP' while the options cell specifies 'def2-TZVP', so this
# output predates the current settings — re-run before relying on it.
data.metadata

{'package': 'ORCA',
 'methods': ['DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT',
  'DFT'],
 'success': True,
 'legacy_package_version': '5.0.3',
 'package_version': '5.0.3',
 'basis_set': 'def2-SVP',
 'info': ['the flag for use of the SHARK integral package has been found!'],
 'input_file_name': 'input.inp',
 'input_file_contents': '!B3LYP def2-SVP OPT\n%cpcm\n  smd true\n  SMDsolvent "THF"\n  end\n%geom MaxIter 100 end\n%pal nprocs 22 end\n*xyz 0 1\nC            2.33416950          0.28725986          0.03476549\nO            1.21564089         -0.49477460         -0.30436198\nC            0.06488140         -0.16348648          0.41940967\nC           -1.16882544         -0.98546954          0.02625142\nN           -1.94834967          0.25292588         -0.06365260\nC           -0.73536909          1.07536838         -0.03876436\nH            2.16241461          1.34861577         -0.17829044\nH            3.16046679         -0.07612526         -0.5727

In [16]:
# The scfenergies array of the parsed result (ten values in the saved output).
# Units are presumably eV (cclib convention) — TODO confirm.
data.scfenergies

array([-7819.87372653, -7819.91352808, -7819.91792136, -7819.92131816,
       -7819.92270104, -7819.92371439, -7819.92423985, -7819.92445781,
       -7819.92451822, -7819.92453454])