From 9737addf6303a615c2e3ea9c35f631be41b21b8c Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Mon, 20 Feb 2023 12:06:20 -0500 Subject: [PATCH 01/17] Add type hints --- molSimplify/Informatics/autocorrelation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/molSimplify/Informatics/autocorrelation.py b/molSimplify/Informatics/autocorrelation.py index 6065b84f..9c574c3f 100644 --- a/molSimplify/Informatics/autocorrelation.py +++ b/molSimplify/Informatics/autocorrelation.py @@ -1,4 +1,5 @@ import numpy as np +from molSimplify.Classes.mol3D import mol3D from molSimplify.Classes.ligand import ligand_breakdown, ligand_assign from molSimplify.Scripts.geometry import distance from molSimplify.Classes.globalvars import globalvars @@ -214,7 +215,7 @@ def summetric(mol, prop_vec, orig, d, oct=True, catoms=None): return (result_vector) -def deltametric(mol, prop_vec, orig, d, oct=True, catoms=None): +def deltametric(mol: mol3D, prop_vec, orig, d: int, oct=True, catoms=None): # # this function returns the deltametric # # over the whole molecule # Inputs: @@ -631,7 +632,7 @@ def layer_density_in_3D(mol, prop_vec, orig, d, oct=True): return result_vector -def construct_property_vector(mol, prop, oct=True, modifier=False): +def construct_property_vector(mol: mol3D, prop: str, oct=True, modifier=False): # # assigns the value of property # # for atom i (zero index) in mol # # to position i in returned vector From 6f8c7580b3953f4a3f35518d2f0d534875de87de Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 14:24:06 -0500 Subject: [PATCH 02/17] Merge branch 'master' --- molSimplify/Classes/mol3D.py | 9 +- molSimplify/Classes/protein3D.py | 61 +++++--- molSimplify/Scripts/addtodb.py | 37 ++--- tests/helperFuncs.py | 207 ++++++++++++------------- tests/inputs/ligand_from_mol_file.in | 8 + tests/inputs/pdp.mol | 108 +++++++++++++ tests/inputs/tutorial_2.in | 3 +- tests/refs/example_5_v3_noff.report | 26 ++++ tests/refs/example_5_v3_noff.xyz | 51 ++++++ tests/refs/example_7_noff.report | 26 ++++ tests/refs/example_7_noff.xyz | 53 +++++++ tests/refs/ligand_from_mol_file.report | 7 + tests/refs/ligand_from_mol_file.xyz | 53 +++++++ tests/refs/tetrahedral_2_noff.xyz | 80 +++++----- tests/test_ligand_from_mol_file.py | 40 +++++ tests/test_tutorial_2.py | 3 +- 16 files changed, 575 insertions(+), 197 deletions(-) create mode 100644 tests/inputs/ligand_from_mol_file.in create mode 100644 tests/inputs/pdp.mol create mode 100644 tests/refs/example_5_v3_noff.report create mode 100644 tests/refs/example_5_v3_noff.xyz create mode 100644 tests/refs/example_7_noff.report create mode 100644 tests/refs/example_7_noff.xyz create mode 100644 tests/refs/ligand_from_mol_file.report create mode 100644 tests/refs/ligand_from_mol_file.xyz create mode 100644 tests/test_ligand_from_mol_file.py diff --git a/molSimplify/Classes/mol3D.py b/molSimplify/Classes/mol3D.py index 3d519cfd..cc041cf1 100644 --- a/molSimplify/Classes/mol3D.py +++ b/molSimplify/Classes/mol3D.py @@ -11,7 +11,6 @@ import tempfile import time import xml.etree.ElementTree as ET -from math import sqrt import numpy as np import openbabel from typing import List, Optional @@ -493,7 +492,7 @@ def BCM(self, idx1, idx2, d): u = 0.0 for u0 in bondv: u += (u0 * u0) - u = sqrt(u) + u = np.sqrt(u) dR = [i * (d / u - 1) for i in bondv] submolidxes = self.findsubMol(idx1, idx2) for submolidx in submolidxes: @@ -2883,7 +2882,7 @@ def rmsd(self, mol2): rmsd = 0 else: rmsd /= Nat0 - return sqrt(rmsd) + return np.sqrt(rmsd) def geo_rmsd(self, mol2): """ @@ -2925,7 +2924,7 @@ def geo_rmsd(self, mol2): rmsd = 0 else: rmsd /= Nat0 - return sqrt(rmsd) + return np.sqrt(rmsd) else: raise ValueError("Number of atom does not match between two mols.") @@ -3057,7 +3056,7 @@ def rmsd_nonH(self, mol2): if (not atom0.sym == 'H') and (not atom1.sym == 'H'): rmsd += (atom0.distance(atom1)) ** 2 rmsd /= Nat0 - return sqrt(rmsd) + return np.sqrt(rmsd) def maxatomdist_nonH(self, mol2): """ diff --git a/molSimplify/Classes/protein3D.py b/molSimplify/Classes/protein3D.py index f28014ac..33d4275f 100644 --- a/molSimplify/Classes/protein3D.py +++ b/molSimplify/Classes/protein3D.py @@ -40,7 +40,7 @@ def __init__(self, pdbCode='undef'): self.nhetmols = 0 # Number of chains self.nchains = 0 - # Dictionary of monomerss + # Dictionary of monomers self.aas = {} # Dictionary of all atoms self.atoms = {} @@ -152,7 +152,6 @@ def setMissingAtoms(self, missing_atoms): def setMissingAAs(self, missing_aas): """ Set missing amino acids of a protein3D class to a new list. - Parameters ---------- missing_aas : list @@ -256,7 +255,7 @@ def getMissingAAs(self): >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB >>> pdb_system.getMissingAAs() # This gives a list of monomer3D objects - >>> [pdb_system.getMissingAAs()[x].three_lc for x in range(len(val.getMissingAAs()))] # This returns + >>> [pdb_system.getMissingAAs()[x].three_lc for x in range(len(pdb_system.getMissingAAs()))] # This returns >>> # the list of missing AAs by their 3-letter codes """ return self.missing_aas @@ -359,42 +358,65 @@ def getChain(self, chain_id): >>> pdb_system.getChain('A') # Get chain A of the PDB """ p = protein3D() + p.setPDBCode(self.pdbCode) p.setChains({chain_id: self.chains[chain_id]}) - p.setAAs(set(self.chains[chain_id])) p.setR(self.R) p.setRfree(self.Rfree) + missing_aas = [] for aa in self.missing_aas: if aa.chain == chain_id: - missing_aas.append(aa) + missing_aas.append(aa) p.setMissingAAs(missing_aas) + + aas = {} + for aa in self.aas: + if aa[0] == chain_id: + aas[aa] = self.aas[aa] + p.setAAs(aas) + gone_atoms = {} for aa in self.missing_atoms.keys(): - if aa.chain == chain_id: + if aa[0] == chain_id: gone_atoms[aa] = self.missing_atoms[aa] p.setMissingAtoms(gone_atoms) - gone_hets = self.hetatms + + hets_flipped = {value[0]:key for key, value in self.hetmols.items()} atoms = {} + a_ids = {} + hets = {} for a_id in self.atoms: - aa = self.getResidue(a_id) - if aa is not None: + aa = self.getMolecule(a_id) + + if type(aa) == monomer3D: if aa.chain == chain_id: atoms[a_id] = self.atoms[a_id] + a_ids[self.atoms[a_id]] = a_id else: - if chain_id not in gone_hets[(a_id, self.atoms[a_id])]: - del gone_hets[(a_id, self.atoms[a_id])] - else: + if aa not in hets_flipped: + print(a_id) + het = hets_flipped[aa] + het_chain_id = het[0] + if het_chain_id == chain_id: + hets[het] = self.hetmols[het] atoms[a_id] = self.atoms[a_id] - p.setHetatms(gone_hets) + a_ids[self.atoms[a_id]] = a_id + + p.setHetmols(hets) p.setAtoms(atoms) + bonds = {} for a in self.bonds.keys(): if a in p.atoms.values(): bonds[a] = set() for b in self.bonds[a]: if b in p.atoms.values(): - bonds[a].add(b) + bonds[a].add(b) p.setBonds(bonds) + + p.setIndices(a_ids) + p.setConf([conf for conf in self.conf if conf[0] == chain_id]) + return p def getMolecule(self, a_id, aas_only=False): @@ -628,7 +650,6 @@ def getIndex(self, atom): if hasattr(self, 'a_ids') and atom in self.a_ids.keys(): idx = self.a_ids[atom] else: - print(atom.sym) idx = list(self.atoms.keys())[list(self.atoms.values()).index(atom)] return idx @@ -730,8 +751,11 @@ def readfrompdb(self, text): text = text.replace(line, '') sp = line.split() if len(sp) > 2: - a = monomer3D(sp[0], sp[1], sp[2]) - missing_aas.append(a) + res_num = int(sp[2]) + # Ignoring expression tags which are negative residues + if res_num > 0: + a = monomer3D(sp[0], sp[1], sp[2]) + missing_aas.append(a) # start getting missing atoms if "M RES CSSEQI ATOMS" in text: @@ -987,7 +1011,8 @@ def setEDIAScores(self): dict2_str = out2.decode("UTF-8") dictionary = ast.literal_eval(dict2_str) link = dictionary["atom_scores"] - df = pd.read_csv(link, error_bad_lines=False) + df = pd.read_csv(link, on_bad_lines='skip') + for i, row in df.iterrows(): EDIA = row["EDIA"] index = row["Infile id"] diff --git a/molSimplify/Scripts/addtodb.py b/molSimplify/Scripts/addtodb.py index 06fd9f14..19cfdd1a 100644 --- a/molSimplify/Scripts/addtodb.py +++ b/molSimplify/Scripts/addtodb.py @@ -14,7 +14,18 @@ import unicodedata import openbabel import shutil -import ast +import pathlib + + +def initialize_custom_database(globs): + print('To add to database, you need to set a custom path. Please enter a writeable file path:') + # Strip double quotes from the string + raw_input = input('path=').replace('"', '') + # Resolve the path to get the absolute path and convert back to string + new_path = str(pathlib.Path(raw_input).resolve()) + globs.add_custom_path(new_path) + globs.custom_path = new_path + copy_to_custom_path() # Add molecule to ligand database @@ -30,11 +41,7 @@ def addtoldb(smimol, sminame, smident, smicat, smigrps, smictg, ffopt, smichg=No emsg = False globs = globalvars() if not globs.custom_path or not os.path.exists(str(globs.custom_path)): - print('To add to database, you need to set a custom path. Please enter a writeable file path:') - new_path = ast.literal_eval(input('path=')) - globs.add_custom_path(new_path) - globs.custom_path = new_path - copy_to_custom_path() + initialize_custom_database(globs) lipath = globs.custom_path + "/Ligands/ligands.dict" licores = readdict(lipath) @@ -123,11 +130,7 @@ def addtocdb(smimol, sminame, smicat): emsg = False globs = globalvars() if not globs.custom_path or not os.path.exists(str(globs.custom_path)): - print('To add to database, you need to set a custom path. Please enter a writeable file path:') - new_path = ast.literal_eval(input('path=')) - globs.add_custom_path(new_path) - globs.custom_path = new_path - copy_to_custom_path() + initialize_custom_database(globs) cpath = globs.custom_path + "/Cores/cores.dict" mcores = readdict(cpath) cores_folder = globs.custom_path + "/Cores/" @@ -184,11 +187,7 @@ def addtocdb(smimol, sminame, smicat): def addtobdb(smimol, sminame): globs = globalvars() if not globs.custom_path or not os.path.exists(str(globs.custom_path)): - print('To add to database, you need to set a custom path. Please enter a writeable file path:') - new_path = ast.literal_eval(input('path=')) - globs.add_custom_path(new_path) - globs.custom_path = new_path - copy_to_custom_path() + initialize_custom_database(globs) bpath = globs.custom_path + "/Bind/bind.dict" bindcores = readdict(bpath) bind_folder = globs.custom_path + "/Bind/" @@ -249,11 +248,7 @@ def removefromDB(sminame, ropt): emsg = False globs = globalvars() if not globs.custom_path or not os.path.exists(str(globs.custom_path)): - print('To database, you need to set a custom path. Please enter a writeable file path:') - new_path = ast.literal_eval(input('path=')) - globs.add_custom_path(new_path) - globs.custom_path = new_path - copy_to_custom_path() + initialize_custom_database(globs) li_path = globs.custom_path + "/Ligands/ligands.dict" li_folder = globs.custom_path + "/Ligands/" core_path = globs.custom_path + "/Cores/cores.dict" diff --git a/tests/helperFuncs.py b/tests/helperFuncs.py index 7daf176d..86b3c4cd 100644 --- a/tests/helperFuncs.py +++ b/tests/helperFuncs.py @@ -10,17 +10,12 @@ from molSimplify.Classes.mol3D import mol3D from pkg_resources import resource_filename, Requirement from typing import Dict +from contextlib import contextmanager from pathlib import Path -def fuzzy_equal(x1, x2, thresh): - return np.fabs(float(x1) - float(x2)) < thresh - - -# check whether the string is a integral/float/scientific - - def is_number(s): + """check whether the string is a integral/float/scientific""" try: float(s) return True @@ -28,6 +23,20 @@ def is_number(s): return False +@contextmanager +def working_directory(path: Path): + prev_cwd = os.getcwd() + try: + os.chdir(path) + yield + finally: + os.chdir(prev_cwd) + + +def fuzzy_equal(x1, x2, thresh): + return np.fabs(float(x1) - float(x2)) < thresh + + def fuzzy_compare_xyz(xyz1, xyz2, thresh): fuzzyEqual = False mol1 = mol3D() @@ -89,10 +98,8 @@ def getMetalLigBondLength(mymol3d): return blength -# Compare number of atoms - - def compareNumAtoms(xyz1, xyz2): + """Compare number of atoms""" print("Checking total number of atoms") mol1 = mol3D() mol1.readfromxyz(xyz1) @@ -104,10 +111,8 @@ def compareNumAtoms(xyz1, xyz2): return passNumAtoms -# Compare Metal Ligand Bond Length - - def compareMLBL(xyz1, xyz2, thresh): + """Compare Metal Ligand Bond Length""" print("Checking metal-ligand bond length") mol1 = mol3D() mol1.readfromxyz(xyz1) @@ -125,10 +130,8 @@ def compareMLBL(xyz1, xyz2, thresh): return passMLBL -# Compare Ligand Geometry - - def compareLG(xyz1, xyz2, thresh): + """Compare Ligand Geometry""" print("Checking the Ligand Geometries") passLG = True ligs1 = getAllLigands(xyz1) @@ -219,7 +222,7 @@ def jobdir(infile): return mydir -def parse4test(infile, tmpdir: Path, isMulti: bool = False, external: Dict[str, str] = {}) -> str: +def parse4test(infile, tmpdir: Path, isMulti: bool = False, extra_args: Dict[str, str] = {}) -> str: name = jobname(infile) f = tmpdir.join(os.path.basename(infile)) newname = f.dirname + "/" + os.path.basename(infile) @@ -229,9 +232,9 @@ def parse4test(infile, tmpdir: Path, isMulti: bool = False, external: Dict[str, data = f_in.readlines() newdata = "" for line in data: - if line.split()[0] in external.keys(): + if line.split()[0] in extra_args.keys(): newdata += (line.split()[0] + ' ' + str(os.path.dirname(infile)) - + '/' + str(external[line.split()[0]]) + '\n') + + '/' + str(extra_args[line.split()[0]]) + '\n') continue if not (("-jobdir" in line) or ("-name" in line)): newdata += line @@ -246,6 +249,7 @@ def parse4test(infile, tmpdir: Path, isMulti: bool = False, external: Dict[str, # smidata=f.read() # fsmi.write(smidata) # print "smi file is copied to the temporary running folder!" + newdata += f"-rundir {tmpdir}\n" newdata += "-jobdir " + name + "\n" print('=====') print(newdata) @@ -254,41 +258,15 @@ def parse4test(infile, tmpdir: Path, isMulti: bool = False, external: Dict[str, print(newdata) f.write(newdata) print("Input file parsed for test is located: ", newname) - return newname + jobdir = str(tmpdir / name) + return newname, jobdir -def parse4testNoFF(infile, tmpdir): +def parse4testNoFF(infile, tmpdir: Path, isMulti: bool = False) -> str: name = jobname(infile) - newname = name + "_noff" - newinfile = name + "_noff.in" - f = tmpdir.join(newinfile) - fullnewname = f.dirname + "/" + newinfile - with open(infile, 'r') as f_in: - data = f_in.readlines() - newdata = "" - hasFF = False - for line in data: - if ("-ff " in line): - hasFF = True - break - if not hasFF: - print("No FF optimization used in original input file. " - "No need to do further test.") - fullnewname = "" - else: - print("FF optimization used in original input file. " - "Now test for no FF result.") - for line in data: - # By getting rid of -ffoption, will use the force field option default, which is "N" - if not (("-jobdir" in line) or ("-name" in line) - or ("-ff " in line) or ("-ffoption " in line)): - newdata += line - newdata += "-jobdir " + newname + "\n" - newdata += "-name " + newname + "\n" - print(newdata) - f.write(newdata) - print("Input file parsed for no FF test is located: ", fullnewname) - return fullnewname + newinfile = str(tmpdir / (name + "_noff.in")) + shutil.copyfile(infile, newinfile) + return parse4test(newinfile, tmpdir, isMulti, extra_args={"-ffoption": "N"}) def report_to_dict(lines): @@ -420,11 +398,11 @@ def runtest(tmpdir, name, threshMLBL, threshLG, threshOG, seed=31415): random.seed(seed) np.random.seed(seed) infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") - newinfile = parse4test(infile, tmpdir) + "molSimplify"), f"tests/inputs/{name}.in") + newinfile, myjobdir = parse4test(infile, tmpdir) args = ['main.py', '-i', newinfile] - startgen(args, False, False) - myjobdir = jobdir(infile) + with working_directory(tmpdir): + startgen(args, False, False) output_xyz = myjobdir + '/' + name + '.xyz' output_report = myjobdir + '/' + name + '.report' output_qcin = myjobdir + '/terachem_input' @@ -438,11 +416,11 @@ def runtest(tmpdir, name, threshMLBL, threshLG, threshOG, seed=31415): output_qcin = myjobdir + '/molcas.input' ref_xyz = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".xyz") + "molSimplify"), f"tests/refs/{name}.xyz") ref_report = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".report") + "molSimplify"), f"tests/refs/{name}.report") ref_qcin = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".qcin") + "molSimplify"), f"tests/refs/{name}.qcin") print("Test input file: ", newinfile) print("Test output files are generated in ", myjobdir) @@ -462,7 +440,7 @@ def runtest(tmpdir, name, threshMLBL, threshLG, threshOG, seed=31415): return [passNumAtoms, passMLBL, passLG, passOG, pass_report, pass_qcin] -def runtest_slab(tmpdir, name, threshOG): +def runtest_slab(tmpdir, name, threshOG, extra_files=None): """ Performs test for slab builder. @@ -476,14 +454,19 @@ def runtest_slab(tmpdir, name, threshOG): tolerance for RMSD comparison of overall geometries. """ infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") - newinfile = parse4test(infile, tmpdir) + "molSimplify"), f"tests/inputs/{name}.in") + newinfile, _ = parse4test(infile, tmpdir) + if extra_files is not None: + for file_name in extra_files: + file_path = resource_filename(Requirement.parse( + "molSimplify"), f"tests/inputs/{file_name}") + shutil.copyfile(file_path, tmpdir / file_name) args = ['main.py', '-i', newinfile] - startgen(args, False, False) - myjobdir = jobdir(infile) + "/slab/" - output_xyz = myjobdir + '/super332.xyz' + with working_directory(tmpdir): + startgen(args, False, False) + output_xyz = str(tmpdir / 'slab' / 'super332.xyz') ref_xyz = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".xyz") + "molSimplify"), f"tests/refs/{name}.xyz") print("Output xyz file: ", output_xyz) pass_xyz = compareGeo(output_xyz, ref_xyz, threshMLBL=0, threshLG=0, threshOG=threshOG, slab=True) @@ -491,7 +474,7 @@ def runtest_slab(tmpdir, name, threshOG): return [passNumAtoms, passOG] -def runtest_molecule_on_slab(tmpdir, name, threshOG): +def runtest_molecule_on_slab(tmpdir, name, threshOG, extra_files=None): """ Performs test for slab builder with a CO molecule adsorbed. @@ -505,15 +488,20 @@ def runtest_molecule_on_slab(tmpdir, name, threshOG): tolerance for RMSD comparison of overall geometries. """ infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") - newinfile = parse4test(infile, tmpdir, external={ + "molSimplify"), f"tests/inputs/{name}.in") + newinfile, _ = parse4test(infile, tmpdir, extra_args={ '-unit_cell': 'slab.xyz', '-target_molecule': 'co.xyz'}) + if extra_files is not None: + for file_name in extra_files: + file_path = resource_filename(Requirement.parse( + "molSimplify"), f"tests/inputs/{file_name}") + shutil.copyfile(file_path, tmpdir / file_name) args = ['main.py', '-i', newinfile] - startgen(args, False, False) - myjobdir = os.path.split(jobdir(infile))[0] + "/loaded_slab/" - output_xyz = myjobdir + '/loaded.xyz' + with working_directory(tmpdir): + startgen(args, False, False) + output_xyz = str(tmpdir / 'loaded_slab' / 'loaded.xyz') ref_xyz = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".xyz") + "molSimplify"), f"tests/refs/{name}.xyz") print("Output xyz file: ", output_xyz) pass_xyz = compareGeo(output_xyz, ref_xyz, threshMLBL=0, threshLG=0, threshOG=threshOG, slab=True) @@ -523,26 +511,26 @@ def runtest_molecule_on_slab(tmpdir, name, threshOG): def runtestgeo(tmpdir, name, thresh, deleteH=True, geo_type="oct"): initgeo = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/geocheck/" + name + "/init.xyz") + "molSimplify"), f"tests/inputs/geocheck/{name}/init.xyz") optgeo = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/geocheck/" + name + "/opt.xyz") + "molSimplify"), f"tests/inputs/geocheck/{name}/opt.xyz") refjson = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/geocheck/" + name + "/ref.json") + "molSimplify"), f"tests/refs/geocheck/{name}/ref.json") mymol = mol3D() mymol.readfromxyz(optgeo) init_mol = mol3D() init_mol.readfromxyz(initgeo) - if geo_type == "oct": - _, _, dict_struct_info = mymol.IsOct(init_mol=init_mol, - debug=False, - flag_deleteH=deleteH) - elif geo_type == "one_empty": - _, _, dict_struct_info = mymol.IsStructure( - init_mol=init_mol, dict_check=dict_oneempty_check_st, - angle_ref=oneempty_angle_ref, num_coord=5, debug=False, - flag_deleteH=deleteH) - else: - raise ValueError(f"Invalid geo_type {geo_type}") + with working_directory(tmpdir): + if geo_type == "oct": + _, _, dict_struct_info = mymol.IsOct( + init_mol=init_mol, debug=False, flag_deleteH=deleteH) + elif geo_type == "one_empty": + _, _, dict_struct_info = mymol.IsStructure( + init_mol=init_mol, dict_check=dict_oneempty_check_st, + angle_ref=oneempty_angle_ref, num_coord=5, debug=False, + flag_deleteH=deleteH) + else: + raise ValueError(f"Invalid geo_type {geo_type}") with open(refjson, "r") as fo: dict_ref = json.load(fo) # passGeo = (sorted(dict_ref.items()) == sorted(dict_struct_info.items())) @@ -554,9 +542,9 @@ def runtestgeo(tmpdir, name, thresh, deleteH=True, geo_type="oct"): def runtestgeo_optonly(tmpdir, name, thresh, deleteH=True, geo_type="oct"): optgeo = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/geocheck/" + name + "/opt.xyz") + "molSimplify"), f"tests/inputs/geocheck/{name}/opt.xyz") refjson = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/geocheck/" + name + "/ref.json") + "molSimplify"), f"tests/refs/geocheck/{name}/ref.json") mymol = mol3D() mymol.readfromxyz(optgeo) if geo_type == "oct": @@ -572,15 +560,15 @@ def runtestgeo_optonly(tmpdir, name, thresh, deleteH=True, geo_type="oct"): def runtestNoFF(tmpdir, name, threshMLBL, threshLG, threshOG): infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") - newinfile = parse4testNoFF(infile, tmpdir) + "molSimplify"), f"tests/inputs/{name}.in") + newinfile, myjobdir = parse4testNoFF(infile, tmpdir) [passNumAtoms, passMLBL, passLG, passOG, pass_report, pass_qcin] = [True, True, True, True, True, True] if newinfile != "": newname = jobname(newinfile) args = ['main.py', '-i', newinfile] - startgen(args, False, False) - myjobdir = jobdir(newinfile) + with working_directory(tmpdir): + startgen(args, False, False) output_xyz = myjobdir + '/' + newname + '.xyz' output_report = myjobdir + '/' + newname + '.report' with open(newinfile, 'r') as f_in: @@ -591,11 +579,11 @@ def runtestNoFF(tmpdir, name, threshMLBL, threshLG, threshOG): if 'molcas' in molsim_data.lower(): output_qcin = myjobdir + '/molcas.input' ref_xyz = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + newname + ".xyz") + "molSimplify"), f"tests/refs/{newname}.xyz") ref_report = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + newname + ".report") + "molSimplify"), f"tests/refs/{newname}.report") ref_qcin = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".qcin") + "molSimplify"), f"tests/refs/{name}.qcin") print("Test input file: ", newinfile) print("Test output files are generated in ", myjobdir) print("Output xyz file: ", output_xyz) @@ -620,21 +608,21 @@ def runtest_reportonly(tmpdir, name, seed=31415): random.seed(seed) np.random.seed(seed) infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") + "molSimplify"), f"tests/inputs/{name}.in") # Copy the input file to the temporary folder shutil.copy(infile, tmpdir/f'{name}_reportonly.in') # Add the report only flag with open(tmpdir/f'{name}_reportonly.in', 'a') as f: f.write('-reportonly True\n') - newinfile = parse4test(tmpdir/f'{name}_reportonly.in', tmpdir) + newinfile, myjobdir = parse4test(tmpdir/f'{name}_reportonly.in', tmpdir) args = ['main.py', '-i', newinfile] with open(newinfile, 'r') as f: print(f.readlines()) - startgen(args, False, False) - myjobdir = jobdir(f'{infile}_reportonly') + with working_directory(tmpdir): + startgen(args, False, False) output_report = myjobdir + '/' + name + '_reportonly.report' ref_report = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + ".report") + "molSimplify"), f"tests/refs/{name}.report") # Copy the reference report to the temporary folder shutil.copy(ref_report, tmpdir/f'{name}_ref.report') with open(tmpdir/f'{name}_ref.report', 'r') as f: @@ -654,16 +642,15 @@ def runtest_reportonly(tmpdir, name, seed=31415): def runtestMulti(tmpdir, name, threshMLBL, threshLG, threshOG): infile = resource_filename(Requirement.parse( - "molSimplify"), "tests/inputs/" + name + ".in") - newinfile = parse4test(infile, tmpdir, True) + "molSimplify"), f"tests/inputs/{name}.in") + newinfile, myjobdir = parse4test(infile, tmpdir, True) args = ['main.py', '-i', newinfile] - # Need to make the ligand file visible to the input file - startgen(args, False, False) - myjobdir = jobdir(infile) + "/" + with working_directory(tmpdir): + startgen(args, False, False) print("Test input file: ", newinfile) print("Test output files are generated in ", myjobdir) refdir = resource_filename(Requirement.parse( - "molSimplify"), "tests/refs/" + name + "/") + "molSimplify"), f"tests/refs/{name}") [passMultiFileCheck, myfiles] = checkMultiFileGen(myjobdir, refdir) pass_structures = [] if not passMultiFileCheck: @@ -674,10 +661,10 @@ def runtestMulti(tmpdir, name, threshMLBL, threshLG, threshOG): for f in myfiles: if ".xyz" in f: r = f.replace(".xyz", ".report") - output_xyz = myjobdir + f - ref_xyz = refdir + f - output_report = myjobdir + r - ref_report = refdir + r + output_xyz = f"{myjobdir}/{f}" + ref_xyz = f"{refdir}/{f}" + output_report = f"{myjobdir}/{r}" + ref_report = f"{refdir}/{r}" print("Output xyz file: ", output_xyz) print("Reference xyz file: ", ref_xyz) print("Test report file: ", output_report) diff --git a/tests/inputs/ligand_from_mol_file.in b/tests/inputs/ligand_from_mol_file.in new file mode 100644 index 00000000..4590d649 --- /dev/null +++ b/tests/inputs/ligand_from_mol_file.in @@ -0,0 +1,8 @@ +-core co +-geo thd +-lig pdp.mol +-ligocc 1 +-smicat 1,4,7,12 +-skipANN True +-jobdir ligand_from_mol_file +-name ligand_from_mol_file diff --git a/tests/inputs/pdp.mol b/tests/inputs/pdp.mol new file mode 100644 index 00000000..5e7d372a --- /dev/null +++ b/tests/inputs/pdp.mol @@ -0,0 +1,108 @@ + +C20N4H26 - Chemcraft + + 50 53 0 0 0 0 0 0 0 0 1 V2000 + 4.6245 1.1323 0.4536 N 0 0 0 0 0 0 0 0 0 0 0 0 + 5.8871 1.7930 0.7478 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8323 1.9104 -0.4549 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.9749 0.8076 -1.4021 N 0 0 0 0 0 0 0 0 0 0 0 0 + 7.2952 -0.5138 -0.8910 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0668 -1.3020 -0.5174 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.0954 -1.3270 -1.4146 N 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9935 -1.9938 -1.1417 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8040 -2.7136 0.0289 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8460 -2.7509 0.9400 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9982 -2.0349 0.6643 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7541 0.8038 -2.5066 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0602 0.7553 -3.7876 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6179 1.8169 -4.4834 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8269 3.0015 -3.7967 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5195 3.0515 -2.4480 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0195 1.9081 -1.8286 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6874 1.8867 -0.3612 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6923 2.8439 1.0630 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4273 2.7206 -1.0728 H 0 0 0 0 0 0 0 0 0 0 0 0 + 8.0120 -0.5016 -0.0545 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.7686 -1.0610 -1.7139 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.2284 -1.9528 -1.9049 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8820 -3.2442 0.2136 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.7631 -3.3276 1.8518 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8328 -2.0594 1.3511 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.8360 -0.1796 -4.2841 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.8593 1.7299 -5.5318 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2218 3.8724 -4.3027 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6526 3.9670 -1.8880 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.7204 1.3867 -0.2598 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.5887 2.9256 0.0131 H 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0295 0.8096 1.7413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 8.0020 1.3310 -2.2852 C 0 0 0 0 0 0 0 0 0 0 0 0 + 9.0520 2.0351 -1.3890 C 0 0 0 0 0 0 0 0 0 0 0 0 + 8.2800 2.3317 -0.0957 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.1909 0.4926 2.6849 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4318 1.0452 1.9729 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.3474 -0.0346 1.6180 H 0 0 0 0 0 0 0 0 0 0 0 0 + 3.4513 1.6654 2.1346 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.2797 -0.5804 2.8422 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.0801 0.2306 1.6686 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.0404 0.9646 3.6555 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.0031 1.7174 2.6128 H 0 0 0 0 0 0 0 0 0 0 0 0 + 8.3257 3.3809 0.1909 H 0 0 0 0 0 0 0 0 0 0 0 0 + 8.6634 1.7389 0.7336 H 0 0 0 0 0 0 0 0 0 0 0 0 + 9.8993 1.3760 -1.2010 H 0 0 0 0 0 0 0 0 0 0 0 0 + 9.4275 2.9414 -1.8608 H 0 0 0 0 0 0 0 0 0 0 0 0 + 7.5299 2.0626 -2.9485 H 0 0 0 0 0 0 0 0 0 0 0 0 + 8.4312 0.5444 -2.9036 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 0 + 1 18 1 0 0 0 0 + 1 33 1 0 0 0 0 + 2 3 1 0 0 0 0 + 2 19 1 0 0 0 0 + 2 38 1 0 0 0 0 + 3 4 1 0 0 0 0 + 3 20 1 0 0 0 0 + 3 36 1 0 0 0 0 + 4 5 1 0 0 0 0 + 4 34 1 0 0 0 0 + 5 6 1 0 0 0 0 + 5 21 1 0 0 0 0 + 5 22 1 0 0 0 0 + 6 7 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 8 1 0 0 0 0 + 8 9 2 0 0 0 0 + 8 23 1 0 0 0 0 + 9 10 1 0 0 0 0 + 9 24 1 0 0 0 0 + 10 11 2 0 0 0 0 + 10 25 1 0 0 0 0 + 11 26 1 0 0 0 0 + 12 13 2 0 0 0 0 + 12 17 1 0 0 0 0 + 13 14 1 0 0 0 0 + 13 27 1 0 0 0 0 + 14 15 2 0 0 0 0 + 14 28 1 0 0 0 0 + 15 16 1 0 0 0 0 + 15 29 1 0 0 0 0 + 16 17 2 0 0 0 0 + 16 30 1 0 0 0 0 + 17 18 1 0 0 0 0 + 18 31 1 0 0 0 0 + 18 32 1 0 0 0 0 + 33 37 1 0 0 0 0 + 33 39 1 0 0 0 0 + 33 40 1 0 0 0 0 + 34 35 1 0 0 0 0 + 34 49 1 0 0 0 0 + 34 50 1 0 0 0 0 + 35 36 1 0 0 0 0 + 35 47 1 0 0 0 0 + 35 48 1 0 0 0 0 + 36 45 1 0 0 0 0 + 36 46 1 0 0 0 0 + 37 38 1 0 0 0 0 + 37 41 1 0 0 0 0 + 37 43 1 0 0 0 0 + 38 42 1 0 0 0 0 + 38 44 1 0 0 0 0 +M END diff --git a/tests/inputs/tutorial_2.in b/tests/inputs/tutorial_2.in index dbbdf77b..35f7218e 100644 --- a/tests/inputs/tutorial_2.in +++ b/tests/inputs/tutorial_2.in @@ -1,6 +1,5 @@ # molSimplify input file generated from CLI input -slab_gen --cif_path ./tests/inputs/pd_test_tutorial_2.cif +-cif_path ./pd_test_tutorial_2.cif -slab_size [10,10,5] -freeze 1 --rundir ./Runs/tutorial_2/ diff --git a/tests/refs/example_5_v3_noff.report b/tests/refs/example_5_v3_noff.report new file mode 100644 index 00000000..b6952f1e --- /dev/null +++ b/tests/refs/example_5_v3_noff.report @@ -0,0 +1,26 @@ +Bad structure?, False +Min_dist (A), 1000 +Was ANN used?, True +geo_label, 1 +geo_prob, 0.535566508769989 +geo_LSE, 0.6460251693156032 +geo_label_trust, very low +sc_label, 1 +sc_prob, 0.6266213059425354 +sc_LSE, 0.46387957090589826 +sc_label_trust, low +split, 14.74660312250645 +split_dist, 4.6684607692499975 +This spin, 2 +ANN_ground_state, 2 +homo, -16.108805172272547 +gap, 3.892649740346613 +homo_dist, 0.5508993 +gap_dist, 0.32417762 +ANN_bondl, [2.0039539722366224, 2.0039539722366224, 2.0039539722366224, 2.0039539722366224, 2.0323831041867453, 2.0134948758888322] +homo_trust, high +gap_trust, high +split_trust, very low +Was Catalytic ANN used?, False +Catalytic ANN reason, False +ML-bl (database, A), 2.1 diff --git a/tests/refs/example_5_v3_noff.xyz b/tests/refs/example_5_v3_noff.xyz new file mode 100644 index 00000000..c52e525c --- /dev/null +++ b/tests/refs/example_5_v3_noff.xyz @@ -0,0 +1,51 @@ +49 +02/20/2023 18:39, XYZ structure generated by mol3D Class, molSimplify +Fe 0.000000 0.000000 0.000000 +C 2.310198 1.312182 -0.008538 +N 3.644933 1.491883 -0.016350 +N 4.155323 0.270208 -0.012734 +N 3.166844 -0.609337 -0.002958 +N 2.003954 -0.000008 -0.000000 +C 1.306201 2.370649 -0.008641 +N 0.000000 2.003954 0.000000 +C -1.003950 2.921287 0.000708 +C -0.704373 4.284711 -0.007808 +C 0.632428 4.689121 -0.016634 +C 1.649979 3.730990 -0.017397 +H 3.293214 -1.645210 0.001685 +H -2.034183 2.590911 0.007630 +H -1.499466 5.019479 -0.007105 +H 0.878982 5.743134 -0.023346 +H 2.685649 4.045650 -0.024319 +C -0.008129 -1.305353 2.314053 +N -0.015759 -1.487032 3.648896 +N -0.012196 -0.266512 4.161307 +N -0.002798 0.614333 3.174248 +N -0.000000 0.000008 2.003954 +C -0.008400 -2.363804 1.309777 +N 0.000000 -2.003954 0.000000 +C 0.000370 -2.929328 -0.996958 +C -0.008035 -4.291475 -0.687121 +C -0.016766 -4.686755 0.652103 +C -0.016999 -3.721621 1.661918 +H 0.001770 1.650095 3.301944 +H 0.007189 -2.607500 -2.029742 +H -0.007780 -5.032033 -1.476412 +H -0.023336 -5.738668 0.906823 +H -0.023775 -4.030029 2.699870 +C -1.305284 -0.008132 -2.314109 +N -1.486888 -0.015765 -3.648970 +N -0.266344 -0.012201 -4.161314 +N 0.614435 -0.002799 -3.174191 +N 0.000008 0.000000 -2.003954 +C -2.363708 -0.008404 -1.309855 +N -2.003954 0.000000 0.000000 +C -2.929528 0.000369 0.996790 +C -4.291647 -0.008040 0.686719 +C -4.686735 -0.016773 -0.652555 +C -3.721462 -0.017006 -1.662192 +H 1.650207 0.001771 -3.301794 +H -2.607910 0.007190 2.029635 +H -5.032354 -0.007787 1.475865 +H -5.738601 -0.023345 -0.907475 +H -4.029707 -0.023784 -2.700199 diff --git a/tests/refs/example_7_noff.report b/tests/refs/example_7_noff.report new file mode 100644 index 00000000..ba94d067 --- /dev/null +++ b/tests/refs/example_7_noff.report @@ -0,0 +1,26 @@ +Bad structure?, False +Min_dist (A), 1000 +Was ANN used?, True +geo_label, 1 +geo_prob, 0.6362841725349426 +geo_LSE, 0.6435379100977079 +geo_label_trust, very low +sc_label, 1 +sc_prob, 0.7506815195083618 +sc_LSE, 0.00404575700263443 +sc_label_trust, very high +split, -10.693817228134996 +split_dist, 6.257216042605385 +This spin, 5 +ANN_ground_state, 5 +homo, -14.88207923127291 +gap, 3.471243378939224 +homo_dist, 0.8219034 +gap_dist, 0.12647906 +ANN_bondl, [2.35870837152265, 2.35870837152265, 2.35870837152265, 2.35870837152265, 2.439952284758965, 2.499759437738978] +homo_trust, high +gap_trust, high +split_trust, very low +Was Catalytic ANN used?, False +Catalytic ANN reason, False +ML-bl (database, A), 2.24 diff --git a/tests/refs/example_7_noff.xyz b/tests/refs/example_7_noff.xyz new file mode 100644 index 00000000..1785e26d --- /dev/null +++ b/tests/refs/example_7_noff.xyz @@ -0,0 +1,53 @@ +51 +02/20/2023 18:44, XYZ structure generated by mol3D Class, molSimplify +Fe 0.000000 0.000000 0.000000 +C -0.054143 5.211994 0.002163 +C 1.114714 4.551162 -0.000100 +C 1.141445 3.091515 -0.002517 +N 0.000000 2.358708 0.000000 +C -1.141445 3.098629 0.002517 +C -1.275022 4.441674 0.003072 +H 2.063465 5.070724 0.000059 +H 2.125942 2.633824 -0.006336 +H -2.087045 2.555601 0.004505 +H -2.236948 4.931210 0.004623 +Cl -0.136468 6.929052 0.005342 +C 5.214504 0.153001 0.028462 +C 4.559928 -0.522137 0.978551 +C 3.099080 -0.633170 0.952384 +N 2.358708 0.000000 0.000000 +C 3.102387 0.633170 -0.952384 +C 4.442437 0.759609 -1.030129 +H 6.290361 0.261150 0.028734 +H 5.094888 -0.988091 1.799057 +H 2.564646 1.105924 -1.772974 +H 4.933234 1.288181 -1.836647 +C 2.418179 -1.252061 2.141236 +O 1.287195 -2.052014 1.809366 +H 3.104156 -1.915360 2.675732 +H 2.110870 -0.461185 2.835740 +H 0.625959 -1.845429 2.483935 +C -0.000502 -5.155806 -0.000268 +C -1.180219 -4.452940 0.222440 +C -1.129949 -3.068051 0.211168 +N 0.000000 -2.358708 0.000000 +C 1.129949 -3.068557 -0.211168 +C 1.179095 -4.453477 -0.222353 +H -0.000947 -6.241998 -0.000405 +H -2.115659 -4.970590 0.400012 +H -2.024406 -2.474393 0.376697 +H 2.024562 -2.475422 -0.376216 +H 2.114817 -4.971646 -0.399594 +C -5.155806 -0.000528 -0.000212 +C -4.452940 -0.588231 -1.047082 +C -3.068051 -0.564553 -1.001327 +N -2.358708 0.000000 0.000000 +C -3.068557 0.564553 1.001327 +C -4.453477 0.587576 1.046165 +H -6.241998 -0.000919 -0.000465 +H -4.970590 -1.053493 -1.877811 +H -2.474393 -1.012697 -1.792921 +H -2.475422 1.013165 1.792731 +H -4.971646 1.053271 1.876898 +Cl 0.000000 0.000000 2.439952 +Cl 0.000000 0.000000 -2.499759 diff --git a/tests/refs/ligand_from_mol_file.report b/tests/refs/ligand_from_mol_file.report new file mode 100644 index 00000000..930dc5d3 --- /dev/null +++ b/tests/refs/ligand_from_mol_file.report @@ -0,0 +1,7 @@ +Bad structure?, False +Min_dist (A), 1000 +Was ANN used?, False +ANN reason, ANN skipped by user +Was Catalytic ANN used?, False +Catalytic ANN reason, False +ML-bl (database, A), 2.15 diff --git a/tests/refs/ligand_from_mol_file.xyz b/tests/refs/ligand_from_mol_file.xyz new file mode 100644 index 00000000..9e7509a4 --- /dev/null +++ b/tests/refs/ligand_from_mol_file.xyz @@ -0,0 +1,53 @@ +51 +02/17/2023 12:02, XYZ structure generated by mol3D Class, molSimplify +Co 0.000020 0.000000 0.000000 +N 1.605456 -1.005343 0.217889 +C 2.092495 -0.911906 -1.150065 +C 1.400385 0.166845 -1.993244 +N -0.039556 0.387711 -1.886100 +C -0.933080 -0.745866 -2.048990 +C -1.184633 -1.473798 -0.754140 +N -1.504946 -0.720760 0.284871 +C -1.729394 -1.299643 1.445818 +C -1.688251 -2.672925 1.637467 +C -1.424981 -3.471009 0.537008 +C -1.169575 -2.863987 -0.680581 +N -0.060954 1.338393 1.383339 +C -0.821938 2.398937 1.201240 +C -0.393714 3.547986 0.554346 +C 0.918020 3.589876 0.111498 +C 1.715759 2.472652 0.288727 +C 1.169374 1.344981 0.897695 +C 1.973561 0.089370 1.099522 +H 3.168777 -0.623372 -1.142722 +H 1.825657 1.121399 -1.661249 +H -0.611040 -1.452949 -2.830019 +H -1.901123 -0.338519 -2.361498 +H -1.960418 -0.626460 2.260058 +H -1.875950 -3.105799 2.608613 +H -1.417533 -4.549454 0.624953 +H -0.976595 -3.463911 -1.559047 +H -1.823238 2.327563 1.605122 +H -1.054482 4.391008 0.421358 +H 1.313925 4.477782 -0.363192 +H 2.749694 2.480287 -0.028092 +H 1.775820 -0.256404 2.117711 +H 3.053981 0.320941 1.008833 +C 2.109673 -2.266356 0.739370 +C -0.239972 1.409245 -2.898546 +C 0.633808 1.015492 -4.116289 +C 1.689177 0.077750 -3.513447 +C 2.135446 -3.240365 -0.439766 +C 2.011595 -2.349662 -1.682123 +H 1.457871 -2.604537 1.548027 +H 3.129926 -2.145431 1.146622 +H 1.305237 -3.941244 -0.380771 +H 1.060022 -2.524707 -2.172360 +H 3.061727 -3.814342 -0.446984 +H 2.807994 -2.540445 -2.401192 +H 2.707522 0.388552 -3.740216 +H 1.559497 -0.941322 -3.875137 +H 0.034288 0.503710 -4.868838 +H 1.080477 1.893155 -4.580153 +H 0.107560 2.361128 -2.484753 +H -1.293899 1.516687 -3.149675 diff --git a/tests/refs/tetrahedral_2_noff.xyz b/tests/refs/tetrahedral_2_noff.xyz index 24451acb..1d4c4680 100644 --- a/tests/refs/tetrahedral_2_noff.xyz +++ b/tests/refs/tetrahedral_2_noff.xyz @@ -1,43 +1,43 @@ 41 -10/26/2021 14:20, XYZ structure generated by mol3D Class, molSimplify +02/20/2023 18:33, XYZ structure generated by mol3D Class, molSimplify Co 0.000020 0.000000 0.000000 N -0.696681 -1.342795 -1.442085 -N -0.696689 -0.744047 1.160117 -C -0.672122 -1.567525 -2.766416 -H -0.095040 -0.863963 -3.358176 -C -1.337890 -2.632368 -3.366709 -H -1.290341 -2.771063 -4.441280 -C -2.061257 -3.499824 -2.550032 -H -2.597304 -4.343471 -2.973786 -C -2.094056 -3.270458 -1.175622 -H -2.660322 -3.937756 -0.537442 -C -2.032256 -2.638107 1.775602 -H -2.580982 -3.528703 1.494940 -C -1.971921 -2.262413 3.116397 -H -2.474098 -2.857886 3.872495 -C -1.262022 -1.115999 3.467403 -H -1.190727 -0.784444 4.497679 -C -0.640559 -0.391263 2.453720 -H -0.077002 0.509084 2.677419 -C -1.400949 -2.179506 -0.640618 -C -1.383548 -1.857791 0.811853 -N 2.090020 -0.000000 0.000000 -N -0.090202 1.502358 -0.345692 -C 3.139731 -0.811585 0.210704 -H 2.903775 -1.848263 0.429396 -C 4.459596 -0.372864 0.156223 -H 5.274068 -1.066018 0.336145 -C 4.694408 0.970948 -0.129670 -H 5.707184 1.358578 -0.181445 -C 3.609875 1.819361 -0.345705 -H 3.786833 2.865938 -0.561529 -C 1.147545 3.502566 -0.811712 -H 2.096379 4.012256 -0.925561 -C -0.038956 4.212104 -0.988604 -H -0.010538 5.268686 -1.236152 -C -1.254414 3.546901 -0.841655 -H -2.202020 4.059492 -0.967239 -C -1.227981 2.192050 -0.521198 -H -2.148725 1.631108 -0.395351 -C 2.309167 1.309488 -0.275034 -C 1.094707 2.141981 -0.488381 +N -0.942467 -1.006203 1.570788 +C -0.603661 -1.481444 -2.781275 +H 0.003559 -0.731913 -3.280272 +C -1.221099 -2.488349 -3.483482 +H -1.117412 -2.562098 -4.559405 +C -1.975229 -3.394712 -2.759502 +H -2.482679 -4.207998 -3.271387 +C -2.082413 -3.260875 -1.366867 +H -2.689072 -3.996789 -0.855363 +C -2.240804 -3.006095 1.625785 +H -2.727898 -3.863478 1.179158 +C -2.346774 -2.873612 3.015224 +H -2.901349 -3.609629 3.590708 +C -1.742777 -1.802473 3.652368 +H -1.808065 -1.675018 4.726766 +C -1.052685 -0.903196 2.874011 +H -0.554405 -0.042629 3.309804 +C -1.432155 -2.220018 -0.684314 +C -1.532387 -2.065753 0.861362 +N 2.090020 0.000000 0.000000 +N -0.123198 2.033234 -0.467845 +C 3.069674 -0.900181 0.227256 +H 2.727487 -1.909680 0.429646 +C 4.407352 -0.584501 0.208098 +H 5.160612 -1.340135 0.394891 +C 4.743562 0.731767 -0.057255 +H 5.787326 1.029129 -0.082549 +C 3.733025 1.677347 -0.293135 +H 4.055916 2.691243 -0.490715 +C 1.557071 3.676268 -0.871531 +H 2.578245 4.023402 -0.959929 +C 0.546382 4.616716 -1.106036 +H 0.806406 5.640562 -1.358859 +C -0.782612 4.239996 -1.015891 +H -1.584818 4.945728 -1.192010 +C -1.053400 2.928897 -0.694382 +H -2.072121 2.563717 -0.609291 +C 2.376743 1.314697 -0.268822 +C 1.244436 2.349505 -0.539639 diff --git a/tests/test_ligand_from_mol_file.py b/tests/test_ligand_from_mol_file.py new file mode 100644 index 00000000..ada65713 --- /dev/null +++ b/tests/test_ligand_from_mol_file.py @@ -0,0 +1,40 @@ +import pytest +from molSimplify.Scripts.generator import startgen +from helperFuncs import working_directory, compareGeo, compare_report_new +from pkg_resources import resource_filename, Requirement +import shutil + + +@pytest.mark.skip("Loading multidentate ligands from files is currently not supported") +def test_ligand_from_mol_file(tmpdir): + input_file = resource_filename(Requirement.parse( + "molSimplify"), "tests/inputs/ligand_from_mol_file.in") + shutil.copyfile(input_file, tmpdir / "ligand_from_mol_file.in") + mol_file = resource_filename(Requirement.parse( + "molSimplify"), "tests/inputs/pdp.mol") + shutil.copyfile(mol_file, tmpdir / "pdp.mol") + + ref_xyz = resource_filename(Requirement.parse( + "molSimplify"), "tests/refs/ligand_from_mol_file.xyz") + ref_report = resource_filename(Requirement.parse( + "molSimplify"), "tests/refs/ligand_from_mol_file.report") + + threshMLBL = 0.1 + threshLG = 0.1 + threshOG = 0.1 + + with working_directory(tmpdir): + startgen(['main.py', '-i', 'ligand_from_mol_file.in'], flag=False, gui=False) + + jobdir = tmpdir / 'Runs' / 'ligand_from_mol_file' + output_xyz = str(jobdir / 'ligand_from_mol_file.xyz') + output_report = str(jobdir / 'ligand_from_mol_file.report') + + passNumAtoms, passMLBL, passLG, passOG = compareGeo( + output_xyz, ref_xyz, threshMLBL, threshLG, threshOG) + assert passNumAtoms + assert passMLBL + assert passLG + assert passOG + pass_report = compare_report_new(output_report, ref_report) + assert pass_report diff --git a/tests/test_tutorial_2.py b/tests/test_tutorial_2.py index 424e416b..4e90adc7 100644 --- a/tests/test_tutorial_2.py +++ b/tests/test_tutorial_2.py @@ -4,6 +4,7 @@ def test_tutorial_2(tmpdir): testName = "tutorial_2" threshOG = 2.0 - [passNumAtoms, passOG] = hp.runtest_slab(tmpdir, testName, threshOG) + [passNumAtoms, passOG] = hp.runtest_slab( + tmpdir, testName, threshOG, extra_files=['pd_test_tutorial_2.cif']) assert passNumAtoms assert passOG \ No newline at end of file From c38855a01f8fb72216c2652b468cf5ac1ec5f508 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 14:24:34 -0500 Subject: [PATCH 03/17] Remove print statement --- molSimplify/Informatics/lacRACAssemble.py | 1 - 1 file changed, 1 deletion(-) diff --git a/molSimplify/Informatics/lacRACAssemble.py b/molSimplify/Informatics/lacRACAssemble.py index 14b0d93d..c1e5d9e5 100644 --- a/molSimplify/Informatics/lacRACAssemble.py +++ b/molSimplify/Informatics/lacRACAssemble.py @@ -94,7 +94,6 @@ def get_descriptor_vector(this_complex, custom_ligand_dict=False, 'eq_ligand_list': eq_ligand_list, 'ax_con_int_list': ax_con_int_list, 'eq_con_int_list': eq_con_int_list} - print("custom_ligand_dict: ", custom_ligand_dict) # misc descriptors results_dictionary = generate_all_ligand_misc(this_complex, loud=False, custom_ligand_dict=custom_ligand_dict, From 30e533736dd05b1ac48af6451b84f563dbf61962 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 14:33:36 -0500 Subject: [PATCH 04/17] Openbabel version 3 style imports --- molSimplify/Classes/mGUI.py | 5 ++++- molSimplify/Classes/mol3D.py | 5 ++++- molSimplify/Scripts/addtodb.py | 5 ++++- molSimplify/Scripts/cellbuilder_tools.py | 5 ++++- molSimplify/Scripts/dbinteract.py | 5 ++++- molSimplify/Scripts/distgeom.py | 5 ++++- molSimplify/Scripts/io.py | 5 ++++- molSimplify/Scripts/structgen.py | 5 ++++- molSimplify/Scripts/tf_nn_prep.py | 2 -- tests/test_basic_imports.py | 5 ++++- 10 files changed, 36 insertions(+), 11 deletions(-) diff --git a/molSimplify/Classes/mGUI.py b/molSimplify/Classes/mGUI.py index 2f0ad508..748a42a5 100644 --- a/molSimplify/Classes/mGUI.py +++ b/molSimplify/Classes/mGUI.py @@ -42,7 +42,10 @@ from pkg_resources import resource_filename, Requirement import xml.etree.ElementTree as ET -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 # Main GUI class diff --git a/molSimplify/Classes/mol3D.py b/molSimplify/Classes/mol3D.py index cc041cf1..e3830d93 100644 --- a/molSimplify/Classes/mol3D.py +++ b/molSimplify/Classes/mol3D.py @@ -12,7 +12,10 @@ import time import xml.etree.ElementTree as ET import numpy as np -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 from typing import List, Optional from scipy.spatial import ConvexHull from molSimplify.utils.decorators import deprecated diff --git a/molSimplify/Scripts/addtodb.py b/molSimplify/Scripts/addtodb.py index 19cfdd1a..6ef1c800 100644 --- a/molSimplify/Scripts/addtodb.py +++ b/molSimplify/Scripts/addtodb.py @@ -12,7 +12,10 @@ import sys import re import unicodedata -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 import shutil import pathlib diff --git a/molSimplify/Scripts/cellbuilder_tools.py b/molSimplify/Scripts/cellbuilder_tools.py index 46d09041..9c73fbd3 100644 --- a/molSimplify/Scripts/cellbuilder_tools.py +++ b/molSimplify/Scripts/cellbuilder_tools.py @@ -8,7 +8,10 @@ import re import numpy -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 import copy from math import sqrt, pi from molSimplify.Classes.globalvars import globalvars diff --git a/molSimplify/Scripts/dbinteract.py b/molSimplify/Scripts/dbinteract.py index a01edc55..488a04a3 100644 --- a/molSimplify/Scripts/dbinteract.py +++ b/molSimplify/Scripts/dbinteract.py @@ -14,7 +14,10 @@ import pymol except ImportError: pass -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 from molSimplify.Classes.globalvars import (amassdict, glob, diff --git a/molSimplify/Scripts/distgeom.py b/molSimplify/Scripts/distgeom.py index 64291ede..4ba3cd1b 100644 --- a/molSimplify/Scripts/distgeom.py +++ b/molSimplify/Scripts/distgeom.py @@ -16,7 +16,10 @@ import numpy as np import numpy -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 from scipy import optimize from math import sqrt, cos diff --git a/molSimplify/Scripts/io.py b/molSimplify/Scripts/io.py index 1221e6fe..afd7c165 100644 --- a/molSimplify/Scripts/io.py +++ b/molSimplify/Scripts/io.py @@ -14,7 +14,10 @@ import time import difflib -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 from typing import Any, List, Dict, Tuple, Union, Optional from pkg_resources import resource_filename, Requirement diff --git a/molSimplify/Scripts/structgen.py b/molSimplify/Scripts/structgen.py index 810223a8..79a77f04 100644 --- a/molSimplify/Scripts/structgen.py +++ b/molSimplify/Scripts/structgen.py @@ -8,7 +8,10 @@ import os import subprocess import tempfile -import openbabel +try: + from openbabel import openbabel # version 3 style import +except ImportError: + import openbabel # fallback to version 2 import random import itertools import numpy as np diff --git a/molSimplify/Scripts/tf_nn_prep.py b/molSimplify/Scripts/tf_nn_prep.py index 941cbd3b..6ac525f1 100644 --- a/molSimplify/Scripts/tf_nn_prep.py +++ b/molSimplify/Scripts/tf_nn_prep.py @@ -25,8 +25,6 @@ find_true_min_eu_dist) -# import numpy -# import openbabel def spin_classify(metal: str, spin: Union[int, str], ox: int) -> Tuple[bool, List[int]]: metal_spin_dictionary = {'co': {2: 4, 3: 5}, 'cr': {2: 5, 3: 4}, diff --git a/tests/test_basic_imports.py b/tests/test_basic_imports.py index 78c98f7d..840a72d4 100644 --- a/tests/test_basic_imports.py +++ b/tests/test_basic_imports.py @@ -53,7 +53,10 @@ def test_openbabel_import(): Test whether openbabel can be imported ''' try: - import openbabel # noqa: F401 + try: + from openbabel import openbabel # version 3 style import + except ImportError: # fallback to version 2 + import openbabel # noqa: F401 assert "openbabel" in sys.modules except ImportError: assert 0 From 39da17e1e00f1e8a646f7188b88e0658b302379a Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 15:08:31 -0500 Subject: [PATCH 05/17] Cleanup create_coulomb_matrix and add test case --- molSimplify/Informatics/coulomb_analyze.py | 52 ++++++++++++---------- tests/informatics/test_coulomb_analyze.py | 28 ++++++++++++ 2 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 tests/informatics/test_coulomb_analyze.py diff --git a/molSimplify/Informatics/coulomb_analyze.py b/molSimplify/Informatics/coulomb_analyze.py index b239e6ad..2958fd7d 100644 --- a/molSimplify/Informatics/coulomb_analyze.py +++ b/molSimplify/Informatics/coulomb_analyze.py @@ -1,53 +1,57 @@ -import numpy +import numpy as np -from molSimplify.Classes.atom3D import (atom3D) +from molSimplify.Classes.atom3D import atom3D from molSimplify.Classes.globalvars import globalvars -from molSimplify.Scripts.geometry import (distance) +from molSimplify.Scripts.geometry import distance +from molSimplify.utils.decorators import deprecated -#name metal ox axlig_charge eqlig charge axlig_dent eqlig_dent axlig_connect eqlig_connect axlig_natoms eqlig_natoms axlig_mdelen eqlig_mdelen - -########### UNIT CONVERSION -HF_to_Kcal_mol = 627.5095 +@deprecated('Use the correctly spelled version create_coulomb_matrix instead.') def create_columb_matrix(mol): - ## create Coulomb matrix from mol3D information + return create_coulomb_matrix(mol) + + +def create_coulomb_matrix(mol): + # create Coulomb matrix from mol3D information index_set = list(range(0, mol.natoms)) - ## fetch the database of nuclear charges + # fetch the database of nuclear charges globs = globalvars() amassdict = globs.amass() - A = numpy.matrix(numpy.zeros((mol.natoms, mol.natoms))) - ## main build + A = np.zeros((mol.natoms, mol.natoms)) + # main build for i in index_set: + Zi = float(amassdict[mol.getAtom(i).symbol()][1]) for j in index_set: if i == j: - A[i, j] = 0.5*numpy.power(float(amassdict[mol.getAtom(i).symbol()][1]), (2.4)) + A[i, j] = 0.5*np.power(Zi, (2.4)) else: + Zj = float(amassdict[mol.getAtom(j).symbol()][1]) this_dist = distance(mol.getAtom(i).coords(), mol.getAtom(j).coords()) if this_dist != 0.0: - A[i, j] = float(amassdict[mol.getAtom(i).symbol()][1])*float(amassdict[mol.getAtom(j).symbol()][1])/float(this_dist) + A[i, j] = Zi*Zj/float(this_dist) else: A[i, j] = 0 - ## sort by columns in increasing order + # sort by columns in increasing order weights = [] for col in A.T: - weights.append(numpy.linalg.norm(col)) - sort_weights = (numpy.argsort(weights))[::-1] + weights.append(np.linalg.norm(col)) + sort_weights = (np.argsort(weights))[::-1] A = A[:, sort_weights] - ## sort by rows in increasing order + # sort by rows in increasing order weights = [] for row in A: - weights.append(numpy.linalg.norm(row)) - sort_weights = (numpy.argsort(weights))[::-1] + weights.append(np.linalg.norm(row)) + sort_weights = (np.argsort(weights))[::-1] A = A[sort_weights, :] return A def pad_mol(mol, target_atoms): - ## adds placeholder atoms - ## with zero nuclear charge - ## located at the origin - ## in order to get consistent size - ## coulomb matrix + # adds placeholder atoms + # with zero nuclear charge + # located at the origin + # in order to get consistent size + # coulomb matrix this_natoms = mol.natoms blank_atom = atom3D(Sym='X') # placeholder type blank_atom.frozen = False diff --git a/tests/informatics/test_coulomb_analyze.py b/tests/informatics/test_coulomb_analyze.py new file mode 100644 index 00000000..95749f98 --- /dev/null +++ b/tests/informatics/test_coulomb_analyze.py @@ -0,0 +1,28 @@ +import numpy as np +from molSimplify.Classes.mol3D import mol3D +from pkg_resources import resource_filename, Requirement +from molSimplify.Informatics.coulomb_analyze import create_coulomb_matrix + + +def test_create_coulomb_matrix(): + xyz_file = resource_filename( + Requirement.parse("molSimplify"), + "tests/inputs/cr3_f6_optimization.xyz" + ) + mol = mol3D() + mol.readfromxyz(xyz_file) + cm = create_coulomb_matrix(mol) + print(cm) + nuclear_charges = np.array([24., 9., 9., 9., 9., 9., 9.]) + xyzs = mol.coordsvect() + r = np.sqrt(np.sum((xyzs[:, np.newaxis, :] - xyzs[np.newaxis, :, :])**2, axis=-1)) + ref = np.outer(nuclear_charges, nuclear_charges) + ref[r > 0.] /= r[r > 0.] + ref[np.diag_indices_from(ref)] = 0.5 * nuclear_charges ** 2.4 + # Finally sort by L2 norm + norms = np.linalg.norm(ref, axis=0) + inds = np.argsort(norms)[::-1] + ref = ref[inds, :] + ref = ref[:, inds] + print(ref) + np.testing.assert_allclose(cm, ref, atol=1e-8) From 3270cec0920eab2439fb6532f813b31122a6dd0c Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 15:14:36 -0500 Subject: [PATCH 06/17] Replace np.matrix by np.array --- molSimplify/Scripts/cellbuilder_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molSimplify/Scripts/cellbuilder_tools.py b/molSimplify/Scripts/cellbuilder_tools.py index 9c73fbd3..398a876e 100644 --- a/molSimplify/Scripts/cellbuilder_tools.py +++ b/molSimplify/Scripts/cellbuilder_tools.py @@ -227,7 +227,7 @@ def get_basis_coefficients(point, basis): # for an arbitrary point in a given (complete) # basis set coefficients = numpy.linalg.solve( - numpy.transpose(numpy.asmatrix(basis)), point) + numpy.transpose(numpy.asarray(basis)), point) return coefficients ################################### From a710d3e0b26dd34a5cc6752a3c4643badac31e5a Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 15:22:05 -0500 Subject: [PATCH 07/17] Silence tf ANN predictions --- molSimplify/python_nn/tf_ANN.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/molSimplify/python_nn/tf_ANN.py b/molSimplify/python_nn/tf_ANN.py index 4d6b4841..8a2acbc1 100644 --- a/molSimplify/python_nn/tf_ANN.py +++ b/molSimplify/python_nn/tf_ANN.py @@ -63,7 +63,7 @@ def perform_ANN_prediction(RAC_dataframe: pd.DataFrame, predictor_name: str, RAC_subset_for_ANN = RAC_dataframe.loc[:, train_vars].astype(float) normalized_input = data_normalize(RAC_subset_for_ANN, train_mean_x, train_var_x) - ANN_prediction = my_ANN.predict(normalized_input) + ANN_prediction = my_ANN.predict(normalized_input, verbose=0) rescaled_output = data_rescale(ANN_prediction, train_mean_y, train_var_y) # Get latent vectors for training data and queried data @@ -399,7 +399,7 @@ def load_train_info(predictor: str, suffix: str = 'info') -> dict: return loaded_info_dict -def load_keras_ann(predictor: str, suffix: str = 'model', compile: bool = False): +def load_keras_ann(predictor: str, suffix: str = 'model', compile: bool = False) -> tf.keras.Model: # this function loads the ANN for property # "predcitor" # disable TF output text to reduce console spam @@ -498,7 +498,7 @@ def ANN_supervisor(predictor: str, ## fetch ANN loaded_model = load_keras_ann(predictor) - result = data_rescale(loaded_model.predict(excitation), train_mean_y, train_var_y, debug=debug) + result = data_rescale(loaded_model.predict(excitation, verbose=0), train_mean_y, train_var_y, debug=debug) if "clf" not in predictor: if debug: print(('LOADED MODEL HAS ' + str( From 3e21c7de8f5d985d468e098a50a7d6e1a7ff45af Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 16:03:12 -0500 Subject: [PATCH 08/17] Remove passing of globs --- molSimplify/Scripts/generator.py | 4 +-- molSimplify/Scripts/rungen.py | 51 +++++++++++++------------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/molSimplify/Scripts/generator.py b/molSimplify/Scripts/generator.py index 46f0e88d..6429892f 100644 --- a/molSimplify/Scripts/generator.py +++ b/molSimplify/Scripts/generator.py @@ -208,7 +208,7 @@ def startgen(argv, flag, gui, inputfile_str=None, write_files=True): args.gui = gui args.core = cc if (args.lig or args.coord or args.lignum or args.ligocc): # constraints given? - args, emsg = constrgen(rundir, args, globs) + args, emsg = constrgen(rundir, args) if emsg: del args return emsg @@ -252,7 +252,7 @@ def startgen(argv, flag, gui, inputfile_str=None, write_files=True): print('building an equilibrium complex') for cc in corests: args.core = cc - emsg = multigenruns(rundir, args, globs, write_files=write_files) + emsg = multigenruns(rundir, args, write_files=write_files) if emsg: print(emsg) del args diff --git a/molSimplify/Scripts/rungen.py b/molSimplify/Scripts/rungen.py index 60bbec67..b7800f9e 100644 --- a/molSimplify/Scripts/rungen.py +++ b/molSimplify/Scripts/rungen.py @@ -13,6 +13,7 @@ from collections import Counter import glob +from molSimplify.Classes.globalvars import globalvars from molSimplify.Scripts.isomers import generateisomers from molSimplify.Scripts.jobgen import (sgejobgen, slurmjobgen) @@ -42,7 +43,7 @@ ##################################### -def constrgen(rundir, args, globs): +def constrgen(rundir, args): emsg = False # load global variables licores = getlicores() @@ -110,7 +111,7 @@ def constrgen(rundir, args, globs): args.lig = [a for a in ligs0] args.ligocc = [int(a) for a in ligocc0] # run structure generation - emsg = rungen(rundir, args, False, globs) + emsg = rungen(rundir, args) else: if args.gui: from molSimplify.Classes.mWidgets import mQDialogErr @@ -140,7 +141,7 @@ def constrgen(rundir, args, globs): args.keepHs.append(opt) else: args.keepHs = [opt] - emsg = rungen(rundir, args, False, globs) # run structure generation + emsg = rungen(rundir, args) # run structure generation return args, emsg @@ -198,7 +199,7 @@ def getconstsample(no_rgen, args, licores, coord): # @return Error messages -def multigenruns(rundir, args, globs, write_files=True): +def multigenruns(rundir, args, write_files=True): emsg = False args.jid = 0 # initilize global name identifier multch = False @@ -212,7 +213,6 @@ def multigenruns(rundir, args, globs, write_files=True): if (args.spin and len(args.spin) > 1): multsp = True # iterate over all - fname = False if (multch and multsp): for ch in charges: for sp in spins: @@ -222,10 +222,7 @@ def multigenruns(rundir, args, globs, write_files=True): fname = 'N'+ch[1:]+'S'+sp else: fname = 'P'+ch+'S'+sp - # if args.tsgen: - # emsg = tsgen_supervisor(rundir,args,fname,globs) - # else: - emsg = rungen(rundir, args, fname, globs, write_files=write_files) + emsg = rungen(rundir, args, fname, write_files=write_files) if emsg: return emsg elif (multch): @@ -237,10 +234,7 @@ def multigenruns(rundir, args, globs, write_files=True): fname = 'N'+ch[1:] else: fname = 'P'+ch[1:] - # if args.tsgen: - # emsg = tsgen_supervisor(rundir,args,fname,globs) - # else: - emsg = rungen(rundir, args, fname, globs, write_files=write_files) + emsg = rungen(rundir, args, fname, write_files=write_files) if emsg: return emsg elif (multsp): @@ -249,10 +243,7 @@ def multigenruns(rundir, args, globs, write_files=True): for sp in spins: args.spin = sp fname = 'S'+sp - # if args.tsgen: - # emsg = tsgen_supervisor(rundir,args,fname,globs) - # else: - emsg = rungen(rundir, args, fname, globs, write_files=write_files) + emsg = rungen(rundir, args, fname, write_files=write_files) if emsg: return emsg elif args.isomers or args.stereos: @@ -296,17 +287,14 @@ def multigenruns(rundir, args, globs, write_files=True): print(('******************* Generating isomer ' + str(counter+1) + '! *******************')) print('**************************************************************') - emsg = rungen(rundir, args, fname, globs, write_files=write_files) + emsg = rungen(rundir, args, write_files=write_files) return emsg else: if args.charge: args.charge = args.charge[0] if args.spin: args.spin = args.spin[0] - # if args.tsgen: - # emsg = tsgen_supervisor(rundir,args,fname,globs) - # else: - emsg = rungen(rundir, args, fname, globs, write_files=write_files) + emsg = rungen(rundir, args, write_files=write_files) return emsg # Check for multiple ligands specified in one file @@ -377,7 +365,8 @@ def draw_supervisor(args, rundir): print('Due to technical limitations, we will draw only the first core.') print('Drawing the core.') if args.substrate: - print('Due to technical limitations, we can draw only one structure per run. To draw the substrate, run the program again.') + print('Due to technical limitations, we can draw only one structure per run. ' + 'To draw the substrate, run the program again.') cc, emsg = core_load(args.core[0]) cc.draw_svg(args.core[0]) elif args.substrate: @@ -394,12 +383,12 @@ def draw_supervisor(args, rundir): # @param rundir Run directory # @param args Namespace of arguments # @param chspfname Folder name for charges and spins -# @param globs Global variables # @return Error messages -def rungen(rundir, args, chspfname, globs, write_files=True): +def rungen(rundir, args, chspfname=None, write_files=True): emsg = '' + globs = globalvars() globs.nosmiles = 0 # reset smiles ligands for each run # check for specified ligands/functionalization ligocc = [] @@ -471,14 +460,14 @@ def rungen(rundir, args, chspfname, globs, write_files=True): mligcatoms = args.mligcatoms fname = name_ts_complex(rundir, args.core, args.geometry, ligands, ligocc, substrate, subcatoms, mlig, mligcatoms, mcount, args, nconf=False, sanity=False, bind=args.bind, bsmi=args.nambsmi) - if globs.debug: + if args.debug: print(('fname is ' + str(fname))) rootdir = fname # check for charges/spin rootcheck = False - if (chspfname): + if chspfname is not None: rootcheck = rootdir - rootdir = rootdir + '/'+chspfname + rootdir = rootdir + '/' + chspfname if (args.suff): rootdir += args.suff # check for mannual overwrite of @@ -516,14 +505,14 @@ def rungen(rundir, args, chspfname, globs, write_files=True): rootcheck += '_'+str(ifold) os.mkdir(rootcheck) elif rootcheck and (not os.path.isdir(rootcheck) or not args.checkdirt) and not skip: - if globs.debug: + if args.debug: print(('rootcheck is ' + str(rootcheck))) args.checkdirt = True try: os.mkdir(rootcheck) except FileExistsError: - print(('Directory '+rootcheck+' can not be created. Exiting..\n')) - return 'Directory '+rootcheck+' can not be created. Exiting..' + print(f'Directory {rootcheck} can not be created. Exiting..\n') + return f'Directory {rootcheck} can not be created. Exiting..' # check for actual directory if os.path.isdir(rootdir) and not args.checkdirb and not skip and not args.jobdir: args.checkdirb = True From d13512f4d6724b6f743826bc223a5b2e48bf009b Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Fri, 3 Mar 2023 16:04:37 -0500 Subject: [PATCH 09/17] Remove unhelpful print statements --- molSimplify/Scripts/io.py | 3 ++- molSimplify/Scripts/tf_nn_prep.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/molSimplify/Scripts/io.py b/molSimplify/Scripts/io.py index afd7c165..66e28d8f 100644 --- a/molSimplify/Scripts/io.py +++ b/molSimplify/Scripts/io.py @@ -997,7 +997,8 @@ def name_complex(rootdir: str, core, geometry, ligs, ligoc, sernum, else: # ligand is in ligands.dict name += '_' + str(lig) + '_' + str(ligoc[i]) name += "_s_"+str(spin) - print([nconf, args.nconfs]) + if args.debug: + print([nconf, args.nconfs]) if nconf and int(args.nconfs) >= 1: name += "_conf_"+str(nconf) if args.bind: diff --git a/molSimplify/Scripts/tf_nn_prep.py b/molSimplify/Scripts/tf_nn_prep.py index 6ac525f1..48ac6fc1 100644 --- a/molSimplify/Scripts/tf_nn_prep.py +++ b/molSimplify/Scripts/tf_nn_prep.py @@ -425,8 +425,8 @@ def tf_ANN_preproc(metal: str, oxstate, spin, ligs: List[str], occs: List[int], ax_lig3D.convert2mol3D() # mol3D representation of ligand for jj in range(0, ax_occs[ii]): ax_ligands_list.append(this_lig) - print(('Obtained the net ligand charge, which is... ', net_lig_charge)) if debug: + print(f'Obtained the net ligand charge, which is... {net_lig_charge}') print('ax_ligands_list:') print(ax_ligands_list) print([h.mol.cat for h in ax_ligands_list]) From ed988462574d4ebfa1a8d688cd6c63a27be57251 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Thu, 20 Apr 2023 17:50:18 -0400 Subject: [PATCH 10/17] Remove my name from this example --- molSimplify/Classes/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molSimplify/Classes/README.md b/molSimplify/Classes/README.md index 7f1b7a28..547d28c5 100644 --- a/molSimplify/Classes/README.md +++ b/molSimplify/Classes/README.md @@ -1 +1 @@ -To members of the Kulik group: If you have custom data that you want to use for yourself but do not want to push to the main repo, e.g. custom ligands for molSimplify ligand construction, you can make a file `.molSimplify` and include in it a line like `CUSTOM_DATA_PATH=/Users/ralf/molSimplify_custom_data`. In that folder you specified, you can place a folder `Ligands`, and molSimplify will look there first before looking at the Ligands folder in the central repo. This functionality is implemented in `globalvars.py`. \ No newline at end of file +To members of the Kulik group: If you have custom data that you want to use for yourself but do not want to push to the main repo, e.g. custom ligands for molSimplify ligand construction, you can make a file `.molSimplify` and include in it a line like `CUSTOM_DATA_PATH=/Users/your_user/molSimplify_custom_data`. In that folder you specified, you can place a folder `Ligands`, and molSimplify will look there first before looking at the Ligands folder in the central repo. This functionality is implemented in `globalvars.py`. \ No newline at end of file From 350a7227417de6e11635f92d2fc88691ca14a8d3 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Sat, 22 Apr 2023 18:52:01 -0400 Subject: [PATCH 11/17] Add/Simplify __repr__() of monomer3D/mol3D --- molSimplify/Classes/mol3D.py | 3 +-- molSimplify/Classes/monomer3D.py | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/molSimplify/Classes/mol3D.py b/molSimplify/Classes/mol3D.py index e3830d93..d25617cf 100644 --- a/molSimplify/Classes/mol3D.py +++ b/molSimplify/Classes/mol3D.py @@ -125,8 +125,7 @@ def __init__(self, name='ABC', loc='', use_atom_specific_cutoffs=False): self.use_atom_specific_cutoffs = use_atom_specific_cutoffs def __repr__(self): - """ - Returns all bound methods of the mol3D class. + return f"mol3D({self.make_formula(latex=False)})" Returns ------- diff --git a/molSimplify/Classes/monomer3D.py b/molSimplify/Classes/monomer3D.py index 102d0dbe..2a654bb1 100644 --- a/molSimplify/Classes/monomer3D.py +++ b/molSimplify/Classes/monomer3D.py @@ -42,6 +42,9 @@ def __init__(self, three_lc='GLY', chain='undef', id=-1, occup=1.00, loc=''): # Temporary list for storing conformations self.temp_list = [] + def __repr__(self): + return f"monomer3D({self.three_lc}, id={self.id})" + def identify(self): """ States whether the amino acid is (positively/negatively) charged, polar, or hydrophobic. From b0499301366affd5870c696a4a900a66b3e9222f Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Sat, 22 Apr 2023 18:52:31 -0400 Subject: [PATCH 12/17] Add doctests for files in the Classes folder --- .github/workflows/CI.yaml | 5 ++ molSimplify/Classes/mol3D.py | 130 +++++++++++++++++-------------- molSimplify/Classes/monomer3D.py | 6 +- molSimplify/Classes/protein3D.py | 87 ++++++++++++++------- 4 files changed, 134 insertions(+), 94 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index cfd2f5ba..bf2a543d 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -79,6 +79,11 @@ jobs: run: | pytest -v --cov=molSimplify --cov-report=xml + - name: Run doctest + # For now only testing the files in the Classes folder + run: | + pytest --doctest-modules molSimplify/Classes + - name: Upload coverage report to codecov uses: codecov/codecov-action@v3 with: diff --git a/molSimplify/Classes/mol3D.py b/molSimplify/Classes/mol3D.py index d25617cf..78003ecd 100644 --- a/molSimplify/Classes/mol3D.py +++ b/molSimplify/Classes/mol3D.py @@ -45,7 +45,7 @@ class mol3D: Example instantiation of an octahedral iron-ammonia complex from an XYZ file: >>> complex_mol = mol3D() - >>> complex_mol.readfromxyz('fe_nh3_6.xyz') + >>> complex_mol.readfromxyz('fe_nh3_6.xyz') # doctest: +SKIP """ @@ -127,18 +127,7 @@ def __init__(self, name='ABC', loc='', use_atom_specific_cutoffs=False): def __repr__(self): return f"mol3D({self.make_formula(latex=False)})" - Returns - ------- - method_string : string - String of methods available in mol3D class. - """ - - method_string = "\nClass mol3D has the following methods:\n" - for method in dir(self): - if callable(getattr(self, method)): - method_string += method + '\n' - return method_string - + @deprecated("Preliminary testing showed this function is unreliable at best") def ACM(self, idx1, idx2, idx3, angle): """ Performs angular movement on mol3D class. A submolecule is @@ -154,8 +143,6 @@ def ACM(self, idx1, idx2, idx3, angle): Index of anchor atom 2. angle : float New bond angle in degrees. - - >>> complex_mol.ACM(2, 1, 0, 180) # Make atom 2 form a 180 degree angle with atoms 1 and 0. """ atidxs_to_move = self.findsubMol(idx1, idx2) @@ -233,7 +220,8 @@ def addAtom(self, atom: atom3D, index: Optional[int] = None, auto_populate_BO_di auto_populate_BO_dict : bool, optional Populate bond order dictionary with newly added atom. Default is True. - >>> C_atom = atom3D('C',[1, 1, 1]) + >>> complex_mol = mol3D() + >>> C_atom = atom3D('C', [1, 1, 1]) >>> complex_mol.addAtom(C_atom) # Add carbon atom at cartesian position 1, 1, 1 to mol3D object. """ @@ -486,7 +474,13 @@ def BCM(self, idx1, idx2, d): d : float Bond distance in angstroms. - >>> complex_mol.BCM(1, 0, 1.5) # Set distance between atoms 0 and 1 to be 1.5 angstroms. Move atom 1. + >>> complex_mol = mol3D() + >>> complex_mol.addAtom(atom3D('H', [0, 0, 0])) + >>> complex_mol.addAtom(atom3D('H', [0, 0, 1])) + >>> complex_mol.BCM(1, 0, 0.7) # Set distance between atoms 0 and 1 to be 1.5 angstroms. Move atom 1. + >>> complex_mol.coordsvect() + array([[0. , 0. , 0. ], + [0. , 0. , 0.7]]) """ bondv = self.getAtom(idx1).distancev(self.getAtom(idx2)) # 1 - 2 @@ -1626,7 +1620,7 @@ def getBondedAtoms(self, idx): """ - if len(self.graph): # The graph exists. + if len(self.graph): # The graph exists. nats = list(np.nonzero(np.ravel(self.graph[idx]))[0]) else: ratom = self.getAtom(idx) @@ -2540,25 +2534,33 @@ def RCAngle(self, idx1, idx2, idx3, anglei, anglef, angleint=1.0, writegeo=False The directory to which generated reaction coordinate geoemtries are written, if writegeo=True. - >>> complex_mol.RCAngle(2, 1, 0, 90, 180, 0.5, True, 'rc_geometries') # Generate reaction coordinate - >>> # geometries using the given structure by changing the angle between atoms 2, 1, - >>> # and 0 from 90 degrees to 180 degrees in intervals of 0.5 degrees, and write the - >>> # generated geometries to 'rc_geometries' directory. - >>> complex_mol.RCAngle(2, 1, 0, 180, 90, -0.5) # Generate reaction coordinates - >>> # with the given geometry by changing the angle between atoms 2, 1, and 0 from - >>> # 180 degrees to 90 degrees in intervals of 0.5 degrees, and the generated - >>> # geometries will not be written to a directory. + >>> complex_mol = mol3D() + >>> complex_mol.addAtom(atom3D('O', [0, 0, 0])) + >>> complex_mol.addAtom(atom3D('H', [0, 0, 1])) + >>> complex_mol.addAtom(atom3D('H', [0, 1, 0])) + + Generate reaction coordinate geometries using the given structure by changing the angle between atoms 2, 1, + and 0 from 90 degrees to 160 degrees in intervals of 10 degrees + >>> complex_mol.RCAngle(2, 1, 0, 90, 160, 10) + [mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2)] + + Generate reaction coordinates with the given geometry by changing the angle between atoms 2, 1, and 0 from + 160 degrees to 90 degrees in intervals of 10 degrees, and the generated geometries will not be written to + a directory. + >>> complex_mol.RCAngle(2, 1, 0, 160, 90, -10) + [mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2), mol3D(O1H2)] """ if writegeo: os.mkdir(dir_name) - temp_list = [] - for ang_val in np.arange(anglei, anglef+angleint, angleint): - temp_angle = mol3D() - temp_angle.copymol3D(self) - temp_angle.ACM(idx1, idx2, idx3, ang_val) - temp_list.append(temp_angle) + temp_list = [] + for ang_val in np.arange(anglei, anglef+angleint, angleint): + temp_angle = mol3D() + temp_angle.copymol3D(self) + temp_angle.ACM(idx1, idx2, idx3, ang_val) + temp_list.append(temp_angle) + if writegeo: temp_angle.writexyz(str(dir_name)+"/rc_"+str(str("{:.4f}".format(ang_val)))+'.xyz') - return temp_list + return temp_list def RCDistance(self, idx1, idx2, disti, distf, distint=0.05, writegeo=False, dir_name='rc_distance_geometries'): """ @@ -2585,26 +2587,34 @@ def RCDistance(self, idx1, idx2, disti, distf, distint=0.05, writegeo=False, dir The directory to which generated reaction coordinate geoemtries are written if writegeo=True. - >>> complex_mol.RCDistance(1, 0, 1.0, 3.0, 0.01, True, 'rc_geometries') # Generate reaction coordinate - >>> # geometries using the given structure by changing the distance between atoms 1 and 0 - >>> # from 1.0 to 3.0 angstrom (atom 1 is moved) in intervals of 0.01 angstrom, and write - >>> # the generated geometries to 'rc_geometries' directory. - >>> complex_mol.RCDistance(1, 0, 3.0, 1.0, -0.02) # Generate reaction coordinates - >>> # geometries using the given structure by changing the distance between atoms 1 and 0 - >>> # from 3.0 to 1.0 angstrom (atom 1 is moved) in intervals of 0.02 angstrom, and - >>> # the generated geometries will not be written to a directory. + >>> complex_mol = mol3D() + >>> complex_mol.addAtom(atom3D('H', [0, 0, 0])) + >>> complex_mol.addAtom(atom3D('H', [0, 0, 1])) + + Generate reaction coordinate geometries using the given structure by changing the distance between atoms 1 and 0 + from 1.0 to 3.0 angstrom (atom 1 is moved) in intervals of 0.5 angstrom + >>> complex_mol.RCDistance(1, 0, 1.0, 3.0, 0.5) + [mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2)] + + Generate reaction coordinates + geometries using the given structure by changing the distance between atoms 1 and 0 + from 3.0 to 1.0 angstrom (atom 1 is moved) in intervals of 0.2 angstrom, and + the generated geometries will not be written to a directory. + >>> complex_mol.RCDistance(1, 0, 3.0, 1.0, -0.25) + [mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2), mol3D(H2)] """ if writegeo: os.mkdir(dir_name) - temp_list = [] - for dist_val in np.arange(disti, distf+distint, distint): - temp_dist = mol3D() - temp_dist.copymol3D(self) - temp_dist.BCM(idx1, idx2, dist_val) - temp_list.append(temp_dist) + temp_list = [] + for dist_val in np.arange(disti, distf+distint, distint): + temp_dist = mol3D() + temp_dist.copymol3D(self) + temp_dist.BCM(idx1, idx2, dist_val) + temp_list.append(temp_dist) + if writegeo: temp_dist.writexyz(str(dir_name)+"/rc_"+str(str("{:.4f}".format(dist_val)))+'.xyz') - return temp_list + return temp_list def returnxyz(self): """ @@ -4803,7 +4813,7 @@ def Structure_inspection(self, init_mol=None, catoms_arr=None, num_coord=5, dict return flag_oct, flag_list, dict_oct_info, flag_oct_loose, flag_list_loose def get_fcs(self, strict_cutoff=False, catom_list=None): - """ + """ Get first coordination shell of a transition metal complex. Parameters @@ -4828,7 +4838,7 @@ def get_fcs(self, strict_cutoff=False, catom_list=None): return fcs def get_bo_dict_from_inds(self, inds): - """ + """ Recreate bo_dict with correct indices Parameters @@ -4860,7 +4870,7 @@ def get_bo_dict_from_inds(self, inds): return new_bo_dict def create_mol_with_inds(self, inds): - """ + """ Create molecule with indices. Parameters @@ -4889,7 +4899,7 @@ def create_mol_with_inds(self, inds): return molnew def make_formula(self, latex=True): - """ + """ Get a chemical formula from the mol3D class instance. Parameters @@ -4924,7 +4934,7 @@ def make_formula(self, latex=True): return retstr def read_smiles(self, smiles, ff="mmff94", steps=2500): - """ + """ Read a smiles string and convert it to a mol3D class instance. Parameters @@ -4999,7 +5009,7 @@ def get_smiles(self, canonicalize=False, use_mol2=False) -> str: return smi def mols_symbols(self): - """ + """ Store symbols and their frequencies in symbols_dict attributes. """ self.symbols_dict = {} @@ -5010,7 +5020,7 @@ def mols_symbols(self): self.symbols_dict[atom.symbol()] += 1 def read_bonder_order(self, bofile): - """ + """ Get bond order information from file. Parameters @@ -5054,7 +5064,7 @@ def read_bonder_order(self, bofile): self.bodavrg_dict.update({ii: np.mean(devi)}) def read_charge(self, chargefile): - """ + """ Get charge information from file. Parameters @@ -5075,7 +5085,7 @@ def read_charge(self, chargefile): print(("chargefile does not exist.", chargefile)) def get_mol_graph_det(self, oct=True, useBOMat=False): - """ + """ Get molecular graph determinant. Parameters @@ -5484,7 +5494,7 @@ def get_features(self, lac=True, force_generate=False, eq_sym=False, return results def getMLBondLengths(self): - """ + """ Outputs the metal-ligand bond lengths in the complex. Returns @@ -5511,7 +5521,7 @@ def getMLBondLengths(self): @deprecated('Using this function might lead to inconsistent behavior.') def setAtoms(self, atoms): - """ + """ Set atoms of a mol3D class to atoms. Parameters @@ -5523,7 +5533,7 @@ def setAtoms(self, atoms): self.natoms = len(atoms) def setLoc(self, loc): - """ + """ Sets the conformation of an amino acid in the chain of a protein. Parameters diff --git a/molSimplify/Classes/monomer3D.py b/molSimplify/Classes/monomer3D.py index 2a654bb1..9fe5cb26 100644 --- a/molSimplify/Classes/monomer3D.py +++ b/molSimplify/Classes/monomer3D.py @@ -7,7 +7,8 @@ class monomer3D: - """Holds information about a monomer, used to do manipulations. Reads information from structure file (pdb) or is directly built from molsimplify. + """Holds information about a monomer, used to do manipulations. + Reads information from structure file (pdb) or is directly built from molsimplify. """ @@ -231,9 +232,6 @@ def addAtom(self, atom, index=None): Index of added atom. Default is None. auto_populate_BO_dict : bool, optional Populate bond order dictionary with newly added atom. Default is True. - - >>> C_atom = atom3D('C',[1, 1, 1]) - >>> complex_mol.addAtom(C_atom) # Add carbon atom at cartesian position 1, 1, 1 to mol3D object. """ if index is None: diff --git a/molSimplify/Classes/protein3D.py b/molSimplify/Classes/protein3D.py index 33d4275f..86461b03 100644 --- a/molSimplify/Classes/protein3D.py +++ b/molSimplify/Classes/protein3D.py @@ -238,12 +238,13 @@ def getMissingAtoms(self): Example demonstration of this method: >>> pdb_system = protein3D() - >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB - >>> for symbol_list in pdb_system.getMissingAtoms(): - >>> for symbol in symbol_list: - >>> print(symbol.sym) # Prints the symbol of missing atom - >>> print(symbol.coords()) # Prints the coordinates of the missing atom - they are all the - >>> # coordinates of origin by default (0.0,0.0,0.0) for missing atoms + >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB + fetched: 1MH1 + >>> missing_atoms = pdb_system.getMissingAtoms() + + List atoms in the first set of missing_atoms + >>> [atom.sym for atom in list(missing_atoms)[0]] + ['C', 'C', 'C', 'C', 'C', 'C', 'O'] """ return self.missing_atoms.values() @@ -253,10 +254,10 @@ def getMissingAAs(self): Example demonstration of this method: >>> pdb_system = protein3D() - >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB + >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB + fetched: 1MH1 >>> pdb_system.getMissingAAs() # This gives a list of monomer3D objects - >>> [pdb_system.getMissingAAs()[x].three_lc for x in range(len(pdb_system.getMissingAAs()))] # This returns - >>> # the list of missing AAs by their 3-letter codes + [monomer3D(VAL, id=182), monomer3D(LYS, id=183), monomer3D(LYS, id=184)] """ return self.missing_aas @@ -267,7 +268,9 @@ def countAAs(self): >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB + fetched: 1os7 >>> pdb_system.countAAs() # This return the number of AAs in the PDB for all the chains. + 1121 """ return self.naas @@ -292,8 +295,11 @@ def findAtom(self, sym="X", aa=True): Example demonstration of this method: >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB + fetched: 1os7 >>> pdb_system.findAtom(sym="S", aa=True) # Returns indices of sulphur atoms present in amino acids + [2166, 4442, 6733, 9041] >>> pdb_system.findAtom(sym="S", aa=False) # Returns indices of sulphur atoms present in heteromolecules + [9164, 9182, 9200] """ inds = [] if aa: @@ -329,8 +335,13 @@ def findAA(self, three_lc="XAA"): Example demonstration of this method: >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB - >>> pdb_system.findAA(three_lc = 'MET') # Returns a set of pairs where each pair is a combination of the chain name - >>> # and the index of the amino acid specified (in this case, 'MET') + fetched: 1os7 + + Return a set of pairs where each pair is a combination of the chain name and + the index of the amino acid specified (in this case, 'MET') + >>> aa_set = pdb_system.findAA(three_lc = 'MET') + >>> sorted(aa_set) # Sorting for reproducible order in doctest + [('A', 268), ('B', 268), ('C', 268), ('D', 268)] """ inds = set() for aa in self.aas.values(): @@ -355,24 +366,25 @@ def getChain(self, chain_id): Example demonstration of this method: >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB - >>> pdb_system.getChain('A') # Get chain A of the PDB + fetched: 1os7 + >>> pdb_system.getChain('A') # doctest: +SKIP """ p = protein3D() p.setPDBCode(self.pdbCode) p.setChains({chain_id: self.chains[chain_id]}) p.setR(self.R) p.setRfree(self.Rfree) - + missing_aas = [] for aa in self.missing_aas: if aa.chain == chain_id: - missing_aas.append(aa) + missing_aas.append(aa) p.setMissingAAs(missing_aas) aas = {} for aa in self.aas: if aa[0] == chain_id: - aas[aa] = self.aas[aa] + aas[aa] = self.aas[aa] p.setAAs(aas) gone_atoms = {} @@ -381,7 +393,7 @@ def getChain(self, chain_id): gone_atoms[aa] = self.missing_atoms[aa] p.setMissingAtoms(gone_atoms) - hets_flipped = {value[0]:key for key, value in self.hetmols.items()} + hets_flipped = {value[0]: key for key, value in self.hetmols.items()} atoms = {} a_ids = {} hets = {} @@ -411,7 +423,7 @@ def getChain(self, chain_id): bonds[a] = set() for b in self.bonds[a]: if b in p.atoms.values(): - bonds[a].add(b) + bonds[a].add(b) p.setBonds(bonds) p.setIndices(a_ids) @@ -438,13 +450,17 @@ def getMolecule(self, a_id, aas_only=False): Example demonstration of this method: >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB - >>> pdb_system.getMolecule(a_id=2166) # This returns an molSimplify.Classes.monomer3D obejct indicating - >>> # that the atom is part of an amino acid or nucleotide - >>> pdb_system.getMolecule(a_id=2166).three_lc() # This prints the three letter code of the amino acid or - >>> # nucleotide of which atom 2166 is a part of - >>> pdb_system.getMolecule(a_id=9164) # This returns a mol3D object indicating that the atom is part of a molecule - >>> # that is not an amino acid or nucleotide + fetched: 1os7 + + This returns an molSimplify.Classes.monomer3D object indicating that the atom is part of an amino acid or nucleotide: + >>> pdb_system.getMolecule(a_id=2166) + monomer3D(MET, id=268) + + This returns a mol3D object indicating that the atom is part of a molecule that is not an amino acid or nucleotide + >>> pdb_system.getMolecule(a_id=9164) + mol3D(S1O3N1C2) >>> pdb_system.getMolecule(a_id=9164).name # This prints the name of the molecule, in this case, it is 'TAU' + 'TAU' """ for s in self.aas.values(): for mol in s: # mol is monomer3D @@ -471,6 +487,7 @@ def stripAtoms(self, atoms_stripped): Example demonstration of this method: >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB + fetched: 1os7 >>> pdb_system.stripAtoms([2166, 4442, 6733, 2165]) # This removes the list of atoms with >>> # indices listedin the code """ @@ -550,8 +567,9 @@ def stripHetMol(self, hetmol): Example demonstration of this method: >>> pdb_system = protein3D() - >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB - >>> pdb_system.stripHetMol() + >>> pdb_system.fetch_pdb('3I40') # Fetch a PDB + fetched: 3I40 + >>> pdb_system.stripHetMol('HOH') """ hets = self.hetmols.copy() for k in hets.keys(): @@ -563,7 +581,10 @@ def stripHetMol(self, hetmol): for a in m.atoms: ids.append(self.a_ids[a]) self.stripAtoms(ids) - del self.hetmols[k] + try: # RM 2023/04/22: I don't think this is necessary as stripAtoms takes care of deleting the hetmol + del self.hetmols[k] + except KeyError: + pass def findMetal(self, transition_metals_only=True): """Find metal(s) in a protein3D class. @@ -581,6 +602,9 @@ def findMetal(self, transition_metals_only=True): >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') + fetched: 1os7 + >>> pdb_system.findMetal() + [9160, 9178, 9196, 9214] """ if not self.metals: metal_list = [] @@ -914,11 +938,13 @@ def readMetaData(self): print("warning: %s not found.\n" % pdbCode) else: try: - ### We then use beautiful soup to read the XML doc. LXML is an XML reader. The soup object is what we then use to parse! + # We then use beautiful soup to read the XML doc. LXML is an XML reader. + # The soup object is what we then use to parse! soup = BeautifulSoup(xml_doc.content, 'lxml-xml') - ### We can then use methods of the soup object to find "tags" within the XML file. This is how we would extract sections. - ### This is an example of getting everything with a "sec" tag. + # We can then use methods of the soup object to find "tags" within the XML file. + # This is how we would extract sections. + # This is an example of getting everything with a "sec" tag. body = soup.find_all('wwPDB-validation-information') entry = body[0].find_all("Entry") if "DataCompleteness" not in entry[0].attrs.keys(): @@ -986,7 +1012,8 @@ def setEDIAScores(self): The 4-character code of the protein3D class. """ code = self.pdbCode - cmd = 'curl -d \'{"edia":{ "pdbCode":"'+code+'"}}\' -H "Accept: application/json" -H "Content-Type: application/json" -X POST https://proteins.plus/api/edia_rest -k' + cmd = ('curl -d \'{"edia":{ "pdbCode":"'+code+'"}}\' -H "Accept: application/json"' + ' -H "Content-Type: application/json" -X POST https://proteins.plus/api/edia_rest -k') args = shlex.split(cmd) result = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) From 70f1fa68daf43ae372affe1d990aaef161862d2b Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Wed, 26 Apr 2023 20:39:32 -0400 Subject: [PATCH 13/17] Convert to scripts with main() functions --- ...measure_HFX_sensitivity_oxo_hat_reb_rel.py | 34 +- .../HFXsensitivity/measure_HFX_stable.py | 37 +- .../Informatics/bridge_functionalizer.py | 382 +++++++++--------- .../Informatics/frag_functionalizer.py | 364 ++++++++--------- molSimplify/Ligands/alphabetizer.py | 27 +- molSimplify/Scripts/convert_2to3.py | 17 +- molSimplify/Scripts/in_b3lyp_usetc.py | 163 ++++---- molSimplify/python_nn/dictionary_toolbox.py | 40 +- 8 files changed, 552 insertions(+), 512 deletions(-) diff --git a/molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py b/molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py index 420955bd..1e3feec5 100644 --- a/molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py +++ b/molSimplify/Informatics/HFXsensitivity/measure_HFX_sensitivity_oxo_hat_reb_rel.py @@ -422,17 +422,23 @@ def slope_sign_check(X, y, name, prop, num_points): return kept_points_X, kept_points_y, removed_dict_list -parser = argparse.ArgumentParser(description='Script to process some sensitivity data.') -parser.add_argument('--data', dest='path_to_csv', action='store', type=str, required=True, - help='Path to CSV containing raw data.') -parser.add_argument('--writepath', dest='path_to_write', action='store', type=str, default=False, - help='Path to dump processed data. Defaults to dumping in script directory.') -parser.add_argument('--R2', dest='R2_cutoff', action='store', type=float, default=0.99, - help='R2 check cutoff value for linearity. Default is 0.99.') -parser.add_argument('--cutoff', dest='CV_tolerance', action='store', type=int, default=5, - help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.') -parser.add_argument('--num_points', dest='num_points', action='store', type=int, default=4, - help='Minimum number of points to form the HFX line. Defaults to 4.') -args = parser.parse_args() -print(args) -measure_sensitivity(args.path_to_csv, args.path_to_write, args.R2_cutoff, args.CV_tolerance, args.num_points) +def main(): + parser = argparse.ArgumentParser(description='Script to process some sensitivity data.') + parser.add_argument('--data', dest='path_to_csv', action='store', type=str, required=True, + help='Path to CSV containing raw data.') + parser.add_argument('--writepath', dest='path_to_write', action='store', type=str, default=False, + help='Path to dump processed data. Defaults to dumping in script directory.') + parser.add_argument('--R2', dest='R2_cutoff', action='store', type=float, default=0.99, + help='R2 check cutoff value for linearity. Default is 0.99.') + parser.add_argument('--cutoff', dest='CV_tolerance', action='store', type=int, default=5, + help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.') + parser.add_argument('--num_points', dest='num_points', action='store', type=int, default=4, + help='Minimum number of points to form the HFX line. Defaults to 4.') + args = parser.parse_args() + print(args) + measure_sensitivity(args.path_to_csv, args.path_to_write, args.R2_cutoff, args.CV_tolerance, args.num_points) + + +if __name__ == "__main__": + main() + diff --git a/molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py b/molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py index 603bc7ab..7af407ca 100644 --- a/molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py +++ b/molSimplify/Informatics/HFXsensitivity/measure_HFX_stable.py @@ -326,19 +326,24 @@ def slope_sign_check(X, y, name, prop, num_points): return kept_points_X, kept_points_y, removed_dict_list -parser = argparse.ArgumentParser(description='Script to process some sensitivity data.') -parser.add_argument('--data', dest='path_to_csv', action='store', type=str, required=True, - help='Path to CSV containing raw data.') -parser.add_argument('--writepath', dest='path_to_write', action='store', type=str,default=False, - help='Path to dump processed data. Defaults to dumping in script directory.') -parser.add_argument('--prop', dest='prop', action='store', type=str, default='SSE', - help='Name for the property as in the CSV. Should be "SSE" for spin splitting.') -parser.add_argument('--R2', dest='R2_cutoff', action='store', type=float, default=0.99, - help='R2 check cutoff value for linearity. Default is 0.99.') -parser.add_argument('--cutoff', dest='CV_tolerance', action='store', type=int, default=5, - help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.') -parser.add_argument('--num_points', dest='num_points', action='store', type=int, default=4, - help='Minimum number of points to form the HFX line. Defaults to 4.') -args = parser.parse_args() -print(args) -measure_sensitivity(args.path_to_csv, args.path_to_write, args.prop, args.R2_cutoff, args.CV_tolerance, args.num_points) +def main(): + parser = argparse.ArgumentParser(description='Script to process some sensitivity data.') + parser.add_argument('--data', dest='path_to_csv', action='store', type=str, required=True, + help='Path to CSV containing raw data.') + parser.add_argument('--writepath', dest='path_to_write', action='store', type=str,default=False, + help='Path to dump processed data. Defaults to dumping in script directory.') + parser.add_argument('--prop', dest='prop', action='store', type=str, default='SSE', + help='Name for the property as in the CSV. Should be "SSE" for spin splitting.') + parser.add_argument('--R2', dest='R2_cutoff', action='store', type=float, default=0.99, + help='R2 check cutoff value for linearity. Default is 0.99.') + parser.add_argument('--cutoff', dest='CV_tolerance', action='store', type=int, default=5, + help='Heuristic cutoff for eliminating outliers. Defaults to 5 for SSE.') + parser.add_argument('--num_points', dest='num_points', action='store', type=int, default=4, + help='Minimum number of points to form the HFX line. Defaults to 4.') + args = parser.parse_args() + print(args) + measure_sensitivity(args.path_to_csv, args.path_to_write, args.prop, args.R2_cutoff, args.CV_tolerance, args.num_points) + + +if __name__ == "__main__": + main() diff --git a/molSimplify/Informatics/bridge_functionalizer.py b/molSimplify/Informatics/bridge_functionalizer.py index dfc92ffe..2ca316d6 100644 --- a/molSimplify/Informatics/bridge_functionalizer.py +++ b/molSimplify/Informatics/bridge_functionalizer.py @@ -12,26 +12,28 @@ ''' functional groups considered ''' -funcs = {'C6=CC=CC=C6':'phenyl', - 'C':'methyl', - 'F':'fluoro', - 'C(F)(F)F':'tetrafluoro', - 'Br':'bromo', - 'Cl':'chloro', - 'I':'iodo', - 'C#N':'cyano', - 'N':'amino', - 'O':'hydroxy', - 'S':'thiol'} +funcs = { + 'C6=CC=CC=C6': 'phenyl', + 'C': 'methyl', + 'F': 'fluoro', + 'C(F)(F)F': 'tetrafluoro', + 'Br': 'bromo', + 'Cl': 'chloro', + 'I': 'iodo', + 'C#N': 'cyano', + 'N': 'amino', + 'O': 'hydroxy', + 'S': 'thiol' +} + -''' -takes in a batch of synthesized macrocycles -''' -input_file = sys.argv[1] def read_synthesized_macrocycles(input_file): - with open(sys.argv[1],'r') as f: + ''' + takes in a batch of synthesized macrocycles + ''' + with open(input_file, 'r') as f: data = json.load(f) - print('----',len(data)) + print('----', len(data)) counter = 0 temp_list_first = [] for i, row in enumerate(data): @@ -46,12 +48,10 @@ def read_synthesized_macrocycles(input_file): temp_list_first.append(temp_dict) return temp_list_first -temp_list = read_synthesized_macrocycles(input_file) - def split_at_idx(smiles, idx): alphabet_indices = [i for i, val in enumerate(smiles) if val.isalpha()] - forbidden_end = ['=', '/', '\\', ')','['] + forbidden_end = ['=', '/', '\\', ')', '['] if (idx+1) == len(alphabet_indices): left = smiles right = '' @@ -63,181 +63,191 @@ def split_at_idx(smiles, idx): left = left[:-1] return left, right + def compatibility(func, bridge): - # not_compatible = ['C','O','S','X','x','none','NONE','N=','P='] - not_compatible = ['O','S','X','x','none','NONE','N=','P='] - if ((funcs[func] == 'phenyl') and any([bridge==val for val in not_compatible])): + # not_compatible = ['C', 'O', 'S', 'X', 'x', 'none', 'NONE','N=','P='] + not_compatible = ['O', 'S', 'X', 'x', 'none', 'NONE', 'N=', 'P='] + if ((funcs[func] == 'phenyl') and any([bridge == val for val in not_compatible])): return False - elif bridge in ['x','X','none','NONE','O','S', 'N=','P=']: + elif bridge in ['O', 'S', 'X', 'x', 'none', 'NONE', 'N=', 'P=']: return False - elif bridge in ['N','P']: - ### Only allow functional groups at CH bonds + elif bridge in ['N', 'P']: + # Only allow functional groups at CH bonds return False else: return True + def count_atoms(smiles): return len([val for val in smiles if val.isalpha()]) -''' -to functionalize, you need to start at the first one and move inwards in the order -of 1 --> 2 --> 3 --> 4 -''' -double_func = ['C'] # cases that have more than one functionalization -rounds = 0 -lig_func_list = [] -func_lig_list = [] -functionalized_counter = 0 -for lignum, ligand in enumerate(temp_list): - - used_func_groups = set() - for func in list(funcs.keys()): - temp_ligand = ligand.copy() - rounds += 1 - func_shift = count_atoms(func) - smicat = temp_ligand['coord_atoms_smicat'].copy() - smicat_zero = temp_ligand['coord_atoms_zero_index'].copy() - if 'frag_func_smiles' in list(ligand.keys()): - smiles = temp_ligand['frag_func_smiles'] - original = temp_ligand['frag_func_smiles'] - else: - smiles = temp_ligand['macrocycle_smiles'] - original = temp_ligand['macrocycle_smiles'] - if temp_ligand['bridge1'] in double_func: - bridge1_double_func = True - else: - bridge1_double_func = False - if temp_ligand['bridge2'] in double_func: - bridge2_double_func = True - else: - bridge2_double_func = False - if temp_ligand['bridge3'] in double_func: - bridge3_double_func = True - else: - bridge3_double_func = False - inds1 = temp_ligand['bridge1_func'].copy() - inds2 = temp_ligand['bridge2_func'].copy() - inds3 = temp_ligand['bridge3_func'][0].copy() - inds4 = temp_ligand['bridge3_func'][1].copy() - func_counter = 0 - shifter2 = 0 - shifter3 = 0 - shifter4 = 0 - if (isinstance(inds4, list) and compatibility(func, temp_ligand['bridge3'])): - for ind in reversed(inds4): - left, right = split_at_idx(smiles,ind) - if bridge3_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 - used_func_groups.add(func) - else: - smiles = left+'('+func+')'+right - func_counter += 1 - used_func_groups.add(func) - if (isinstance(inds2, list) and compatibility(func, temp_ligand['bridge2'])): - for ind in reversed(inds2): - left, right = split_at_idx(smiles,ind) - if bridge2_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 - shifter4 += 2*func_shift - used_func_groups.add(func) - else: - smiles = left+'('+func+')'+right - func_counter += 1 - shifter4 += func_shift - used_func_groups.add(func) - if (isinstance(inds3, list) and compatibility(func, temp_ligand['bridge3'])): - for ind in reversed(inds3): - left, right = split_at_idx(smiles,ind) - if bridge3_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 - shifter4 += 2*func_shift - shifter3 += 2*func_shift - used_func_groups.add(func) - else: - smiles = left+'('+func+')'+right - func_counter += 1 - shifter4 += func_shift - shifter3 += func_shift - used_func_groups.add(func) - if (isinstance(inds1, list) and compatibility(func, temp_ligand['bridge1'])): - for ind in reversed(inds1): - left, right = split_at_idx(smiles,ind) - if bridge1_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 - shifter4 += 2*func_shift - shifter3 += 2*func_shift - shifter2 += 2*func_shift - used_func_groups.add(func) - else: - smiles = left+'('+func+')'+right - func_counter += 1 - shifter4 += func_shift - shifter3 += func_shift - shifter2 += func_shift - used_func_groups.add(func) - - # This is required because the bridging atoms are in the same ring as the coordination atoms and mess that up if not. - smicat[1] += shifter2 - smicat[2] += shifter3 - smicat[3] += shifter4 - - smicat_zero[1] += shifter2 - smicat_zero[2] += shifter3 - smicat_zero[3] += shifter4 - if func_counter > 0: - m = Chem.MolFromSmiles(smiles) - if m != None: - functionalized_counter += 1 - # print(smicat, shifter2, shifter3, shifter4, funcs[func], smiles, temp_ligand['coord_atoms_smicat']) - # sard - func_canonical = Chem.MolToSmiles(m, canonical=True, isomericSmiles=False) - temp_ligand['bridge_func_smiles'] = smiles - temp_ligand['bridge_func_canonical'] = func_canonical - temp_ligand['bridge_func_smiles_formula'] = CalcMolFormula(m) - temp_ligand['bridge_func_size'] = count_atoms(smiles) - temp_ligand['bridge_func_group'] = funcs[func] - temp_ligand['bridge_func_counter'] = func_counter - temp_ligand['bridge_func_coord_atoms_smicat'] = smicat - temp_ligand['bridge_func_coord_atoms_zero_index'] = smicat_zero - temp_ligand['func_name'] = temp_ligand['name']+'_bridge'+str(funcs[func])+str(func_counter) - func_lig_list.append(temp_ligand) + +def main(temp_list): + ''' + to functionalize, you need to start at the first one and move inwards in the order + of 1 --> 2 --> 3 --> 4 + ''' + double_func = ['C'] # cases that have more than one functionalization + rounds = 0 + lig_func_list = [] + func_lig_list = [] + functionalized_counter = 0 + for lignum, ligand in enumerate(temp_list): + + used_func_groups = set() + for func in list(funcs.keys()): + temp_ligand = ligand.copy() + rounds += 1 + func_shift = count_atoms(func) + smicat = temp_ligand['coord_atoms_smicat'].copy() + smicat_zero = temp_ligand['coord_atoms_zero_index'].copy() + if 'frag_func_smiles' in list(ligand.keys()): + smiles = temp_ligand['frag_func_smiles'] + original = temp_ligand['frag_func_smiles'] + else: + smiles = temp_ligand['macrocycle_smiles'] + original = temp_ligand['macrocycle_smiles'] + if temp_ligand['bridge1'] in double_func: + bridge1_double_func = True else: - temp_ligand['bridge_func_smiles'] = smiles - temp_ligand['bridge_func_canonical'] = False - temp_ligand['bridge_func_smiles_formula'] = CalcMolFormula(m) - temp_ligand['bridge_func_size'] = count_atoms(smiles) - temp_ligand['bridge_func_group'] = funcs[func] - temp_ligand['bridge_func_counter'] = func_counter - temp_ligand['bridge_func_coord_atoms_smicat'] = smicat - temp_ligand['bridge_func_coord_atoms_zero_index'] = smicat_zero - temp_ligand['func_name'] = temp_ligand['name']+'_bridge'+str(funcs[func])+str(func_counter) - if not os.path.exists('failed_bridge_func.csv'): - df = pd.DataFrame([temp_ligand]) - df.to_csv('failed_bridge_func.csv',index=False) + bridge1_double_func = False + if temp_ligand['bridge2'] in double_func: + bridge2_double_func = True + else: + bridge2_double_func = False + if temp_ligand['bridge3'] in double_func: + bridge3_double_func = True + else: + bridge3_double_func = False + inds1 = temp_ligand['bridge1_func'].copy() + inds2 = temp_ligand['bridge2_func'].copy() + inds3 = temp_ligand['bridge3_func'][0].copy() + inds4 = temp_ligand['bridge3_func'][1].copy() + func_counter = 0 + shifter2 = 0 + shifter3 = 0 + shifter4 = 0 + if (isinstance(inds4, list) and compatibility(func, temp_ligand['bridge3'])): + for ind in reversed(inds4): + left, right = split_at_idx(smiles, ind) + if bridge3_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + used_func_groups.add(func) + else: + smiles = left+'('+func+')'+right + func_counter += 1 + used_func_groups.add(func) + if (isinstance(inds2, list) and compatibility(func, temp_ligand['bridge2'])): + for ind in reversed(inds2): + left, right = split_at_idx(smiles, ind) + if bridge2_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + shifter4 += 2*func_shift + used_func_groups.add(func) + else: + smiles = left+'('+func+')'+right + func_counter += 1 + shifter4 += func_shift + used_func_groups.add(func) + if (isinstance(inds3, list) and compatibility(func, temp_ligand['bridge3'])): + for ind in reversed(inds3): + left, right = split_at_idx(smiles, ind) + if bridge3_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + shifter4 += 2*func_shift + shifter3 += 2*func_shift + used_func_groups.add(func) + else: + smiles = left+'('+func+')'+right + func_counter += 1 + shifter4 += func_shift + shifter3 += func_shift + used_func_groups.add(func) + if (isinstance(inds1, list) and compatibility(func, temp_ligand['bridge1'])): + for ind in reversed(inds1): + left, right = split_at_idx(smiles, ind) + if bridge1_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + shifter4 += 2*func_shift + shifter3 += 2*func_shift + shifter2 += 2*func_shift + used_func_groups.add(func) + else: + smiles = left+'('+func+')'+right + func_counter += 1 + shifter4 += func_shift + shifter3 += func_shift + shifter2 += func_shift + used_func_groups.add(func) + + # This is required because the bridging atoms are in the same ring as the coordination atoms and mess that up if not. + smicat[1] += shifter2 + smicat[2] += shifter3 + smicat[3] += shifter4 + + smicat_zero[1] += shifter2 + smicat_zero[2] += shifter3 + smicat_zero[3] += shifter4 + if func_counter > 0: + m = Chem.MolFromSmiles(smiles) + if m is not None: + functionalized_counter += 1 + # print(smicat, shifter2, shifter3, shifter4, funcs[func], smiles, temp_ligand['coord_atoms_smicat']) + # sard + func_canonical = Chem.MolToSmiles(m, canonical=True, isomericSmiles=False) + temp_ligand['bridge_func_smiles'] = smiles + temp_ligand['bridge_func_canonical'] = func_canonical + temp_ligand['bridge_func_smiles_formula'] = CalcMolFormula(m) + temp_ligand['bridge_func_size'] = count_atoms(smiles) + temp_ligand['bridge_func_group'] = funcs[func] + temp_ligand['bridge_func_counter'] = func_counter + temp_ligand['bridge_func_coord_atoms_smicat'] = smicat + temp_ligand['bridge_func_coord_atoms_zero_index'] = smicat_zero + temp_ligand['func_name'] = temp_ligand['name']+'_bridge'+str(funcs[func])+str(func_counter) + func_lig_list.append(temp_ligand) else: - df = pd.read_csv('failed_bridge_func.csv') - new_entry = pd.DataFrame([temp_ligand]) - new_df = pd.concat([df,new_entry],axis=0) - new_df.to_csv('failed_bridge_func.csv',index=False) - print('counter',lignum, rounds, func_counter) - temp_dict = {} - temp_dict['name'] = ligand['name'] - temp_dict['charge'] = ligand['charge'] - temp_dict['func_groups'] = list(used_func_groups) - lig_func_list.append(temp_dict) - -print('==== FUNCTIONALIZED', functionalized_counter, rounds) -with open(os.getcwd()+'/bridge_functionalized_synthesized_ligands.json', 'w') as fout: - json.dump(func_lig_list, fout) -if not os.path.exists('successful_bridge_functionalizations.csv'): - df = pd.DataFrame(lig_func_list) - df.to_csv('successful_bridge_functionalizations.csv',index=False) -else: - df = pd.read_csv('successful_bridge_functionalizations.csv') - new_entry = pd.DataFrame(lig_func_list) - new_df = pd.concat([df,new_entry],axis=0) - new_df.to_csv('successful_bridge_functionalizations.csv',index=False) + temp_ligand['bridge_func_smiles'] = smiles + temp_ligand['bridge_func_canonical'] = False + temp_ligand['bridge_func_smiles_formula'] = CalcMolFormula(m) + temp_ligand['bridge_func_size'] = count_atoms(smiles) + temp_ligand['bridge_func_group'] = funcs[func] + temp_ligand['bridge_func_counter'] = func_counter + temp_ligand['bridge_func_coord_atoms_smicat'] = smicat + temp_ligand['bridge_func_coord_atoms_zero_index'] = smicat_zero + temp_ligand['func_name'] = temp_ligand['name']+'_bridge'+str(funcs[func])+str(func_counter) + if not os.path.exists('failed_bridge_func.csv'): + df = pd.DataFrame([temp_ligand]) + df.to_csv('failed_bridge_func.csv', index=False) + else: + df = pd.read_csv('failed_bridge_func.csv') + new_entry = pd.DataFrame([temp_ligand]) + new_df = pd.concat([df, new_entry], axis=0) + new_df.to_csv('failed_bridge_func.csv', index=False) + print('counter',lignum, rounds, func_counter) + temp_dict = {} + temp_dict['name'] = ligand['name'] + temp_dict['charge'] = ligand['charge'] + temp_dict['func_groups'] = list(used_func_groups) + lig_func_list.append(temp_dict) + + print('==== FUNCTIONALIZED', functionalized_counter, rounds) + with open(os.getcwd()+'/bridge_functionalized_synthesized_ligands.json', 'w') as fout: + json.dump(func_lig_list, fout) + if not os.path.exists('successful_bridge_functionalizations.csv'): + df = pd.DataFrame(lig_func_list) + df.to_csv('successful_bridge_functionalizations.csv', index=False) + else: + df = pd.read_csv('successful_bridge_functionalizations.csv') + new_entry = pd.DataFrame(lig_func_list) + new_df = pd.concat([df, new_entry], axis=0) + new_df.to_csv('successful_bridge_functionalizations.csv', index=False) + + +if __name__ == "__main__": + input_file = sys.argv[1] + temp_list = read_synthesized_macrocycles(input_file) + main(temp_list) diff --git a/molSimplify/Informatics/frag_functionalizer.py b/molSimplify/Informatics/frag_functionalizer.py index b0e97b15..1a8ee636 100644 --- a/molSimplify/Informatics/frag_functionalizer.py +++ b/molSimplify/Informatics/frag_functionalizer.py @@ -12,27 +12,26 @@ ''' functional groups considered ''' -funcs = {'cccc': 'phenyl', - 'C': 'methyl', - 'F': 'fluoro', - 'C(F)(F)F': 'tetrafluoro', - 'Br': 'bromo', - 'Cl': 'chloro', - 'I': 'iodo', - 'C#N': 'cyano', - 'N': 'amino', - 'O': 'hydroxy', - 'S': 'thiol'} - - -''' -takes in a batch of synthesized macrocycles -''' -input_file = sys.argv[1] +funcs = { + 'cccc': 'phenyl', + 'C': 'methyl', + 'F': 'fluoro', + 'C(F)(F)F': 'tetrafluoro', + 'Br': 'bromo', + 'Cl': 'chloro', + 'I': 'iodo', + 'C#N': 'cyano', + 'N': 'amino', + 'O': 'hydroxy', + 'S': 'thiol' +} def read_synthesized_macrocycles(input_file): - with open(sys.argv[1], 'r') as f: + ''' + takes in a batch of synthesized macrocycles + ''' + with open(input_file, 'r') as f: data = json.load(f) print('----', len(data)) counter = 0 @@ -50,27 +49,6 @@ def read_synthesized_macrocycles(input_file): return temp_list_first -temp_list_first = read_synthesized_macrocycles(input_file) - -''' -In this section, we disable functionalizations on -certain fragments after the fact. Here we dont allow -functionalizations on any pyrans. -''' -temp_list = [] -for val in temp_list_first: - new_dict = val.copy() - if 'pyran' in new_dict['frag1']: - new_dict['frag1_func'] = False - if 'pyran' in new_dict['frag2']: - new_dict['frag2_func'] = False - if 'pyran' in new_dict['frag3']: - new_dict['frag3_func'] = False - if 'pyran' in new_dict['frag4']: - new_dict['frag4_func'] = False - temp_list.append(new_dict) - - def count_atoms(smiles): return len([val for val in smiles if val.isalpha()]) @@ -90,151 +68,175 @@ def split_at_idx(smiles, idx): return left, right -''' -to functionalize, you need to start at the first one and move inwards in the order -of 1 --> 2 --> 3 --> 4 - -only certain rings are compatible with the phthalocyanine style phenyl functionalization. -''' -phthalo_compat = ['imidazole2ylidine', 'furan', 'pyrrole', 'phosphole', 'thiophene'] -double_func = [] # We populate this list if we want to doubly functionalize a given carbon on a fragment. -functionalized_ligands = [] -rounds = 0 -for lignum, ligand in enumerate(temp_list): - used_func_groups = set() - for func in list(funcs.keys()): - temp_ligand = ligand.copy() - rounds += 1 - smiles = temp_ligand['macrocycle_smiles'] - counter = 5 - func_counter = 0 - if func == 'cccc': - if temp_ligand['frag1'] in phthalo_compat: - inds1 = temp_ligand['frag1_func'] - inds2 = temp_ligand['frag2_func'] - left, right = split_at_idx(smiles, inds1[0]) - if '=' in right: - additional = 'C=CC=C' +def main(temp_list): + ''' + to functionalize, you need to start at the first one and move inwards in the order + of 1 --> 2 --> 3 --> 4 + + only certain rings are compatible with the phthalocyanine style phenyl functionalization. + ''' + phthalo_compat = ['imidazole2ylidine', 'furan', 'pyrrole', 'phosphole', 'thiophene'] + double_func = [] # We populate this list if we want to doubly functionalize a given carbon on a fragment. + functionalized_ligands = [] + rounds = 0 + for lignum, ligand in enumerate(temp_list): + used_func_groups = set() + for func in list(funcs.keys()): + temp_ligand = ligand.copy() + rounds += 1 + smiles = temp_ligand['macrocycle_smiles'] + counter = 5 + func_counter = 0 + if func == 'cccc': + if temp_ligand['frag1'] in phthalo_compat: + inds1 = temp_ligand['frag1_func'] + inds2 = temp_ligand['frag2_func'] + left, right = split_at_idx(smiles, inds1[0]) + if '=' in right: + additional = 'C=CC=C' + else: + additional = '=CC=CC' + smiles = left+str(counter)+right+additional+str(counter) + counter += 1 + func_counter += 1 + left2, right2 = split_at_idx(smiles, inds2[0]) + new_right2 = right2.split(')', 1) + if '=' in new_right2[0]: + additional = 'C=CC=C' + else: + additional = '=CC=CC' + smiles = left2+str(counter)+new_right2[0]+additional+str(counter)+')'+new_right2[1] + counter += 1 + func_counter += 1 + used_func_groups.add(funcs[func]) + if temp_ligand['frag3'] in phthalo_compat: + inds3 = temp_ligand['frag3_func'] + inds4 = temp_ligand['frag4_func'] + left, right = split_at_idx(smiles, inds3[0]) + new_right1 = right.split(')', 1) + if '=' in new_right1[0]: + additional = 'C=CC=C' + else: + additional = '=CC=CC' + smiles = left+str(counter)+new_right1[0]+additional+str(counter)+')'+new_right1[1] + counter += 1 + func_counter += 1 + left2, right2 = split_at_idx(smiles, inds4[0]) + new_right2 = right2.split(')', 1) + if '=' in new_right2[0]: + additional = 'C=CC=C' + else: + additional = '=CC=CC' + smiles = left2+str(counter)+new_right2[0]+additional+str(counter)+')'+new_right2[1] + counter += 1 + func_counter += 1 + used_func_groups.add(funcs[func]) + else: + if temp_ligand['frag1'] in double_func: + frag1_double_func = True else: - additional = '=CC=CC' - smiles = left+str(counter)+right+additional+str(counter) - counter += 1 - func_counter += 1 - left2, right2 = split_at_idx(smiles, inds2[0]) - new_right2 = right2.split(')', 1) - if '=' in new_right2[0]: - additional = 'C=CC=C' + frag1_double_func = False + if temp_ligand['frag3'] in double_func: + frag2_double_func = True else: - additional = '=CC=CC' - smiles = left2+str(counter)+new_right2[0]+additional+str(counter)+')'+new_right2[1] - counter += 1 - func_counter += 1 - used_func_groups.add(funcs[func]) - if temp_ligand['frag3'] in phthalo_compat: + frag2_double_func = False + inds1 = temp_ligand['frag1_func'] + inds2 = temp_ligand['frag2_func'] inds3 = temp_ligand['frag3_func'] inds4 = temp_ligand['frag4_func'] - left, right = split_at_idx(smiles, inds3[0]) - new_right1 = right.split(')', 1) - if '=' in new_right1[0]: - additional = 'C=CC=C' - else: - additional = '=CC=CC' - smiles = left+str(counter)+new_right1[0]+additional+str(counter)+')'+new_right1[1] - counter += 1 - func_counter += 1 - left2, right2 = split_at_idx(smiles, inds4[0]) - new_right2 = right2.split(')', 1) - if '=' in new_right2[0]: - additional = 'C=CC=C' - else: - additional = '=CC=CC' - smiles = left2+str(counter)+new_right2[0]+additional+str(counter)+')'+new_right2[1] - counter += 1 - func_counter += 1 - used_func_groups.add(funcs[func]) - else: - if temp_ligand['frag1'] in double_func: - frag1_double_func = True - else: - frag1_double_func = False - if temp_ligand['frag3'] in double_func: - frag2_double_func = True - else: - frag2_double_func = False - inds1 = temp_ligand['frag1_func'] - inds2 = temp_ligand['frag2_func'] - inds3 = temp_ligand['frag3_func'] - inds4 = temp_ligand['frag4_func'] - func_inds1 = [] - func_inds2 = [] - if isinstance(inds1, list) and len(inds1) > 0: - func_inds1 += inds1 - if isinstance(inds2, list) and len(inds2) > 0: - func_inds1 += inds2 - if isinstance(inds3, list) and len(inds3) > 0: - func_inds2 += inds3 - if isinstance(inds4, list) and len(inds4) > 0: - func_inds2 += inds4 - sorted_func_inds1 = sorted(func_inds1) - sorted_func_inds2 = sorted(func_inds2) - for ind in reversed(sorted_func_inds1): - left, right = split_at_idx(smiles, ind) - if frag1_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 + func_inds1 = [] + func_inds2 = [] + if isinstance(inds1, list) and len(inds1) > 0: + func_inds1 += inds1 + if isinstance(inds2, list) and len(inds2) > 0: + func_inds1 += inds2 + if isinstance(inds3, list) and len(inds3) > 0: + func_inds2 += inds3 + if isinstance(inds4, list) and len(inds4) > 0: + func_inds2 += inds4 + sorted_func_inds1 = sorted(func_inds1) + sorted_func_inds2 = sorted(func_inds2) + for ind in reversed(sorted_func_inds1): + left, right = split_at_idx(smiles, ind) + if frag1_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + else: + smiles = left+'('+func+')'+right + func_counter += 1 + used_func_groups.add(funcs[func]) + for ind in reversed(sorted_func_inds2): + left, right = split_at_idx(smiles, ind) + if frag2_double_func: + smiles = left+'('+func+')'+'('+func+')'+right + func_counter += 2 + else: + smiles = left+'('+func+')'+right + func_counter += 1 + used_func_groups.add(funcs[func]) + if func_counter > 0: + m = Chem.MolFromSmiles(smiles) + if m is not None: + func_canonical = Chem.MolToSmiles(m, canonical=True, isomericSmiles=False) + temp_ligand['frag_func_smiles'] = smiles + temp_ligand['frag_func_canonical'] = func_canonical + temp_ligand['frag_func_smiles_formula'] = CalcMolFormula(m) + temp_ligand['frag_func_size'] = count_atoms(smiles) + temp_ligand['frag_func_group'] = funcs[func] + temp_ligand['frag_func_count'] = func_counter + temp_ligand['func_name'] = temp_ligand['name']+'_'+str(funcs[func])+str(func_counter) else: - smiles = left+'('+func+')'+right - func_counter += 1 - used_func_groups.add(funcs[func]) - for ind in reversed(sorted_func_inds2): - left, right = split_at_idx(smiles, ind) - if frag2_double_func: - smiles = left+'('+func+')'+'('+func+')'+right - func_counter += 2 - else: - smiles = left+'('+func+')'+right - func_counter += 1 - used_func_groups.add(funcs[func]) - if func_counter > 0: - m = Chem.MolFromSmiles(smiles) - if m is not None: - func_canonical = Chem.MolToSmiles(m, canonical=True, isomericSmiles=False) - temp_ligand['frag_func_smiles'] = smiles - temp_ligand['frag_func_canonical'] = func_canonical - temp_ligand['frag_func_smiles_formula'] = CalcMolFormula(m) - temp_ligand['frag_func_size'] = count_atoms(smiles) - temp_ligand['frag_func_group'] = funcs[func] - temp_ligand['frag_func_count'] = func_counter - temp_ligand['func_name'] = temp_ligand['name']+'_'+str(funcs[func])+str(func_counter) - else: - temp_ligand['frag_func_smiles'] = smiles - temp_ligand['frag_func_canonical'] = func_canonical - temp_ligand['frag_func_smiles_formula'] = CalcMolFormula(m) - temp_ligand['frag_func_size'] = count_atoms(smiles) - temp_ligand['frag_func_group'] = funcs[func] - temp_ligand['frag_func_count'] = func_counter - if not os.path.exists('failed_frag_func.csv'): - df = pd.DataFrame([temp_ligand]) - df.to_csv('failed_frag_func.csv', index=False) - else: - df = pd.read_csv('failed_frag_func.csv') - new_entry = pd.DataFrame([temp_ligand]) - new_df = pd.concat([df, new_entry], axis=0) - new_df.to_csv('failed_frag_func.csv', index=False) - print('counter', lignum, rounds) - temp_dict = {} - temp_dict['name'] = temp_ligand['name'] - temp_dict['charge'] = temp_ligand['charge'] - temp_dict['func_groups'] = list(used_func_groups) - if not os.path.exists('successful_functionalizations.csv'): - df = pd.DataFrame([temp_dict]) - df.to_csv('successful_functionalizations.csv', index=False) - else: - df = pd.read_csv('successful_functionalizations.csv') - new_entry = pd.DataFrame([temp_dict]) - new_df = pd.concat([df, new_entry], axis=0) - new_df.to_csv('successful_functionalizations.csv', index=False) - -with open(os.getcwd()+'/frag_functionalized_synthesized_ligands.json', 'w') as fout: - json.dump(functionalized_ligands, fout) + temp_ligand['frag_func_smiles'] = smiles + temp_ligand['frag_func_canonical'] = func_canonical + temp_ligand['frag_func_smiles_formula'] = CalcMolFormula(m) + temp_ligand['frag_func_size'] = count_atoms(smiles) + temp_ligand['frag_func_group'] = funcs[func] + temp_ligand['frag_func_count'] = func_counter + if not os.path.exists('failed_frag_func.csv'): + df = pd.DataFrame([temp_ligand]) + df.to_csv('failed_frag_func.csv', index=False) + else: + df = pd.read_csv('failed_frag_func.csv') + new_entry = pd.DataFrame([temp_ligand]) + new_df = pd.concat([df, new_entry], axis=0) + new_df.to_csv('failed_frag_func.csv', index=False) + print('counter', lignum, rounds) + temp_dict = {} + temp_dict['name'] = temp_ligand['name'] + temp_dict['charge'] = temp_ligand['charge'] + temp_dict['func_groups'] = list(used_func_groups) + if not os.path.exists('successful_functionalizations.csv'): + df = pd.DataFrame([temp_dict]) + df.to_csv('successful_functionalizations.csv', index=False) + else: + df = pd.read_csv('successful_functionalizations.csv') + new_entry = pd.DataFrame([temp_dict]) + new_df = pd.concat([df, new_entry], axis=0) + new_df.to_csv('successful_functionalizations.csv', index=False) + + with open(os.getcwd()+'/frag_functionalized_synthesized_ligands.json', 'w') as fout: + json.dump(functionalized_ligands, fout) + + +if __name__ == "__main__": + input_file = sys.argv[1] + temp_list_first = read_synthesized_macrocycles(input_file) + ''' + In this section, we disable functionalizations on + certain fragments after the fact. Here we dont allow + functionalizations on any pyrans. + ''' + temp_list = [] + for val in temp_list_first: + new_dict = val.copy() + if 'pyran' in new_dict['frag1']: + new_dict['frag1_func'] = False + if 'pyran' in new_dict['frag2']: + new_dict['frag2_func'] = False + if 'pyran' in new_dict['frag3']: + new_dict['frag3_func'] = False + if 'pyran' in new_dict['frag4']: + new_dict['frag4_func'] = False + temp_list.append(new_dict) + main(temp_list) diff --git a/molSimplify/Ligands/alphabetizer.py b/molSimplify/Ligands/alphabetizer.py index 7806e5a3..eaa1d41d 100644 --- a/molSimplify/Ligands/alphabetizer.py +++ b/molSimplify/Ligands/alphabetizer.py @@ -1,16 +1,21 @@ # This script alphabetizes the entries in ligands.dict # Assumes the first two lines are comments. -with open('ligands.dict', 'r') as f: - array_content = f.readlines() +def main(): + with open('ligands.dict', 'r') as f: + array_content = f.readlines() -comment_lines = array_content[:2] -dictionary_lines = array_content[2:] -dictionary_lines.sort() # Alphabetization + comment_lines = array_content[:2] + dictionary_lines = array_content[2:] + dictionary_lines.sort() # Alphabetization -# Rewriting ligands.dict, alphabetized. -with open('ligands.dict', 'w') as f: - for line in comment_lines: - f.write(line) - for line in dictionary_lines: - f.write(line) + # Rewriting ligands.dict, alphabetized. + with open('ligands.dict', 'w') as f: + for line in comment_lines: + f.write(line) + for line in dictionary_lines: + f.write(line) + + +if __name__ == "__main__": + main() diff --git a/molSimplify/Scripts/convert_2to3.py b/molSimplify/Scripts/convert_2to3.py index db09647c..44d4b230 100644 --- a/molSimplify/Scripts/convert_2to3.py +++ b/molSimplify/Scripts/convert_2to3.py @@ -10,9 +10,14 @@ def run_bash(filein): print(('ll:', ll)) -basedir = '../' -for dirpath, dir, files in os.walk(basedir): - for f in sorted(files): - if f.split(".")[-1] == 'py': - run_bash(dirpath + '/' + f) -print("Done.") +def main(): + basedir = '../' + for dirpath, dir, files in os.walk(basedir): + for f in sorted(files): + if f.split(".")[-1] == 'py': + run_bash(dirpath + '/' + f) + print("Done.") + + +if __name__ == "__main__": + main() diff --git a/molSimplify/Scripts/in_b3lyp_usetc.py b/molSimplify/Scripts/in_b3lyp_usetc.py index fcfbd428..b713fcc5 100644 --- a/molSimplify/Scripts/in_b3lyp_usetc.py +++ b/molSimplify/Scripts/in_b3lyp_usetc.py @@ -11,7 +11,6 @@ import numpy as np import shutil import iodata -from os.path import expanduser from molden2psi4wfn import tcmolden2psi4wfn_ao_mapping @@ -51,86 +50,92 @@ def get_molecule(xyzfile, charge, spin): return mol -home = expanduser("~") -# psi4_scr = home + '/psi4_scr/' -psi4_scr = './' -if not os.path.isdir(psi4_scr): - os.makedirs(psi4_scr) -# ---argument parsing--- -parser = argparse.ArgumentParser(description='psi4 dft calculations.') -parser.add_argument('-c', action="store", type=int, dest='charge') -parser.add_argument('-spin', action="store", type=int, dest='spin') -parser.add_argument('-xyz', action="store", type=str, dest='xyzfile') -parser.add_argument('-molden', action="store", type=str, dest='moldenfile') -parser.add_argument('-nthread', action="store", type=int, default=4, dest='num_threads') -parser.add_argument('-memory', action="store", type=str, default='12000 MB', dest='memory') -parser.add_argument('-ref', action="store", type=str, default='uks', dest='reference') -args = parser.parse_args() -moldenfile = args.moldenfile -xyzfile = args.xyzfile -charge, spin = args.charge, args.spin -psi4.set_memory(args.memory) -psi4.set_num_threads(args.num_threads) +def main(): + # from os.path import expanduser + # home = expanduser("~") + # psi4_scr = home + '/psi4_scr/' + psi4_scr = './' + if not os.path.isdir(psi4_scr): + os.makedirs(psi4_scr) + # ---argument parsing--- + parser = argparse.ArgumentParser(description='psi4 dft calculations.') + parser.add_argument('-c', action="store", type=int, dest='charge') + parser.add_argument('-spin', action="store", type=int, dest='spin') + parser.add_argument('-xyz', action="store", type=str, dest='xyzfile') + parser.add_argument('-molden', action="store", type=str, dest='moldenfile') + parser.add_argument('-nthread', action="store", type=int, default=4, dest='num_threads') + parser.add_argument('-memory', action="store", type=str, default='12000 MB', dest='memory') + parser.add_argument('-ref', action="store", type=str, default='uks', dest='reference') + args = parser.parse_args() + moldenfile = args.moldenfile + xyzfile = args.xyzfile + charge, spin = args.charge, args.spin + psi4.set_memory(args.memory) + psi4.set_num_threads(args.num_threads) -# ---basic setup--- -filename = "output" -psi4.core.set_output_file(filename + '.dat', False) -psi4.qcdb.libmintsbasisset.basishorde['LACVPS'] = lacvps -psi4.set_options({ - 'reference': args.reference, - "puream": False, - "DF_SCF_GUESS": False, - "scf_type": "df", - "dft_pruning_scheme": "robust", - "basis": "lacvps", - "DFT_BASIS_TOLERANCE": 1e-10, - "INTS_TOLERANCE": 1e-10, - "PRINT_MOS": False, - "dft_spherical_points": 590, - "dft_radial_points": 99, - "guess": "read", }) + # ---basic setup--- + filename = "output" + psi4.core.set_output_file(filename + '.dat', False) + psi4.qcdb.libmintsbasisset.basishorde['LACVPS'] = lacvps + psi4.set_options({ + 'reference': args.reference, + "puream": False, + "DF_SCF_GUESS": False, + "scf_type": "df", + "dft_pruning_scheme": "robust", + "basis": "lacvps", + "DFT_BASIS_TOLERANCE": 1e-10, + "INTS_TOLERANCE": 1e-10, + "PRINT_MOS": False, + "dft_spherical_points": 590, + "dft_radial_points": 99, + "guess": "read", }) -# ----1step scf--- -print("1step scf...") -mol = get_molecule(xyzfile, charge, spin) -psi4.set_options({ - "maxiter": 5, - "D_CONVERGENCE": 1e5, - "E_CONVERGENCE": 1e5, - "fail_on_maxiter": False}) -e, wfn = psi4.energy('b3lyp', molecule=mol, return_wfn=True) -wfn.to_file("wfn-1step.180") + # ----1step scf--- + print("1step scf...") + mol = get_molecule(xyzfile, charge, spin) + psi4.set_options({ + "maxiter": 5, + "D_CONVERGENCE": 1e5, + "E_CONVERGENCE": 1e5, + "fail_on_maxiter": False}) + e, wfn = psi4.energy('b3lyp', molecule=mol, return_wfn=True) + wfn.to_file("wfn-1step.180") -# -----conversion--- -print("molden2psi4wfn conversion...") -d_molden = iodata.molden.load_molden(moldenfile) -restricted = True if any(x in args.reference for x in ["r", "R"]) else False -Ca, Cb, mapping = tcmolden2psi4wfn_ao_mapping(d_molden, restricted=restricted) -wfn_minimal_np = np.load("wfn-1step.180.npy", allow_pickle=True) -wfn_minimal_np[()]['matrix']["Ca"] = Ca -if not restricted: - wfn_minimal_np[()]['matrix']["Cb"] = Cb -else: - wfn_minimal_np[()]['matrix']["Cb"] = Ca -np.save("wfn-1step-tc.180.npy", wfn_minimal_np) + # -----conversion--- + print("molden2psi4wfn conversion...") + d_molden = iodata.molden.load_molden(moldenfile) + restricted = True if any(x in args.reference for x in ["r", "R"]) else False + Ca, Cb, mapping = tcmolden2psi4wfn_ao_mapping(d_molden, restricted=restricted) + wfn_minimal_np = np.load("wfn-1step.180.npy", allow_pickle=True) + wfn_minimal_np[()]['matrix']["Ca"] = Ca + if not restricted: + wfn_minimal_np[()]['matrix']["Cb"] = Cb + else: + wfn_minimal_np[()]['matrix']["Cb"] = Ca + np.save("wfn-1step-tc.180.npy", wfn_minimal_np) -# ----copy wfn file to the right place with a right name--- -print("wfn copying...") -pid = str(os.getpid()) -targetfile = psi4_scr + filename + '.default.' + pid + '.180.npy' -shutil.copyfile("wfn-1step-tc.180.npy", targetfile) + # ----copy wfn file to the right place with a right name--- + print("wfn copying...") + pid = str(os.getpid()) + targetfile = psi4_scr + filename + '.default.' + pid + '.180.npy' + shutil.copyfile("wfn-1step-tc.180.npy", targetfile) -# ---final scf--- -print("final scf...") -psi4.set_options({ - 'SOSCF': False, - "SOSCF_MAX_ITER": 40, - }) -psi4.set_options({ - "maxiter": 250, - "D_CONVERGENCE": 1e-6, - "E_CONVERGENCE": 1e-6, - "fail_on_maxiter": True}) -e, wfn = psi4.energy('b3lyp', molecule=mol, return_wfn=True) -wfn.to_file("wfn.180") -psi4.molden(wfn, "geo.molden") + # ---final scf--- + print("final scf...") + psi4.set_options({ + 'SOSCF': False, + "SOSCF_MAX_ITER": 40, + }) + psi4.set_options({ + "maxiter": 250, + "D_CONVERGENCE": 1e-6, + "E_CONVERGENCE": 1e-6, + "fail_on_maxiter": True}) + e, wfn = psi4.energy('b3lyp', molecule=mol, return_wfn=True) + wfn.to_file("wfn.180") + psi4.molden(wfn, "geo.molden") + + +if __name__ == "__main__": + main() diff --git a/molSimplify/python_nn/dictionary_toolbox.py b/molSimplify/python_nn/dictionary_toolbox.py index f27cf1b0..347d89ba 100644 --- a/molSimplify/python_nn/dictionary_toolbox.py +++ b/molSimplify/python_nn/dictionary_toolbox.py @@ -25,23 +25,25 @@ def read_dictionary(path): return emsg, dictionary -sfd = {"split_energy": [-54.19, 142.71], - "slope": [-174.20, 161.58], - "ls_min": [1.8146, 0.6910], - "hs_min": [1.8882, 0.6956], - "ox": [2, 1], - "alpha": [0, 0.3], - "ax_charge": [-2, 2], - "eq_charge": [-2, 2], - "ax_dent": [1, 1], - "eq_dent": [1, 3], - "sum_delen": [-5.34, 12.54], - "max_delen": [-0.89, 2.09], - "ax_bo": [0, 3], - "eq_bo": [0.00, 3], - "ax_ki": [0.00, 4.29], - "eq_ki": [0.00, 6.96]} -write_dictionary(sfd, 'scaling.csv') +if __name__ == "__main__": + sfd = { + "split_energy": [-54.19, 142.71], + "slope": [-174.20, 161.58], + "ls_min": [1.8146, 0.6910], + "hs_min": [1.8882, 0.6956], + "ox": [2, 1], + "alpha": [0, 0.3], + "ax_charge": [-2, 2], + "eq_charge": [-2, 2], + "ax_dent": [1, 1], + "eq_dent": [1, 3], + "sum_delen": [-5.34, 12.54], + "max_delen": [-0.89, 2.09], + "ax_bo": [0, 3], + "eq_bo": [0.00, 3], + "ax_ki": [0.00, 4.29], + "eq_ki": [0.00, 6.96] + } + write_dictionary(sfd, 'scaling.csv') - -rect = read_dictionary('scaling.csv') + rect = read_dictionary('scaling.csv') From 3d77be4d27f6e68d21c5a27d8b2aa2d5cf8e9efd Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Wed, 26 Apr 2023 20:41:52 -0400 Subject: [PATCH 14/17] Joblib is now a standalone package --- molSimplify/python_krr/sklearn_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/molSimplify/python_krr/sklearn_models.py b/molSimplify/python_krr/sklearn_models.py index 280b8def..081e1cbe 100644 --- a/molSimplify/python_krr/sklearn_models.py +++ b/molSimplify/python_krr/sklearn_models.py @@ -1,5 +1,5 @@ import numpy as np -from sklearn.externals import joblib +import joblib from pkg_resources import resource_filename, Requirement from molSimplify.python_nn.tf_ANN import (tf_ANN_excitation_prepare, load_normalization_data, From 7af10554d8551533be864bda7ce12cec999be923 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Wed, 26 Apr 2023 20:43:02 -0400 Subject: [PATCH 15/17] Expand the doctests to most of the package --- .github/workflows/CI.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index bf2a543d..685906ee 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -80,9 +80,9 @@ jobs: pytest -v --cov=molSimplify --cov-report=xml - name: Run doctest - # For now only testing the files in the Classes folder + # For now still excluding several subfolders and files run: | - pytest --doctest-modules molSimplify/Classes + pytest --doctest-modules --ignore=molSimplify/job_manager --ignore=molSimplify/Informatics/MOF --ignore=molSimplify/Scripts/in_b3lyp_usetc.py --ignore=molSimplify/Informatics/jupyter_vis.py --ignore=molSimplify/Informatics/macrocycle_synthesis.py --ignore=molSimplify/Informatics/organic_fingerprints.py molSimplify - name: Upload coverage report to codecov uses: codecov/codecov-action@v3 From c944f5dd38b92ea54b21c91d1e7ca25b2b56ad99 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Thu, 27 Apr 2023 11:23:54 -0400 Subject: [PATCH 16/17] Trigger slack webhook only on CI failure --- .github/workflows/CI.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 685906ee..483b5b81 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -97,7 +97,7 @@ jobs: - name: Report Status # Slack notifications only on the main repo - if: ${{ github.event_name != 'pull_request' && github.repository == 'hjkgrp/molSimplify' }} + if: ${{job.status == 'failure' && github.event_name != 'pull_request' && github.repository == 'hjkgrp/molSimplify' }} #uses: ravsamhq/notify-slack-action@v1 uses: 8398a7/action-slack@v3 with: From 1c0f10237604398c846c6725ab3a439b69c1b070 Mon Sep 17 00:00:00 2001 From: Ralf Meyer Date: Thu, 27 Apr 2023 11:32:25 -0400 Subject: [PATCH 17/17] Add "optional" packages to the environment file --- devtools/conda-envs/mols.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/devtools/conda-envs/mols.yml b/devtools/conda-envs/mols.yml index a6d1b729..e5b47f15 100644 --- a/devtools/conda-envs/mols.yml +++ b/devtools/conda-envs/mols.yml @@ -13,11 +13,14 @@ dependencies: - pytest # Optional - # GUI + # GUI / plotting - pyqt - qt + - matplotlib # Database - pymongo + # PDB interaction + - beautifulsoup4 # Machine learning #- pytorch::pytorch #- hyperopt @@ -25,3 +28,5 @@ dependencies: - theochem::iodata - xtb #- psi4::psi4 + # molscontrol + - scikit-image