diff --git a/docs/environment.yaml b/docs/environment.yaml index bbc62c15..4f2449f1 100644 --- a/docs/environment.yaml +++ b/docs/environment.yaml @@ -8,3 +8,4 @@ dependencies: - pandas - scikit-learn - beautifulsoup4 + - networkx diff --git a/docs/source/Informatics.rst b/docs/source/Informatics.rst index 7eb295a3..69f25a17 100644 --- a/docs/source/Informatics.rst +++ b/docs/source/Informatics.rst @@ -17,4 +17,4 @@ Molecular Revised Autocorrelations Revised Autocorrelations for MOFs --------------------------------- -Documentation for MOFs is coming soon! \ No newline at end of file +Documentation for RACs coming soon! diff --git a/molSimplify/Classes/ligand.py b/molSimplify/Classes/ligand.py index ec7373be..1f5a630a 100644 --- a/molSimplify/Classes/ligand.py +++ b/molSimplify/Classes/ligand.py @@ -261,7 +261,7 @@ def percent_buried_vol(self, radius=3.5, gridspec=0.1, return percent_buried -def ligand_breakdown(mol, flag_loose=False, BondedOct=False, silent=True): +def ligand_breakdown(mol, flag_loose=False, BondedOct=False, silent=True, transition_metals_only=True): """Extract axial and equatorial components of a octahedral complex. Parameters @@ -287,7 +287,7 @@ def ligand_breakdown(mol, flag_loose=False, BondedOct=False, silent=True): """ # this function takes an octahedral # complex and returns ligands - metal_index = mol.findMetal()[0] + metal_index = mol.findMetal(transition_metals_only=transition_metals_only)[0] bondedatoms = mol.getBondedAtomsSmart(metal_index, oct=True) # print('!!!!!boundatoms', bondedatoms) # print('from get oct' + str(bondedatoms)) diff --git a/molSimplify/Classes/mol3D.py b/molSimplify/Classes/mol3D.py index 78003ecd..f892d209 100644 --- a/molSimplify/Classes/mol3D.py +++ b/molSimplify/Classes/mol3D.py @@ -698,9 +698,13 @@ def convert2OBMol2(self, force_clean=False, ignoreX=False): ##### obConversion.SetOutFormat('mol2') ss = obConversion.WriteString(OBMol) - # Update atom types from OBMol - lines = ss.split('ATOM\n')[1].split( - '@BOND')[0].split('\n')[:-1] + # Update atom types from OBMol. + if "UNITY_ATOM_ATTR" in ss: # If this section is present, it will be before the @BOND section. + lines = ss.split('ATOM\n')[1].split( + '@UNITY_ATOM_ATTR')[0].split('\n')[:-1] + else: + lines = ss.split('ATOM\n')[1].split( + '@BOND')[0].split('\n')[:-1] for i, line in enumerate(lines): if '.' in line.split()[5]: self.atoms[i].name = line.split()[5].split('.')[1] @@ -1273,6 +1277,41 @@ def apply_ffopt(self, constraints=False, ff='uff'): self.convert2mol3D() return en + def apply_ffopt_list_constraints(self, list_constraints=False, ff='uff'): + """ + Apply forcefield optimization to a given mol3D class. + Differs from apply_ffopt in that one can specify constrained atoms as a list. + + Parameters + ---------- + list_constraints : list of int, optional + List of atom indices to employ cartesian constraints before ffopt. + ff : str, optional + Force field to be used in openbabel. Default is UFF. + + Returns + ------- + energy : float + Energy of the ffopt in kJ/mol. + + """ + forcefield = openbabel.OBForceField.FindForceField(ff) + constr = openbabel.OBFFConstraints() + if list_constraints: + for catom in list_constraints: + # Openbabel uses a 1 index instead of a 0 index. + constr.AddAtomConstraint(catom+1) + self.convert2OBMol() + forcefield.Setup(self.OBMol, constr) + if self.OBMol.NumHvyAtoms() > 10: + forcefield.ConjugateGradients(200) + else: + forcefield.ConjugateGradients(50) + forcefield.GetCoordinates(self.OBMol) + en = forcefield.Energy() + self.convert2mol3D() + return en + def findcloseMetal(self, atom0): """ Find the nearest metal to a given atom3D class. @@ -4978,7 +5017,7 @@ def read_smiles(self, smiles, ff="mmff94", steps=2500): def get_smiles(self, canonicalize=False, use_mol2=False) -> str: """ - Read a smiles string and convert it to a mol3D class instance. + Returns the SMILES string representing the mol3D object. Parameters ---------- diff --git a/molSimplify/Classes/protein3D.py b/molSimplify/Classes/protein3D.py index 86461b03..9fbc385e 100644 --- a/molSimplify/Classes/protein3D.py +++ b/molSimplify/Classes/protein3D.py @@ -22,7 +22,6 @@ import time from scipy.spatial import ConvexHull # from pymol import cmd, stored - # no GUI support for now @@ -82,25 +81,27 @@ def __init__(self, pdbCode='undef'): self.hull = [] def setAAs(self, aas): - """ Set monomers of a protein3D class to different monomers. + """ + Set monomers of a protein3D class to different monomers. Parameters ---------- - aas : dictionary - Keyed by chain and location - Valued by monomer3D monomers (amino acids or nucleotides) + aas : dictionary + Keyed by chain and location + Valued by monomer3D monomers (amino acids or nucleotides) """ self.aas = aas self.naas = len(aas) def setAtoms(self, atoms): - """ Set atom indices of a protein3D class to atoms. + """ + Set atom indices of a protein3D class to atoms. Parameters ---------- - atoms : dictionary - Keyed by atom index - Valued by atom3D atom that has that index + atoms : dictionary + Keyed by atom index + Valued by atom3D atom that has that index """ self.atoms = atoms @@ -109,68 +110,75 @@ def setIndices(self, a_ids): Parameters ---------- - a_ids : dictionary - Keyed by atom3D atom - Valued by its index + a_ids : dictionary + Keyed by atom3D atom + Valued by its index """ self.a_ids = a_ids def setHetmols(self, hetmols): - """ Set heteromolecules of a protein3D class to different ones. + """ + Set heteromolecules of a protein3D class to different ones. Parameters ---------- - hetmols : dictionary - Keyed by chain and location - Valued by mol3D heteromolecules + hetmols : dictionary + Keyed by chain and location + Valued by mol3D heteromolecules """ self.hetmols = hetmols self.nhetmols = len(hetmols.keys()) def setChains(self, chains): - """ Set chains of a protein3D class to different chains. + """ + Set chains of a protein3D class to different chains. Parameters ---------- - chains : dictionary - Keyed by desired chain IDs. - Valued by the list of molecules in the chain. + chains : dictionary + Keyed by desired chain IDs. + Valued by the list of molecules in the chain. """ self.chains = chains self.nchains = len(chains.keys()) def setMissingAtoms(self, missing_atoms): - """ Set missing atoms of a protein3D class to a new dictionary. + """ + Set missing atoms of a protein3D class to a new dictionary. Parameters ---------- - missing_atoms : dictionary - Keyed by amino acid residues / nucleotides of origin - Valued by missing atoms + missing_atoms : dictionary + Keyed by amino acid residues / nucleotides of origin + Valued by missing atoms """ self.missing_atoms = missing_atoms def setMissingAAs(self, missing_aas): - """ Set missing amino acids of a protein3D class to a new list. + """ + Set missing amino acids of a protein3D class to a new list. + Parameters ---------- - missing_aas : list - List of missing amino acids. + missing_aas : list + List of missing amino acids. """ self.missing_aas = missing_aas def setConf(self, conf): - """ Set possible conformations of a protein3D class to a new list. + """ + Set possible conformations of a protein3D class to a new list. Parameters ---------- - conf : list - List of possible conformations for applicable amino acids. + conf : list + List of possible conformations for applicable amino acids. """ self.conf = conf def autoChooseConf(self): - """ Automatically choose the conformation of a protein3D class + """ + Automatically choose the conformation of a protein3D class instance based first on what the greatest occupancy level is and then the first conformation ihe alphabet with all else equal. @@ -204,39 +212,44 @@ def autoChooseConf(self): self.setConf([]) def setR(self, R): - """ Set R value of protein3D class. + """ + Set R value of protein3D class. Parameters ---------- - R : float - The desired new R value. + R : float + The desired new R value. """ self.R = R def setRfree(self, Rfree): - """ Set Rfree value of protein3D class. + """ + Set Rfree value of protein3D class. Parameters ---------- - Rfree : float - The desired new Rfree value. + Rfree : float + The desired new Rfree value. """ self.Rfree = Rfree def setRSRZ(self, RSRZ): - """ Set RSRZ score of protein3D class. + """ + Set RSRZ score of protein3D class. Parameters ---------- - RSRZ : float - The desired new RSRZ score. + RSRZ : float + The desired new RSRZ score. """ self.RSRZ = RSRZ def getMissingAtoms(self): - """ Get missing atoms of a protein3D class. + """ + Get missing atoms of a protein3D class. - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB fetched: 1MH1 @@ -249,10 +262,11 @@ def getMissingAtoms(self): return self.missing_atoms.values() def getMissingAAs(self): - """ Get missing amino acid residues of a protein3D class. - - Example demonstration of this method: + """ + Get missing amino acid residues of a protein3D class. + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1MH1') # Fetch a PDB fetched: 1MH1 @@ -262,10 +276,11 @@ def getMissingAAs(self): return self.missing_aas def countAAs(self): - """ Return the number of amino acid residues in a protein3D class. - - Example demonstration of this method: + """ + Return the number of amino acid residues in a protein3D class. + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -281,18 +296,19 @@ def findAtom(self, sym="X", aa=True): Parameters ---------- - sym : str - element symbol, default as X. - aa : boolean - True if we want atoms contained in amino acids - False if we want atoms contained in heteromolecules + sym : str + element symbol, default as X. + aa : boolean + True if we want atoms contained in amino acids + False if we want atoms contained in heteromolecules Returns - ---------- - inds: list - a list of atom indices with the specified symbol. + ------- + inds: list + a list of atom indices with the specified symbol. - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -324,15 +340,16 @@ def findAA(self, three_lc="XAA"): Parameters ---------- - three_lc: str - three-letter code, default as XAA. + three_lc: str + three-letter code, default as XAA. Returns ------- - inds: set - a set of amino acid indices with the specified symbol. + inds: set + a set of amino acid indices with the specified symbol. - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -350,20 +367,21 @@ def findAA(self, three_lc="XAA"): return inds def getChain(self, chain_id): - # BUGGY - """ Takes a chain of interest and turns it into its own protein3D class instance. + """ + Takes a chain of interest and turns it into its own protein3D class instance. Parameters ---------- - chain_id : string - The letter name of the chain of interest + chain_id : string + The letter name of the chain of interest Returns ------- - p : protein3D - A protein3D instance consisting of just the chain of interest + p : protein3D + A protein3D instance consisting of just the chain of interest - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -378,7 +396,7 @@ def getChain(self, chain_id): missing_aas = [] for aa in self.missing_aas: if aa.chain == chain_id: - missing_aas.append(aa) + missing_aas.append(aa) p.setMissingAAs(missing_aas) aas = {} @@ -423,7 +441,7 @@ def getChain(self, chain_id): bonds[a] = set() for b in self.bonds[a]: if b in p.atoms.values(): - bonds[a].add(b) + bonds[a].add(b) p.setBonds(bonds) p.setIndices(a_ids) @@ -432,22 +450,24 @@ def getChain(self, chain_id): return p def getMolecule(self, a_id, aas_only=False): - """ Finds the molecule that the atom is contained in. + """ + Finds the molecule that the atom is contained in. Parameters ---------- - a_id : int - the index of the desired atom whose molecule we want to find - aas_only : boolean - True if we want ito find atoms contained in amino acids only. - False if we want atoms contained in all molecules. Default is False. + a_id : int + The index of the desired atom whose molecule we want to find + aas_only : boolean + True if we want ito find atoms contained in amino acids only. + False if we want atoms contained in all molecules. Default is False. Returns ------- - mol : monomer3D or mol3D - the amino acid residue, nucleotide, or heteromolecule containing the atom + mol : monomer3D or mol3D + The amino acid residue, nucleotide, or heteromolecule containing the atom - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -477,14 +497,16 @@ def getMolecule(self, a_id, aas_only=False): return None # something is wrong def stripAtoms(self, atoms_stripped): - """ Removes certain atoms from the protein3D class instance. + """ + Removes certain atoms from the protein3D class instance. Parameters ---------- - atoms_stripped : list - list of atom3D indices that should be removed + atoms_stripped : list + List of atom3D indices that should be removed - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') # Fetch a PDB fetched: 1os7 @@ -556,16 +578,18 @@ def stripAtoms(self, atoms_stripped): self.setIndices(a_ids) def stripHetMol(self, hetmol): - """ Removes all heteroatoms part of the specified heteromolecule from + """ + Removes all heteroatoms part of the specified heteromolecule from the protein3D class instance. Parameters ---------- - hetmol : str - String representing the name of a heteromolecule whose - heteroatoms should be stripped from the protein3D class instance + hetmol : str + String representing the name of a heteromolecule whose + heteroatoms should be stripped from the protein3D class instance - Example demonstration of this method: + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('3I40') # Fetch a PDB fetched: 3I40 @@ -587,19 +611,21 @@ def stripHetMol(self, hetmol): pass def findMetal(self, transition_metals_only=True): - """Find metal(s) in a protein3D class. + """ + Find metal(s) in a protein3D class. + Parameters ---------- - transition_metals_only : bool, optional - Only find transition metals. Default is true. + transition_metals_only : bool, optional + Only find transition metals. Default is true. Returns ------- - metal_list : list - List of indices of metal atoms in protein3D. - - Example of fetching a PDB file: + metal_list : list + List of indices of metal atoms in protein3D. + Examples + -------- >>> pdb_system = protein3D() >>> pdb_system.fetch_pdb('1os7') fetched: 1os7 @@ -618,57 +644,61 @@ def findMetal(self, transition_metals_only=True): return (self.metals) def freezeatom(self, atomIdx): - """Set the freeze attribute to be true for a given atom3D class. + """ + Set the freeze attribute to be true for a given atom3D class. Parameters ---------- - atomIdx : int - Index for atom to be frozen. + atomIdx : int + Index for atom to be frozen. """ self.atoms[atomIdx].frozen = True def freezeatoms(self, Alist): - """Set the freeze attribute to be true for a given set of atom3D classes, + """ + Set the freeze attribute to be true for a given set of atom3D classes, given their indices. Preserves ordering, starts from largest index. Parameters ---------- - Alist : list - List of indices for atom3D instances to remove. + Alist : list + List of indices for atom3D instances to remove. """ for h in sorted(Alist, reverse=True): self.freezeatom(h) def getAtom(self, idx): - """Get atom with a given index. + """ + Get atom with a given index. Parameters ---------- - idx : int - Index of desired atom. + idx : int + Index of desired atom. Returns ------- - atom : atom3D - atom3D class for element at given index. + atom : atom3D + atom3D class for element at given index. """ return self.atoms[idx] def getIndex(self, atom): - """ Get index of a given atom + """ + Get index of a given atom Parameters ---------- - atom : atom3D - atom3D class for element at given index. + atom : atom3D + atom3D class for element at given index. Returns ------- - idx : int - Index of desired atom. + idx : int + Index of desired atom. """ if hasattr(self, 'a_ids') and atom in self.a_ids.keys(): @@ -678,19 +708,20 @@ def getIndex(self, atom): return idx def getBoundMols(self, h_id, aas_only=False): - """Get a list of molecules bound to a heteroatom, usually a metal. + """ + Get a list of molecules bound to a heteroatom, usually a metal. Parameters ---------- - h_id : int - the index of the desired (hetero)atom origin - aas_only : boolean - whether or not to only consider amino acids, defaults False + h_id : int + The index of the desired (hetero)atom origin + aas_only : boolean + Whether or not to only consider amino acids, defaults False Returns ------- - bound_mols : list - list of monomer3D and/or mol3D instances of molecules bound to hetatm + bound_mols : list + List of monomer3D and/or mol3D instances of molecules bound to hetatm """ bound_mols = [] for b_id in self.atoms.keys(): @@ -703,13 +734,14 @@ def getBoundMols(self, h_id, aas_only=False): return bound_mols def readfrompdb(self, text): - """ Read PDB into a protein3D class instance. + """ + Read PDB into a protein3D class instance. Parameters ---------- - text : str - String of path to PDB file. Path may be local or global. - May also be the text of a PDB file from the internet. + text : str + String of path to PDB file. Path may be local or global. + May also be the text of a PDB file from the internet. """ # read in PDB file @@ -883,12 +915,13 @@ def readfrompdb(self, text): self.setBonds(bonds) def fetch_pdb(self, pdbCode): - """ API query to fetch a pdb and write it as a protein3D class instance + """ + API query to fetch a pdb and write it as a protein3D class instance Parameters ---------- - pdbCode : str - code for protein, e.g. 1os7 + pdbCode : str + Code for protein, e.g. 1os7 """ remoteCode = pdbCode.upper() try: @@ -909,25 +942,28 @@ def fetch_pdb(self, pdbCode): print("warning: %s not valid.\n" % pdbCode) def setBonds(self, bonds): - """Sets the bonded atoms in the protein. + """ + Sets the bonded atoms in the protein. + This is effectively the molecular graph. Parameters ---------- - bonds : dictionary - Keyed by atom3D atoms in the protein - Valued by a set consisting of bonded atoms + bonds : dictionary + Keyed by atom3D atoms in the protein + Valued by a set consisting of bonded atoms """ self.bonds = bonds def readMetaData(self): - """ API query to fetch XML data from a pdb and add its useful attributes + """ + API query to fetch XML data from a pdb and add its useful attributes to a protein3D class. Parameters ---------- - pdbCode : str - code for protein, e.g. 1os7 + pdbCode : str + Code for protein, e.g. 1os7 """ pdbCode = self.pdbCode try: @@ -974,42 +1010,46 @@ def readMetaData(self): print("warning: %s not valid.\n" % pdbCode) def setDataCompleteness(self, DataCompleteness): - """ Set DataCompleteness value of protein3D class. + """ + Set DataCompleteness value of protein3D class. Parameters ---------- - DataCompleteness : float - The desired new R value. + DataCompleteness : float + The desired new R value. """ self.DataCompleteness = DataCompleteness def setTwinL(self, TwinL): - """ Set TwinL score of protein3D class. + """ + Set TwinL score of protein3D class. Parameters ---------- - TwinL : float - The desired new TwinL score. + TwinL : float + The desired new TwinL score. """ self.TwinL = TwinL def setTwinL2(self, TwinL2): - """ Set TwinL squared score of protein3D class. + """ + Set TwinL squared score of protein3D class. Parameters ---------- - TwinL2 : float - The desired new TwinL squared score. + TwinL2 : float + The desired new TwinL squared score. """ self.TwinL2 = TwinL2 def setEDIAScores(self): - """ Sets the EDIA score of a protein3D class. + """ + Sets the EDIA score of a protein3D class. Parameters ---------- - pdbCode : string - The 4-character code of the protein3D class. + pdbCode : string + The 4-character code of the protein3D class. """ code = self.pdbCode cmd = ('curl -d \'{"edia":{ "pdbCode":"'+code+'"}}\' -H "Accept: application/json"' @@ -1056,12 +1096,13 @@ def setEDIAScores(self): print("OXT is missing") def setPDBCode(self, pdbCode): - """ Sets the 4-letter PDB code of a protein3D class instance + """ + Sets the 4-letter PDB code of a protein3D class instance Parameters ---------- - pdbCode : string - Desired 4-letter PDB code + pdbCode : string + Desired 4-letter PDB code """ self.pdbCode = pdbCode @@ -1116,12 +1157,13 @@ def setCentroid(self): self.centroid = centroid def convexhull(self): - """Computes convex hull of protein. + """ + Computes convex hull of protein. Returns ------- - hull : array - Coordinates of convex hull. + hull : array + Coordinates of convex hull. """ points = [] # loop over atoms in protein diff --git a/molSimplify/Informatics/MOF/Linker_rotation.py b/molSimplify/Informatics/MOF/Linker_rotation.py index 937d9280..c277b40f 100644 --- a/molSimplify/Informatics/MOF/Linker_rotation.py +++ b/molSimplify/Informatics/MOF/Linker_rotation.py @@ -145,7 +145,7 @@ def linker_rotation(molcif, fcoords, linker, rot_angle): cell_v = np.array(cell_vector) cart_coords = fractional2cart(fcoords,cell_v) distance_mat = compute_distance_matrix3(cell_v,cart_coords) # distance matrix of all atoms - adj_matrix = compute_adj_matrix(distance_mat,allatomtypes) # from distance matrix and heuristics for bond distances, obtains connectivity information in the form of adjacency matrix (graph) + adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes) # from distance matrix and heuristics for bond distances, obtains connectivity information in the form of adjacency matrix (graph) molcif.graph = adj_matrix.todense() # dense form of adjacency matrix / graph is saved to molcif object diff --git a/molSimplify/Informatics/MOF/MOF_descriptors.py b/molSimplify/Informatics/MOF/MOF_descriptors.py index fe6a1ab8..ac8036d4 100644 --- a/molSimplify/Informatics/MOF/MOF_descriptors.py +++ b/molSimplify/Informatics/MOF/MOF_descriptors.py @@ -45,7 +45,8 @@ # Pymatgen is used to get the primitive cell. # ######################################################################################### -### Defining pymatgen function for getting primitive, since molSimplify does not depend on pymatgen. +### Defining pymatgen function for getting primitive, since molSimplify does not depend on pymatgen. +# This function is commented out and should be uncommented if used. from pymatgen.io.cif import CifParser def get_primitive(datapath, writepath, occupancy_tolerance=1): @@ -145,7 +146,7 @@ def gen_and_append_desc(temp_mol, target_list, depth, descriptor_names, descript # the vector to be of constant dimension so we can correlate the output property. # ######################################################################################### -def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchoring_atoms, sbupath, connections_list=False, connections_subgraphlist=False): +def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchoring_atoms, sbupath, linkeranchors_superlist, connections_list=False, connections_subgraphlist=False): """ Generated RACs on the SBUs of the MOF, as well as on the lc atoms (SBU-connected atoms of linkers). @@ -167,6 +168,8 @@ def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchorin Linker atoms that are bonded to a metal. sbupath : str Path of the folder to make a csv file in. + linkeranchors_superlist : list of set + Coordinating atoms of linkers. Number of sets is the number of linkers. connections_list : list of lists of int Each inner list is its own separate linker. The ints are the atom indices of that linker. Length is # of linkers. connections_subgraphlist : list of scipy.sparse.csr.csr_matrix @@ -193,6 +196,10 @@ def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchorin sbu_descriptor_path, sbu_descriptors, lc_descriptors = load_sbu_lc_descriptors(sbupath) + global_connection_indices = [] + for item in linkeranchors_superlist: + global_connection_indices.extend(list(item)) + """"""""" Loop over all SBUs as identified by subgraphs. Then create the mol3Ds for each SBU. """"""""" @@ -273,6 +280,17 @@ def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchorin if not os.path.exists(xyz_path): SBU_mol_fcoords_connected = XYZ_connected(cell, SBU_mol_cart_coords, SBU_mol_adj_mat) writeXYZandGraph(xyz_path, SBU_mol_atom_labels, cell, SBU_mol_fcoords_connected,SBU_mol_adj_mat) + + # Write TXT file indicating the connecting atoms + sbu_index_connection_indices = [] + for item in global_connection_indices: + if item in SBU: + sbu_index_connection_indices.append(SBU.index(item)) + sbu_index_connection_indices = list(np.sort(sbu_index_connection_indices)) # Sort in ascending order + sbu_index_connection_indices = [str(item) for item in sbu_index_connection_indices] + with open(f'{sbupath}/{name}_connection_indices_sbu_{i}.txt', 'w') as f: + f.write(' '.join(sbu_index_connection_indices)) + """"""""" Generate all of the SBU based RACs (full scope, mc) """"""""" @@ -300,7 +318,7 @@ def make_MOF_SBU_RACs(SBUlist, SBU_subgraph, molcif, depth, name, cell, anchorin averaged_SBU_descriptors = np.mean(np.array(descriptor_list), axis=0) # Average the SBU RACs over all of the SBUs in the MOF. return names, averaged_SBU_descriptors, lc_names, averaged_lc_descriptors -def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, cell, linkerpath): +def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, cell, linkerpath, linkeranchors_superlist): """ Generate RACs on the linkers of the MOF. @@ -319,7 +337,9 @@ def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, c cell : numpy.ndarray The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3). linkerpath : str - Path of the folder to make a csv file in. + Path of the folder to make a csv file in. + linkeranchors_superlist : list of set + Coordinating atoms of linkers. Number of sets is the number of linkers. Returns ------- @@ -339,6 +359,11 @@ def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, c linker_descriptors = pd.read_csv(linker_descriptor_path+'/linker_descriptors.csv') else: linker_descriptors = pd.DataFrame() + + global_connection_indices = [] + for item in linkeranchors_superlist: + global_connection_indices.extend(list(item)) + for i, linker in enumerate(linkerlist): # Iterating through the linkers. # Preparing a mol3D object for the current linker. linker_mol = mol3D() @@ -355,6 +380,16 @@ def make_MOF_linker_RACs(linkerlist, linker_subgraphlist, molcif, depth, name, c if not os.path.exists(xyz_path): linker_mol_fcoords_connected = XYZ_connected(cell, linker_mol_cart_coords, linker_mol_adj_mat) writeXYZandGraph(xyz_path, linker_mol_atom_labels, cell, linker_mol_fcoords_connected, linker_mol_adj_mat) + # Write TXT file indicating the connecting atoms + linker_index_connection_indices = [] + for item in global_connection_indices: + if item in linker: + linker_index_connection_indices.append(linker.index(item)) + linker_index_connection_indices = list(np.sort(linker_index_connection_indices)) # Sort in ascending order + linker_index_connection_indices = [str(item) for item in linker_index_connection_indices] + with open(f'{linkerpath}/{name}_connection_indices_linker_{i}.txt', 'w') as f: + f.write(' '.join(linker_index_connection_indices)) + allowed_strings = ['electronegativity', 'nuclear_charge', 'ident', 'topology', 'size'] labels_strings = ['chi', 'Z', 'I', 'T', 'S'] colnames = [] @@ -546,7 +581,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath=False, graph_provided=F if not graph_provided: # Make the adjacency matrix. distance_mat = compute_distance_matrix3(cell_v,cart_coords) try: - adj_matrix,_=compute_adj_matrix(distance_mat,allatomtypes,wiggle_room) + adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes,wiggle_room) except NotImplementedError: failure_str = f"Failed to featurize {name}: atomic overlap\n" full_names, full_descriptors = failure_response(path, failure_str) @@ -675,6 +710,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath=False, graph_provided=F templist = linker_list.copy() long_ligands = False max_min_linker_length , min_max_linker_length = (0,100) # The maximum value of the minimum linker length, and the minimum value of the maximum linker length. Updated later. + linkeranchors_superlist = [] # Will contain the indices of the linker atoms that coordinate to metals for ii, atoms_list in reversed(list(enumerate(linker_list))): # Loop over all linker subgraphs linkeranchors_list = set() linkeranchors_atoms = set() @@ -695,6 +731,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath=False, graph_provided=F sbuanchors_list.add(sbu_atoms) sbu_connect_list.add(kk) #Add if unique SBUs min_length,max_length = linker_length(linker_subgraphlist[ii].todense(), linkeranchors_list) + linkeranchors_superlist.append(linkeranchors_atoms) if len(linkeranchors_list) >= 2 : # linker, and in one ambiguous case, could be a ligand. if len(sbu_connect_list) >= 2: # Something that connects two SBUs is certain to be a linker @@ -791,24 +828,25 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath=False, graph_provided=F """"""""" if len(linker_subgraphlist)>=1: # Featurize cases that did not fail. # try: - descriptor_names, descriptors, lc_descriptor_names, lc_descriptors = make_MOF_SBU_RACs(SBU_list, SBU_subgraphlist, molcif, depth, name, cell_v, anc_atoms, sbupath, connections_list, connections_subgraphlist) - lig_descriptor_names, lig_descriptors = make_MOF_linker_RACs(linker_list, linker_subgraphlist, molcif, depth, name, cell_v, linkerpath) + descriptor_names, descriptors, lc_descriptor_names, lc_descriptors = make_MOF_SBU_RACs(SBU_list, SBU_subgraphlist, molcif, depth, name, cell_v, anc_atoms, sbupath, linkeranchors_superlist, connections_list, connections_subgraphlist) + lig_descriptor_names, lig_descriptors = make_MOF_linker_RACs(linker_list, linker_subgraphlist, molcif, depth, name, cell_v, linkerpath, linkeranchors_superlist) full_names = descriptor_names+lig_descriptor_names+lc_descriptor_names #+ ECFP_names full_descriptors = list(descriptors)+list(lig_descriptors)+list(lc_descriptors) print(len(full_names),len(full_descriptors)) # except: # full_names = [0] # full_descriptors = [0] - elif len(linker_subgraphlist) == 1: # this never happens, right? + elif len(linker_subgraphlist) == 1: # Only one linker identified. print('Suspicious featurization') full_names = [1] full_descriptors = [1] - else: - print('Failed to featurize this MOF.') + else: # Means len(linker_subgraphlist) is zero. + print('Failed to featurize this MOF; no linkers were identified.') full_names = [0] full_descriptors = [0] if (len(full_names) <= 1) and (len(full_descriptors) <= 1): - tmpstr = "Failed to featurize %s\n"%(name) + print(f'full_names is {full_names} and full_descriptors is {full_descriptors}') + tmpstr = "Failed to featurize %s: Only zero or one total linkers identified.\n"%(name) write2file(path,"/FailedStructures.log",tmpstr) return full_names, full_descriptors diff --git a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py index cdce3268..ede9df0d 100644 --- a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py +++ b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py @@ -316,7 +316,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath = False): return full_names, full_descriptors distance_mat = compute_distance_matrix2(cell_v,cart_coords) try: - adj_matrix=compute_adj_matrix(distance_mat,allatomtypes) + adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes) except NotImplementedError: full_names = [0] full_descriptors = [0] diff --git a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py index 9fa8c276..13f58ec7 100644 --- a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py +++ b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py @@ -317,7 +317,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath = False): return full_names, full_descriptors distance_mat = compute_distance_matrix2(cell_v,cart_coords) try: - adj_matrix=compute_adj_matrix(distance_mat,allatomtypes) + adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes) except NotImplementedError: full_names = [0] full_descriptors = [0] diff --git a/molSimplify/Informatics/MOF/PBC_functions.py b/molSimplify/Informatics/MOF/PBC_functions.py index 15583974..266b0e74 100644 --- a/molSimplify/Informatics/MOF/PBC_functions.py +++ b/molSimplify/Informatics/MOF/PBC_functions.py @@ -12,6 +12,8 @@ metals, ) +# PBC: periodic boundary conditions + deg2rad = np.pi/180.0 def readcif(name): """ @@ -278,7 +280,7 @@ def XYZ_connected(cell,cart_coords,adj_mat): from scipy import sparse n_components, labels_components = sparse.csgraph.connected_components(csgraph=adj_mat, directed=False, return_labels=True) # print(n_components,'comp',labels_components) - tested_index = 0 # The label for the connected components. 0 indicates the first connected componet, etc. + tested_index = 0 # The label for the connected components. 0 indicates the first connected component, etc. index_counter = 0 while len(connected_components) < len(cart_coords): try: @@ -304,25 +306,22 @@ def XYZ_connected(cell,cart_coords,adj_mat): def writeXYZfcoords(filename,atoms,cell,fcoords): """ - TODO + Write an XYZ file using fractional coordinates. Parameters ---------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO + filename : str + The path to where the xyz of the MOF structure will be written. + atoms : list of str + The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms. + cell : numpy.ndarray + The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3). + fcoords : numpy.ndarray + The fractional positions of the atoms of the cif file. Shape is (number of atoms, 3). Returns ------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO + None """ with open(filename,"w") as fo: @@ -399,25 +398,20 @@ def returnXYZandGraph(filename,atoms,cell,fcoords,molgraph): def writeXYZcoords(filename,atoms,coords): """ - TODO + Write an XYZ file using Cartesian coordinates. Parameters ---------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO + filename : str + The path to where the xyz of the MOF structure will be written. + atoms : list of str + The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms. + coords : numpy.ndarray + The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3). Returns ------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO + None """ with open(filename,"w") as fo: @@ -429,25 +423,18 @@ def writeXYZcoords(filename,atoms,coords): def writeXYZcoords_withcomment(filename,atoms,coords,comment): """ - TODO + Write an XYZ file using Cartesian coordinates, with a comment included. Parameters ---------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO - - Returns - ------- - TODO : TODO - TODO - TODO : TODO - TODO - TODO : TODO - TODO + filename : str + The path to where the xyz of the MOF structure will be written. + atoms : list of str + The atom types of the cif file, indicated by periodic symbols like 'O' and 'Cu'. Length is the number of atoms. + coords : numpy.ndarray + The Cartesian positions of the atoms of the cif file. Shape is (number of atoms, 3). + comment : str + The comment to include in the XYZ file. """ with open(filename,"w") as fo: @@ -662,6 +649,58 @@ def compute_distance_matrix3(cell, cart_coords, num_cells = 1): distance_matrix = np.min(dist, axis=-1) # Consider the distance between two atoms at the crystal cell shift where they are closest. return distance_matrix +def position_nearest_atom(cell, cart_coords, index_of_interest, num_cells = 1): + """ + Computes the pairwise distances between all atoms in the crystal cell to the atom specified by index_of_interest; returns the position of the nearest atom. + + Parameters + ---------- + cell : numpy.ndarray + The three Cartesian vectors representing the edges of the crystal cell. Shape is (3,3). + cart_coords : numpy.ndarray + The Cartesian coordinates of the crystal atoms. Shape is (number of atoms, 3). + index_of_interest : int + The index of the atom to which we want to find the nearest atom's position. + num_cells : int + The number of crystal cells to put together for the evaluation of distances. + + Returns + ------- + nearest_position : numpy.ndarray + The Cartesian coordinates of the nearest atom. Shape is (3,). + nearest_index : numpy.int64 + The index of the nearest atom. + shift_for_nearest_atom : numpy.ndarray + The crystal cell shifts that position the nearest atom closest to the atom of interest. Shape is (3,). Will look something like [-1 0 -1] or [0 0 0] or etc. + + """ + pos = np.arange(-num_cells, num_cells+1, 1) # [-1, 0, 1] if num_cells is 1 + combos = np.array(np.meshgrid(pos, pos, pos)).T.reshape(-1,3) # The 27 combinations of -1, 0, 1 if num_cells is 1 + shifts = np.sum(np.expand_dims(cell, axis=0)*np.expand_dims(combos, axis=-1), axis=1) # The possible shifts by the crystal cell vectors. + # NxNxCells distance array + shifted = np.expand_dims(cart_coords, axis=1) + np.expand_dims(shifts, axis=0) # The shifted Cartesian coordinates. Shape is (number of atoms, number of combinations in combos, 3) + + # The distances between atoms, across different crystal cell shifts, for the three Cartesian dimensions. + dist = np.expand_dims(np.expand_dims(cart_coords[index_of_interest], axis=0), axis=0) - shifted # Shape is (number of atoms, number of combinations in combos, 3) + # The shape of np.expand_dims(np.expand_dims(cart_coords[index_of_interest], axis=0), axis=0) is (1, 1, 3). These are the coordinates of the atom of interest. + # numpy subtraction expands out the axes of length one for the subtraction. + + # The standard distance formula of square root of x^2 + y^2 + z^2 + dist = np.sqrt(np.sum(np.square(dist), axis=-1)) # Shape is (number of atoms, number of combinations in combos) + + # Want the atom that is closest to index_of_interest, given the ideal shift + # Don't want to consider distance of atom of interest to itself, so I eliminate it from consideration this way. + dist[index_of_interest,:] = np.array([np.Inf]*np.shape(dist)[1]) + # Find the index of the closest atom. + index_nearest_atom = np.argmin(dist) + index_nearest_atom = np.unravel_index(index_nearest_atom, np.shape(dist)) # This is (atom index, shift index) + + # Get the Cartesian coordinates of the nearest atom + nearest_position = shifted[index_nearest_atom[0], index_nearest_atom[1], :] + nearest_index = index_nearest_atom[0] + shift_for_nearest_atom = combos[index_nearest_atom[1],:] + + return nearest_position, nearest_index, shift_for_nearest_atom def make_graph_from_nodes_edges(nodes,edges,attribs): """ @@ -692,13 +731,13 @@ def make_graph_from_nodes_edges(nodes,edges,attribs): gr.add_edges_from(edges) return gr -def mkcell(params): +def mkcell(cpar): """ Update the cell representation to match the parameters. Parameters ---------- - params : numpy.ndarray + cpar : numpy.ndarray The parameters (i.e. lattice constants) of the MOF cell. Specifically, A, B, C, alpha, beta, and gamma. Shape is (6,). Returns @@ -708,8 +747,8 @@ def mkcell(params): """ - a_mag, b_mag, c_mag = params[:3] - alpha, beta, gamma = [x * deg2rad for x in params[3:]] # Converting the angles to radians from degrees. + a_mag, b_mag, c_mag = cpar[:3] + alpha, beta, gamma = [x * deg2rad for x in cpar[3:]] # Converting the angles to radians from degrees. a_vec = np.array([a_mag, 0.0, 0.0]) # a_vec is taken to be along the x axis b_vec = np.array([b_mag * np.cos(gamma), b_mag * np.sin(gamma), 0.0]) # See this depiction of lattice parameters for reasoning behind these equations. https://www.doitpoms.ac.uk/tlplib/crystallography3/parameters.php. b_vec is taken to be in the X-Y plane. c_x = c_mag * np.cos(beta) diff --git a/molSimplify/Informatics/MOF/cluster_extraction.py b/molSimplify/Informatics/MOF/cluster_extraction.py index c836ad6b..cd2dee20 100644 --- a/molSimplify/Informatics/MOF/cluster_extraction.py +++ b/molSimplify/Informatics/MOF/cluster_extraction.py @@ -167,7 +167,7 @@ def get_MOF_descriptors(data, depth, path=False, xyzpath = False): return None, None distance_mat = pbc_funs.compute_distance_matrix2(cell_v, cart_coords) try: - adj_matrix = pbc_funs.compute_adj_matrix(distance_mat, allatomtypes) + adj_matrix, _ = pbc_funs.compute_adj_matrix(distance_mat, allatomtypes) except NotImplementedError: tmpstr = "Failed to featurize %s: atomic overlap\n" % (name) pbc_funs.write2file(path, "/FailedStructures.log", tmpstr) diff --git a/molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py b/molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py index 454b9266..f18edcd7 100644 --- a/molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py +++ b/molSimplify/Informatics/MOF/fragment_MOFs_for_pormake.py @@ -545,7 +545,7 @@ def make_MOF_fragments(data, depth, path=False, xyzpath = False): return None, None distance_mat = compute_distance_matrix2(cell_v,cart_coords) try: - adj_matrix, _ =compute_adj_matrix(distance_mat,allatomtypes) + adj_matrix, _ = compute_adj_matrix(distance_mat,allatomtypes) except NotImplementedError: tmpstr = "Failed to featurize %s: atomic overlap\n"%(name) write2file(path,"/FailedStructures.log",tmpstr) diff --git a/molSimplify/Scripts/inparse.py b/molSimplify/Scripts/inparse.py index 1d0d701e..605eddc8 100644 --- a/molSimplify/Scripts/inparse.py +++ b/molSimplify/Scripts/inparse.py @@ -35,7 +35,7 @@ def checkinput(args, calctype="base"): args.core = ['fe'] # convert full element name to symbol for cores: if args.core[0].lower() in list(metals_conv.keys()): - print(('swithcing core from ' + str(args.core) + + print(('switching core from ' + str(args.core) + ' to ' + str(metals_conv[args.core[0].lower()]))) args.core = [metals_conv[args.core[0].lower()]] if (args.core[0][0].upper()+args.core[0][1:].lower() in elementsbynum): @@ -1111,8 +1111,8 @@ def parseinputs_basic(*p): parser.add_argument("-oxstate", help="oxidation state of the metal") parser.add_argument( "-coord", help="coordination such as 4,5,6", action="store_true") - parser.add_argument("-geometry", help="geometry", action="store_true") - parser.add_argument("-geo", help="geometry", action="store_true") + parser.add_argument("-geometry", help=f"geometry; currently available ones are {', '.join(getgeoms()[2])} which correspond to {', '.join(getgeoms()[1])}", action="store_true") + parser.add_argument("-geo", help=f"geometry; currently available ones are {', '.join(getgeoms()[2])} which correspond to {', '.join(getgeoms()[1])}", action="store_true") parser.add_argument("-lig", help="ligands to be included in complex; ligands.dict options display with command `molsimplify -h liganddict`") parser.add_argument( "-ligocc", help="number of corresponding ligands", action="store_true") # e.g. 1,2,1