From 461edda3b268772060954a609f23eb3f2571c006 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 08:45:39 -0700 Subject: [PATCH 1/6] Break propka.calculations into parts. Broken into three files to simplify import analysis: * `calculations.py` - distance calculations and simple math * `energy.py` - energy calculations * `hydrogens.py` - hydrogen geometry calculations Working towards addressing https://github.com/jensengroup/propka-3.1/issues/49 --- propka/calculations.py | 877 ----------------------------------------- propka/determinants.py | 2 +- propka/energy.py | 531 +++++++++++++++++++++++++ propka/hydrogens.py | 346 ++++++++++++++++ propka/version.py | 51 +-- 5 files changed, 906 insertions(+), 901 deletions(-) create mode 100644 propka/energy.py create mode 100644 propka/hydrogens.py diff --git a/propka/calculations.py b/propka/calculations.py index 3770983..df8db5d 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -66,880 +66,3 @@ def get_smallest_distance(atoms1, atoms2): res_atom1 = atom1 res_atom2 = atom2 return [res_atom1, math.sqrt(res_dist), res_atom2] - - -# TODO - the next set of functions form a distinct "module" for hydrogen addition - - -def setup_bonding_and_protonation(parameters, molecular_container): - """Set up bonding and protonation for a molecule. - - NOTE - the unused `parameters` argument is required for compatibility in version.py - TODO - figure out why there is a similar function in version.py - - Args: - parameters: not used - molecular_container: molecule container. - """ - # make bonds - my_bond_maker = setup_bonding(molecular_container) - # set up ligand atom names - set_ligand_atom_names(molecular_container) - # apply information on pi electrons - my_bond_maker.add_pi_electron_information(molecular_container) - # Protonate atoms - if molecular_container.options.protonate_all: - PROTONATOR = propka.protonate.Protonate(verbose=False) - PROTONATOR.protonate(molecular_container) - - -def setup_bonding(molecular_container): - """Set up bonding for a molecular container. - - Args: - molecular_container: the molecular container in question - Returns: - BondMaker object - """ - my_bond_maker = propka.bonds.BondMaker() - my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) - return my_bond_maker - - -def setup_bonding_and_protonation_30_style(parameters, molecular_container): - """Set up bonding for a molecular container. - - Args: - parameters: parameters for calculation - molecular_container: the molecular container in question - Returns: - BondMaker object - """ - # Protonate atoms - protonate_30_style(molecular_container) - # make bonds - bond_maker = propka.bonds.BondMaker() - bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) - return bond_maker - - -def protonate_30_style(molecular_container): - """Protonate the molecule. - - Args: - molecular_container: molecule - """ - for name in molecular_container.conformation_names: - info('Now protonating', name) - # split atom into residues - curres = -1000000 - residue = [] - o_atom = None - c_atom = None - for atom in molecular_container.conformations[name].atoms: - if atom.res_num != curres: - curres = atom.res_num - if len(residue) > 0: - #backbone - [o_atom, c_atom] = add_backbone_hydrogen( - residue, o_atom, c_atom) - #arginine - if residue[0].res_name == 'ARG': - add_arg_hydrogen(residue) - #histidine - if residue[0].res_name == 'HIS': - add_his_hydrogen(residue) - #tryptophan - if residue[0].res_name == 'TRP': - add_trp_hydrogen(residue) - #amides - if residue[0].res_name in ['GLN', 'ASN']: - add_amd_hydrogen(residue) - residue = [] - if atom.type == 'atom': - residue.append(atom) - - -def set_ligand_atom_names(molecular_container): - """Set names for ligands in molecular container. - - Args: - molecular_container: molecular container for ligand names - """ - for name in molecular_container.conformation_names: - molecular_container.conformations[name].set_ligand_atom_names() - - -def add_arg_hydrogen(residue): - """Adds Arg hydrogen atoms to residues according to the 'old way'. - - Args: - residue: arginine residue to protonate - Returns: - list of hydrogen atoms - """ - #info('Adding arg H',residue) - for atom in residue: - if atom.name == "CD": - cd_atom = atom - elif atom.name == "CZ": - cz_atom = atom - elif atom.name == "NE": - ne_atom = atom - elif atom.name == "NH1": - nh1_atom = atom - elif atom.name == "NH2": - nh2_atom = atom - h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom) - h1_atom.name = "HE" - h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom) - h2_atom.name = "HN1" - h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom) - h3_atom.name = "HN2" - h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom) - h4_atom.name = "HN3" - h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom) - h5_atom.name = "HN4" - return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom] - - -def add_his_hydrogen(residue): - """Adds His hydrogen atoms to residues according to the 'old way'. - - Args: - residue: histidine residue to protonate - """ - for atom in residue: - if atom.name == "CG": - cg_atom = atom - elif atom.name == "ND1": - nd_atom = atom - elif atom.name == "CD2": - cd_atom = atom - elif atom.name == "CE1": - ce_atom = atom - elif atom.name == "NE2": - ne_atom = atom - hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom) - hd_atom.name = "HND" - he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) - he_atom.name = "HNE" - - -def add_trp_hydrogen(residue): - """Adds Trp hydrogen atoms to residues according to the 'old way'. - - Args: - residue: tryptophan residue to protonate - """ - cd_atom = None - ne_atom = None - for atom in residue: - if atom.name == "CD1": - cd_atom = atom - elif atom.name == "NE1": - ne_atom = atom - elif atom.name == "CE2": - ce_atom = atom - if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): - str_ = "Unable to find all atoms for {0:s} {1:s}".format( - residue[0].res_name, residue[0].res_num) - raise ValueError(str_) - he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) - he_atom.name = "HNE" - - -def add_amd_hydrogen(residue): - """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. - - Args: - residue: glutamine or asparagine residue to protonate - """ - c_atom = None - o_atom = None - n_atom = None - for atom in residue: - if ((atom.res_name == "GLN" and atom.name == "CD") - or (atom.res_name == "ASN" and atom.name == "CG")): - c_atom = atom - elif ((atom.res_name == "GLN" and atom.name == "OE1") - or (atom.res_name == "ASN" and atom.name == "OD1")): - o_atom = atom - elif ((atom.res_name == "GLN" and atom.name == "NE2") - or (atom.res_name == "ASN" and atom.name == "ND2")): - n_atom = atom - if (c_atom is None) or (o_atom is None) or (n_atom is None): - str_ = "Unable to find all atoms for {0:s} {1:s}".format( - residue[0].res_name, residue[0].res_num) - raise ValueError(str_) - h1_atom = protonate_direction(n_atom, o_atom, c_atom) - h1_atom.name = "HN1" - h2_atom = protonate_average_direction(n_atom, c_atom, o_atom) - h2_atom.name = "HN2" - - -def add_backbone_hydrogen(residue, o_atom, c_atom): - """Adds hydrogen backbone atoms to residues according to the old way. - - dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect - anything at the moment. Could be improved, but works for now. - - Args: - residue: residue to protonate - o_atom: backbone oxygen atom - c_atom: backbone carbon atom - Returns: - [new backbone oxygen atom, new backbone carbon atom] - """ - new_c_atom = None - new_o_atom = None - n_atom = None - for atom in residue: - if atom.name == "N": - n_atom = atom - if atom.name == "C": - new_c_atom = atom - if atom.name == "O": - new_o_atom = atom - if None in [c_atom, o_atom, n_atom]: - return [new_o_atom, new_c_atom] - if n_atom.res_name == "PRO": - # PRO doesn't have an H-atom; do nothing - pass - else: - h_atom = protonate_direction(n_atom, o_atom, c_atom) - h_atom.name = "H" - return [new_o_atom, new_c_atom] - - -def protonate_direction(x1_atom, x2_atom, x3_atom): - """Protonates an atom, x1_atom, given a direction. - - New direction for x1_atom proton is (x2_atom -> x3_atom). - - Args: - x1_atom: atom to be protonated - x2_atom: atom for direction - x3_atom: other atom for direction - Returns: - new hydrogen atom - """ - dx = (x3_atom.x - x2_atom.x) - dy = (x3_atom.y - x2_atom.y) - dz = (x3_atom.z - x2_atom.z) - length = math.sqrt(dx*dx + dy*dy + dz*dz) - x = x1_atom.x + dx/length - y = x1_atom.y + dy/length - z = x1_atom.z + dz/length - h_atom = make_new_h(x1_atom, x, y, z) - h_atom.name = "H" - return h_atom - - -def protonate_average_direction(x1_atom, x2_atom, x3_atom): - """Protonates an atom, x1_atom, given a direction. - - New direction for x1_atom is (x1_atom/x2_atom -> x3_atom). - Note, this one uses the average of x1_atom & x2_atom (N & O) unlike - the previous N - C = O - - Args: - x1_atom: atom to be protonated - x2_atom: atom for direction - x3_atom: other atom for direction - Returns: - new hydrogen atom - """ - dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x - dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y - dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z - length = math.sqrt(dx*dx + dy*dy + dz*dz) - x = x1_atom.x + dx/length - y = x1_atom.y + dy/length - z = x1_atom.z + dz/length - h_atom = make_new_h(x1_atom, x, y, z) - h_atom.name = "H" - return h_atom - - -def protonate_sp2(x1_atom, x2_atom, x3_atom): - """Protonates a SP2 atom, given a list of atoms - - Args: - x1_atom: atom to set direction - x2_atom: atom to be protonated - x3_atom: other atom to set direction - Returns: - new hydrogen atom - """ - dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x - dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y - dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z - length = math.sqrt(dx*dx + dy*dy + dz*dz) - x = x2_atom.x - dx/length - y = x2_atom.y - dy/length - z = x2_atom.z - dz/length - h_atom = make_new_h(x2_atom, x, y, z) - h_atom.name = "H" - return h_atom - - -def make_new_h(atom, x, y, z): - """Add a new hydrogen to an atom at the specified position. - - Args: - atom: atom to protonate - x: x position of hydrogen - y: y position of hydrogen - z: z position of hydrogen - Returns: - new hydrogen atom - """ - new_h = propka.atom.Atom() - new_h.set_property( - numb=None, name='H{0:s}'.format(atom.name[1:]), - res_name=atom.res_name, chain_id=atom.chain_id, - res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None) - new_h.element = 'H' - new_h.bonded_atoms = [atom] - new_h.charge = 0 - new_h.steric_number = 0 - new_h.number_of_lone_pairs = 0 - new_h.number_of_protons_to_add = 0 - new_h.num_pi_elec_2_3_bonds = 0 - atom.bonded_atoms.append(new_h) - atom.conformation_container.add_atom(new_h) - return new_h - - -# TODO - the remaining functions form a distinct "module" for desolvation - - -# TODO - I have no idea what these constants mean so I labeled them "UNK_" -UNK_MIN_DISTANCE = 2.75 -MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4) -UNK_DIELECTRIC1 = 160 -UNK_DIELECTRIC2 = 30 -UNK_PKA_SCALING1 = 244.12 -UNK_BACKBONE_DISTANCE1 = 6.0 -UNK_BACKBONE_DISTANCE2 = 3.0 -UNK_FANGLE_MIN = 0.001 -UNK_PKA_SCALING2 = 0.8 -COMBINED_NUM_BURIED_MAX = 900 -SEPARATE_NUM_BURIED_MAX = 400 - - -def radial_volume_desolvation(parameters, group): - """Calculate desolvation terms for group. - - Args: - parameters: parameters for desolvation calculation - group: group of atoms for calculation - """ - all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() - volume = 0.0 - # TODO - Nathan really wants to rename the num_volume variable. - # He had to re-read the original paper to figure out what it was. - # A better name would be num_volume. - group.num_volume = 0 - min_dist_4th = MIN_DISTANCE_4TH - for atom in all_atoms: - # ignore atoms in the same residue - if (atom.res_num == group.atom.res_num - and atom.chain_id == group.atom.chain_id): - continue - sq_dist = squared_distance(group, atom) - # desolvation - if sq_dist < parameters.desolv_cutoff_squared: - # use a default relative volume of 1.0 if the volume of the element - # is not found in parameters - # insert check for methyl groups - if atom.element == 'C' and atom.name not in ['CA', 'C']: - dvol = parameters.VanDerWaalsVolume['C4'] - else: - dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0) - dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist) - volume += dv_inc - # buried - if sq_dist < parameters.buried_cutoff_squared: - group.num_volume += 1 - group.buried = calculate_weight(parameters, group.num_volume) - scale_factor = calculate_scale_factor(parameters, group.buried) - volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - group.energy_volume = ( - group.charge * parameters.desolvationPrefactor - * volume_after_allowance * scale_factor) - - -def calculate_scale_factor(parameters, weight): - """Calculate desolvation scaling factor. - - Args: - parameters: parameters for desolvation calculation - weight: weight for scaling factor - Returns: - scaling factor - """ - scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight) - return scale_factor - - -def calculate_weight(parameters, num_volume): - """Calculate the atom-based desolvation weight. - - TODO - figure out why a similar function exists in version.py - - Args: - parameters: parameters for desolvation calculation - num_volume: number of heavy atoms within desolvation calculation volume - Returns: - desolvation weight - """ - weight = ( - float(num_volume - parameters.Nmin) - / float(parameters.Nmax - parameters.Nmin)) - weight = min(1.0, weight) - weight = max(0.0, weight) - return weight - - -def calculate_pair_weight(parameters, num_volume1, num_volume2): - """Calculate the atom-pair based desolvation weight. - - Args: - num_volume1: number of heavy atoms within first desolvation volume - num_volume2: number of heavy atoms within second desolvation volume - Returns: - desolvation weight - """ - num_volume = num_volume1 + num_volume2 - num_min = 2*parameters.Nmin - num_max = 2*parameters.Nmax - weight = float(num_volume - num_min)/float(num_max - num_min) - weight = min(1.0, weight) - weight = max(0.0, weight) - return weight - - -def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0): - """Calculate hydrogen-bond interaction pKa shift. - - Args: - dist: distance for hydrogen bond - dpka_max: maximum pKa value shift - cutoffs: array with max and min distance values - f_angle: angle scaling factor - Returns: - pKa shift value - """ - if dist < cutoffs[0]: - value = 1.00 - elif dist > cutoffs[1]: - value = 0.00 - else: - value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0]) - dpka = dpka_max*value*f_angle - return abs(dpka) - - -def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None): - """Calculate distance and angle factors for three atoms for backbone interactions. - - NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at - return: [O1 -- H2-N3]. - - Also generalized to be able to be used for residue 'centers' for C=O COO interactions. - - Args: - atom1: first atom for calculation (could be None) - atom2: second atom for calculation - atom3: third atom for calculation - center: center point between atoms 1 and 2 - Returns - [distance factor between atoms 1 and 2, - angle factor, - distance factor between atoms 2 and 3] - """ - # The angle factor - # - # ---closest_atom1/2 - # . - # . - # the_hydrogen---closest_atom2/1--- - dx_32 = atom2.x - atom3.x - dy_32 = atom2.y - atom3.y - dz_32 = atom2.z - atom3.z - dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32) - dx_32 = dx_32/dist_23 - dy_32 = dy_32/dist_23 - dz_32 = dz_32/dist_23 - if atom1 is None: - dx_21 = center[0] - atom2.x - dy_21 = center[1] - atom2.y - dz_21 = center[2] - atom2.z - else: - dx_21 = atom1.x - atom2.x - dy_21 = atom1.y - atom2.y - dz_21 = atom1.z - atom2.z - dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21) - dx_21 = dx_21/dist_12 - dy_21 = dy_21/dist_12 - dz_21 = dz_21/dist_12 - f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32 - return dist_12, f_angle, dist_23 - - -def hydrogen_bond_interaction(group1, group2, version): - """Calculate energy for hydrogen bond interactions between two groups. - - Args: - group1: first interacting group - group2: second interacting group - version: an object that contains version-specific parameters - Returns: - hydrogen bond interaction energy - """ - # find the smallest distance between interacting atoms - atoms1 = group1.get_interaction_atoms(group2) - atoms2 = group2.get_interaction_atoms(group1) - [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) - if None in [closest_atom1, closest_atom2]: - warning( - 'Side chain interaction failed for {0:s} and {1:s}'.format( - group1.label, group2.label)) - return None - # get the parameters - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, - closest_atom2) - if (dpka_max is None) or (None in cutoff): - return None - # check that the closest atoms are close enough - if dist >= cutoff[1]: - return None - # check that bond distance criteria is met - min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds - if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1): - return None - # set angle factor - f_angle = 1.0 - if group2.type in version.parameters.angular_dependent_sidechain_interactions: - if closest_atom2.element == 'H': - heavy_atom = closest_atom2.bonded_atoms[0] - hydrogen = closest_atom2 - dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen, - heavy_atom) - else: - # Either the structure is corrupt (no hydrogen), or the heavy atom - # is closer to the titratable atom than the hydrogen. In either - # case, we set the angle factor to 0 - f_angle = 0.0 - elif group1.type in version.parameters.angular_dependent_sidechain_interactions: - if closest_atom1.element == 'H': - heavy_atom = closest_atom1.bonded_atoms[0] - hydrogen = closest_atom1 - dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen, - heavy_atom) - else: - # Either the structure is corrupt (no hydrogen), or the heavy atom - # is closer to the titratable atom than the hydrogen. In either - # case, we set the angle factor to 0 - f_angle = 0.0 - weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) - exception, value = version.check_exceptions(group1, group2) - if exception: - # Do nothing, value should have been assigned. - pass - else: - value = version.calculate_side_chain_energy( - dist, dpka_max, cutoff, weight, f_angle) - return value - - -def electrostatic_interaction(group1, group2, dist, version): - """Calculate electrostatic interaction betwee two groups. - - Args: - group1: first interacting group - group2: second interacting group - dist: distance between groups - version: version-specific object with parameters and functions - Returns: - electrostatic interaction energy or None (if no interaction is appropriate) - """ - # check if we should do coulomb interaction at all - if not version.check_coulomb_pair(group1, group2, dist): - return None - weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) - value = version.calculate_coulomb_energy(dist, weight) - return value - - -def check_coulomb_pair(parameters, group1, group2, dist): - """Checks if this Coulomb interaction should be done. - - NOTE - this is a propka2.0 hack - TODO - figure out why a similar function exists in version.py - - Args: - parameters: parameters for Coulomb calculations - group1: first interacting group - group2: second interacting group - dist: distance between groups - Returns: - Boolean - """ - num_volume = group1.num_volume + group2.num_volume - do_coulomb = True - # check if both groups are titratable (ions are taken care of in - # determinants::set_ion_determinants) - if not (group1.titratable and group2.titratable): - do_coulomb = False - # check if the distance is not too big - if dist > parameters.coulomb_cutoff2: - do_coulomb = False - # check that desolvation is ok - if num_volume < parameters.Nmin: - do_coulomb = False - return do_coulomb - - -def coulomb_energy(dist, weight, parameters): - """Calculates the Coulomb interaction pKa shift based on Coulomb's law. - - Args: - dist: distance for electrostatic interaction - weight: scaling of dielectric constant - parameters: parameter object for calculation - Returns: - pKa shift - """ - diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight - dist = max(dist, parameters.coulomb_cutoff1) - scale = ( - (dist - parameters.coulomb_cutoff2) - / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2)) - scale = max(0.0, scale) - scale = min(1.0, scale) - dpka = UNK_PKA_SCALING1/(diel*dist)*scale - return abs(dpka) - - -def backbone_reorganization(_, conformation): - """Perform calculations related to backbone reorganizations. - - NOTE - this was described in the code as "adding test stuff" - NOTE - this function does not appear to be used - TODO - figure out why a similar function exists in version.py - - Args: - _: not used - conformation: specific molecule conformation - """ - titratable_groups = conformation.get_backbone_reorganisation_groups() - bbc_groups = conformation.get_backbone_co_groups() - - for titratable_group in titratable_groups: - weight = titratable_group.buried - dpka = 0.00 - for bbc_group in bbc_groups: - center = [titratable_group.x, titratable_group.y, titratable_group.z] - atom2 = bbc_group.get_interaction_atoms(titratable_group)[0] - dist, f_angle, _ = angle_distance_factors(atom2=atom2, - atom3=bbc_group.atom, - center=center) - if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN: - value = ( - 1.0 - (dist-UNK_BACKBONE_DISTANCE2) - / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2)) - dpka += UNK_PKA_SCALING2 * min(1.0, value) - titratable_group.energy_local = dpka*weight - - -def check_exceptions(version, group1, group2): - """Checks for atypical behavior in interactions between two groups. - Checks are made based on group type. - - TODO - figure out why a similar function exists in version.py - - Args: - version: version object - group1: first group for check - group2: second group for check - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - res_type1 = group1.type - res_type2 = group2.type - if (res_type1 == "COO") and (res_type2 == "ARG"): - exception, value = check_coo_arg_exception(group1, group2, version) - elif (res_type1 == "ARG") and (res_type2 == "COO"): - exception, value = check_coo_arg_exception(group2, group1, version) - elif (res_type1 == "COO") and (res_type2 == "COO"): - exception, value = check_coo_coo_exception(group1, group2, version) - elif (res_type1 == "CYS") and (res_type2 == "CYS"): - exception, value = check_cys_cys_exception(group1, group2, version) - elif ((res_type1 == "COO") and (res_type2 == "HIS") - or (res_type1 == "HIS") and (res_type2 == "COO")): - exception, value = check_coo_his_exception(group1, group2, version) - elif ((res_type1 == "OCO") and (res_type2 == "HIS") - or (res_type1 == "HIS") and (res_type2 == "OCO")): - exception, value = check_oco_his_exception(group1, group2, version) - elif ((res_type1 == "CYS") and (res_type2 == "HIS") - or (res_type1 == "HIS") and (res_type2 == "CYS")): - exception, value = check_cys_his_exception(group1, group2, version) - else: - # do nothing, no exception for this pair - exception = False - value = None - return exception, value - - -def check_coo_arg_exception(group_coo, group_arg, version): - """Check for COO-ARG interaction atypical behavior. - - Uses the two shortest unique distances (involving 2+2 atoms) - - Args: - group_coo: COO group - group_arg: ARG group - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = True - value_tot = 0.00 - # needs to be this way since you want to find shortest distance first - atoms_coo = [] - atoms_coo.extend(group_coo.get_interaction_atoms(group_arg)) - atoms_arg = [] - atoms_arg.extend(group_arg.get_interaction_atoms(group_coo)) - for _ in ["shortest", "runner-up"]: - # find the closest interaction pair - [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo, - atoms_arg) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom, - closest_arg_atom) - # calculate and sum up interaction energy - f_angle = 1.00 - if group_arg.type in version.parameters.angular_dependent_sidechain_interactions: - atom3 = closest_arg_atom.bonded_atoms[0] - dist, f_angle, _ = angle_distance_factors(closest_coo_atom, - closest_arg_atom, - atom3) - value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) - value_tot += value - # remove closest atoms before we attemp to find the runner-up pair - atoms_coo.remove(closest_coo_atom) - atoms_arg.remove(closest_arg_atom) - return exception, value_tot - - -def check_coo_coo_exception(group1, group2, version): - """Check for COO-COO hydrogen-bond atypical interaction behavior. - - Args: - group1: first group for check - group2: second group for check - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = True - interact_groups12 = group1.get_interaction_atoms(group2) - interact_groups21 = group2.get_interaction_atoms(group1) - [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12, - interact_groups21) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, - closest_atom2) - f_angle = 1.00 - value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) - weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume) - value = value * (1.0 + weight) - return exception, value - - -def check_coo_his_exception(group1, group2, version): - """Check for COO-HIS atypical interaction behavior - - Args: - group1: first group for check - group2: second group for check - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = False - if check_buried(group1.num_volume, group2.num_volume): - exception = True - return exception, version.parameters.COO_HIS_exception - - -def check_oco_his_exception(group1, group2, version): - """Check for OCO-HIS atypical interaction behavior - - Args: - group1: first group for check - group2: second group for check - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = False - if check_buried(group1.num_volume, group2.num_volume): - exception = True - return exception, version.parameters.OCO_HIS_exception - - -def check_cys_his_exception(group1, group2, version): - """Check for CYS-HIS atypical interaction behavior - - Args: - group1: first group for check - group2: second group for check - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = False - if check_buried(group1.num_volume, group2.num_volume): - exception = True - return exception, version.parameters.CYS_HIS_exception - - -def check_cys_cys_exception(group1, group2, version): - """Check for CYS-CYS atypical interaction behavior - - Args: - group1: first group for check - group2: second group for check - version: version object - Returns: - 1. Boolean indicating atypical behavior, - 2. value associated with atypical interaction (None if Boolean is False) - """ - exception = False - if check_buried(group1.num_volume, group2.num_volume): - exception = True - return exception, version.parameters.CYS_CYS_exception - - -def check_buried(num_volume1, num_volume2): - """Check to see if an interaction is buried - - Args: - num_volume1: number of buried heavy atoms in volume 1 - num_volume2: number of buried heavy atoms in volume 2 - Returns: - True if interaction is buried, False otherwise - """ - if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) - and (num_volume1 <= SEPARATE_NUM_BURIED_MAX - or num_volume2 <= SEPARATE_NUM_BURIED_MAX)): - return False - return True diff --git a/propka/determinants.py b/propka/determinants.py index 9a82e33..7dc8ee3 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -8,7 +8,7 @@ import propka.lib import propka.vector_algebra from propka.calculations import squared_distance, get_smallest_distance -from propka.calculations import angle_distance_factors, hydrogen_bond_energy +from propka.energy import angle_distance_factors, hydrogen_bond_energy from propka.determinant import Determinant diff --git a/propka/energy.py b/propka/energy.py new file mode 100644 index 0000000..631c1fc --- /dev/null +++ b/propka/energy.py @@ -0,0 +1,531 @@ +"""Energy calculations.""" +import math +from propka.lib import warning +from propka.calculations import squared_distance, get_smallest_distance + + +# TODO - I have no idea what these constants mean so I labeled them "UNK_" +UNK_MIN_DISTANCE = 2.75 +MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4) +UNK_DIELECTRIC1 = 160 +UNK_DIELECTRIC2 = 30 +UNK_PKA_SCALING1 = 244.12 +UNK_BACKBONE_DISTANCE1 = 6.0 +UNK_BACKBONE_DISTANCE2 = 3.0 +UNK_FANGLE_MIN = 0.001 +UNK_PKA_SCALING2 = 0.8 +COMBINED_NUM_BURIED_MAX = 900 +SEPARATE_NUM_BURIED_MAX = 400 + + +def radial_volume_desolvation(parameters, group): + """Calculate desolvation terms for group. + + Args: + parameters: parameters for desolvation calculation + group: group of atoms for calculation + """ + all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() + volume = 0.0 + group.num_volume = 0 + min_dist_4th = MIN_DISTANCE_4TH + for atom in all_atoms: + # ignore atoms in the same residue + if (atom.res_num == group.atom.res_num + and atom.chain_id == group.atom.chain_id): + continue + sq_dist = squared_distance(group, atom) + # desolvation + if sq_dist < parameters.desolv_cutoff_squared: + # use a default relative volume of 1.0 if the volume of the element + # is not found in parameters + # insert check for methyl groups + if atom.element == 'C' and atom.name not in ['CA', 'C']: + dvol = parameters.VanDerWaalsVolume['C4'] + else: + dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0) + dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist) + volume += dv_inc + # buried + if sq_dist < parameters.buried_cutoff_squared: + group.num_volume += 1 + group.buried = calculate_weight(parameters, group.num_volume) + scale_factor = calculate_scale_factor(parameters, group.buried) + volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) + group.energy_volume = ( + group.charge * parameters.desolvationPrefactor + * volume_after_allowance * scale_factor) + + +def calculate_scale_factor(parameters, weight): + """Calculate desolvation scaling factor. + + Args: + parameters: parameters for desolvation calculation + weight: weight for scaling factor + Returns: + scaling factor + """ + scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight) + return scale_factor + + +def calculate_weight(parameters, num_volume): + """Calculate the atom-based desolvation weight. + + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for desolvation calculation + num_volume: number of heavy atoms within desolvation calculation volume + Returns: + desolvation weight + """ + weight = ( + float(num_volume - parameters.Nmin) + / float(parameters.Nmax - parameters.Nmin)) + weight = min(1.0, weight) + weight = max(0.0, weight) + return weight + + +def calculate_pair_weight(parameters, num_volume1, num_volume2): + """Calculate the atom-pair based desolvation weight. + + Args: + num_volume1: number of heavy atoms within first desolvation volume + num_volume2: number of heavy atoms within second desolvation volume + Returns: + desolvation weight + """ + num_volume = num_volume1 + num_volume2 + num_min = 2*parameters.Nmin + num_max = 2*parameters.Nmax + weight = float(num_volume - num_min)/float(num_max - num_min) + weight = min(1.0, weight) + weight = max(0.0, weight) + return weight + + +def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0): + """Calculate hydrogen-bond interaction pKa shift. + + Args: + dist: distance for hydrogen bond + dpka_max: maximum pKa value shift + cutoffs: array with max and min distance values + f_angle: angle scaling factor + Returns: + pKa shift value + """ + if dist < cutoffs[0]: + value = 1.00 + elif dist > cutoffs[1]: + value = 0.00 + else: + value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0]) + dpka = dpka_max*value*f_angle + return abs(dpka) + + +def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None): + """Calculate distance and angle factors for three atoms for backbone interactions. + + NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at + return: [O1 -- H2-N3]. + + Also generalized to be able to be used for residue 'centers' for C=O COO interactions. + + Args: + atom1: first atom for calculation (could be None) + atom2: second atom for calculation + atom3: third atom for calculation + center: center point between atoms 1 and 2 + Returns + [distance factor between atoms 1 and 2, + angle factor, + distance factor between atoms 2 and 3] + """ + # The angle factor + # + # ---closest_atom1/2 + # . + # . + # the_hydrogen---closest_atom2/1--- + dx_32 = atom2.x - atom3.x + dy_32 = atom2.y - atom3.y + dz_32 = atom2.z - atom3.z + dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32) + dx_32 = dx_32/dist_23 + dy_32 = dy_32/dist_23 + dz_32 = dz_32/dist_23 + if atom1 is None: + dx_21 = center[0] - atom2.x + dy_21 = center[1] - atom2.y + dz_21 = center[2] - atom2.z + else: + dx_21 = atom1.x - atom2.x + dy_21 = atom1.y - atom2.y + dz_21 = atom1.z - atom2.z + dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21) + dx_21 = dx_21/dist_12 + dy_21 = dy_21/dist_12 + dz_21 = dz_21/dist_12 + f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32 + return dist_12, f_angle, dist_23 + + +def hydrogen_bond_interaction(group1, group2, version): + """Calculate energy for hydrogen bond interactions between two groups. + + Args: + group1: first interacting group + group2: second interacting group + version: an object that contains version-specific parameters + Returns: + hydrogen bond interaction energy + """ + # find the smallest distance between interacting atoms + atoms1 = group1.get_interaction_atoms(group2) + atoms2 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) + if None in [closest_atom1, closest_atom2]: + warning( + 'Side chain interaction failed for {0:s} and {1:s}'.format( + group1.label, group2.label)) + return None + # get the parameters + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) + if (dpka_max is None) or (None in cutoff): + return None + # check that the closest atoms are close enough + if dist >= cutoff[1]: + return None + # check that bond distance criteria is met + min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds + if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1): + return None + # set angle factor + f_angle = 1.0 + if group2.type in version.parameters.angular_dependent_sidechain_interactions: + if closest_atom2.element == 'H': + heavy_atom = closest_atom2.bonded_atoms[0] + hydrogen = closest_atom2 + dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen, + heavy_atom) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 + f_angle = 0.0 + elif group1.type in version.parameters.angular_dependent_sidechain_interactions: + if closest_atom1.element == 'H': + heavy_atom = closest_atom1.bonded_atoms[0] + hydrogen = closest_atom1 + dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen, + heavy_atom) + else: + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 + f_angle = 0.0 + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) + exception, value = version.check_exceptions(group1, group2) + if exception: + # Do nothing, value should have been assigned. + pass + else: + value = version.calculate_side_chain_energy( + dist, dpka_max, cutoff, weight, f_angle) + return value + + +def electrostatic_interaction(group1, group2, dist, version): + """Calculate electrostatic interaction betwee two groups. + + Args: + group1: first interacting group + group2: second interacting group + dist: distance between groups + version: version-specific object with parameters and functions + Returns: + electrostatic interaction energy or None (if no interaction is appropriate) + """ + # check if we should do coulomb interaction at all + if not version.check_coulomb_pair(group1, group2, dist): + return None + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) + value = version.calculate_coulomb_energy(dist, weight) + return value + + +def check_coulomb_pair(parameters, group1, group2, dist): + """Checks if this Coulomb interaction should be done. + + NOTE - this is a propka2.0 hack + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for Coulomb calculations + group1: first interacting group + group2: second interacting group + dist: distance between groups + Returns: + Boolean + """ + num_volume = group1.num_volume + group2.num_volume + do_coulomb = True + # check if both groups are titratable (ions are taken care of in + # determinants::set_ion_determinants) + if not (group1.titratable and group2.titratable): + do_coulomb = False + # check if the distance is not too big + if dist > parameters.coulomb_cutoff2: + do_coulomb = False + # check that desolvation is ok + if num_volume < parameters.Nmin: + do_coulomb = False + return do_coulomb + + +def coulomb_energy(dist, weight, parameters): + """Calculates the Coulomb interaction pKa shift based on Coulomb's law. + + Args: + dist: distance for electrostatic interaction + weight: scaling of dielectric constant + parameters: parameter object for calculation + Returns: + pKa shift + """ + diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight + dist = max(dist, parameters.coulomb_cutoff1) + scale = ( + (dist - parameters.coulomb_cutoff2) + / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2)) + scale = max(0.0, scale) + scale = min(1.0, scale) + dpka = UNK_PKA_SCALING1/(diel*dist)*scale + return abs(dpka) + + +def backbone_reorganization(_, conformation): + """Perform calculations related to backbone reorganizations. + + NOTE - this was described in the code as "adding test stuff" + NOTE - this function does not appear to be used + TODO - figure out why a similar function exists in version.py + + Args: + _: not used + conformation: specific molecule conformation + """ + titratable_groups = conformation.get_backbone_reorganisation_groups() + bbc_groups = conformation.get_backbone_co_groups() + + for titratable_group in titratable_groups: + weight = titratable_group.buried + dpka = 0.00 + for bbc_group in bbc_groups: + center = [titratable_group.x, titratable_group.y, titratable_group.z] + atom2 = bbc_group.get_interaction_atoms(titratable_group)[0] + dist, f_angle, _ = angle_distance_factors(atom2=atom2, + atom3=bbc_group.atom, + center=center) + if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN: + value = ( + 1.0 - (dist-UNK_BACKBONE_DISTANCE2) + / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2)) + dpka += UNK_PKA_SCALING2 * min(1.0, value) + titratable_group.energy_local = dpka*weight + + +def check_exceptions(version, group1, group2): + """Checks for atypical behavior in interactions between two groups. + Checks are made based on group type. + + TODO - figure out why a similar function exists in version.py + + Args: + version: version object + group1: first group for check + group2: second group for check + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + res_type1 = group1.type + res_type2 = group2.type + if (res_type1 == "COO") and (res_type2 == "ARG"): + exception, value = check_coo_arg_exception(group1, group2, version) + elif (res_type1 == "ARG") and (res_type2 == "COO"): + exception, value = check_coo_arg_exception(group2, group1, version) + elif (res_type1 == "COO") and (res_type2 == "COO"): + exception, value = check_coo_coo_exception(group1, group2, version) + elif (res_type1 == "CYS") and (res_type2 == "CYS"): + exception, value = check_cys_cys_exception(group1, group2, version) + elif ((res_type1 == "COO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "COO")): + exception, value = check_coo_his_exception(group1, group2, version) + elif ((res_type1 == "OCO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "OCO")): + exception, value = check_oco_his_exception(group1, group2, version) + elif ((res_type1 == "CYS") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "CYS")): + exception, value = check_cys_his_exception(group1, group2, version) + else: + # do nothing, no exception for this pair + exception = False + value = None + return exception, value + + +def check_coo_arg_exception(group_coo, group_arg, version): + """Check for COO-ARG interaction atypical behavior. + + Uses the two shortest unique distances (involving 2+2 atoms) + + Args: + group_coo: COO group + group_arg: ARG group + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = True + value_tot = 0.00 + # needs to be this way since you want to find shortest distance first + atoms_coo = [] + atoms_coo.extend(group_coo.get_interaction_atoms(group_arg)) + atoms_arg = [] + atoms_arg.extend(group_arg.get_interaction_atoms(group_coo)) + for _ in ["shortest", "runner-up"]: + # find the closest interaction pair + [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo, + atoms_arg) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom, + closest_arg_atom) + # calculate and sum up interaction energy + f_angle = 1.00 + if group_arg.type in version.parameters.angular_dependent_sidechain_interactions: + atom3 = closest_arg_atom.bonded_atoms[0] + dist, f_angle, _ = angle_distance_factors(closest_coo_atom, + closest_arg_atom, + atom3) + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) + value_tot += value + # remove closest atoms before we attemp to find the runner-up pair + atoms_coo.remove(closest_coo_atom) + atoms_arg.remove(closest_arg_atom) + return exception, value_tot + + +def check_coo_coo_exception(group1, group2, version): + """Check for COO-COO hydrogen-bond atypical interaction behavior. + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = True + interact_groups12 = group1.get_interaction_atoms(group2) + interact_groups21 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12, + interact_groups21) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) + f_angle = 1.00 + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) + weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume) + value = value * (1.0 + weight) + return exception, value + + +def check_coo_his_exception(group1, group2, version): + """Check for COO-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = False + if check_buried(group1.num_volume, group2.num_volume): + exception = True + return exception, version.parameters.COO_HIS_exception + + +def check_oco_his_exception(group1, group2, version): + """Check for OCO-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = False + if check_buried(group1.num_volume, group2.num_volume): + exception = True + return exception, version.parameters.OCO_HIS_exception + + +def check_cys_his_exception(group1, group2, version): + """Check for CYS-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = False + if check_buried(group1.num_volume, group2.num_volume): + exception = True + return exception, version.parameters.CYS_HIS_exception + + +def check_cys_cys_exception(group1, group2, version): + """Check for CYS-CYS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ + exception = False + if check_buried(group1.num_volume, group2.num_volume): + exception = True + return exception, version.parameters.CYS_CYS_exception + + +def check_buried(num_volume1, num_volume2): + """Check to see if an interaction is buried + + Args: + num_volume1: number of buried heavy atoms in volume 1 + num_volume2: number of buried heavy atoms in volume 2 + Returns: + True if interaction is buried, False otherwise + """ + if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) + and (num_volume1 <= SEPARATE_NUM_BURIED_MAX + or num_volume2 <= SEPARATE_NUM_BURIED_MAX)): + return False + return True diff --git a/propka/hydrogens.py b/propka/hydrogens.py new file mode 100644 index 0000000..1427758 --- /dev/null +++ b/propka/hydrogens.py @@ -0,0 +1,346 @@ +"""Calculations related to hydrogen placement.""" +import math +from propka.lib import info +from propka.protonate import Protonate +from propka.bonds import BondMaker +from propka.atom import Atom + + + +def setup_bonding_and_protonation(molecular_container): + """Set up bonding and protonation for a molecule. + + Args: + parameters: not used + molecular_container: molecule container. + """ + # make bonds + my_bond_maker = setup_bonding(molecular_container) + # set up ligand atom names + set_ligand_atom_names(molecular_container) + # apply information on pi electrons + my_bond_maker.add_pi_electron_information(molecular_container) + # Protonate atoms + if molecular_container.options.protonate_all: + protonator = Protonate(verbose=False) + protonator.protonate(molecular_container) + + +def setup_bonding(molecular_container): + """Set up bonding for a molecular container. + + Args: + molecular_container: the molecular container in question + Returns: + BondMaker object + """ + my_bond_maker = BondMaker() + my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + return my_bond_maker + + +def setup_bonding_and_protonation_30_style(molecular_container): + """Set up bonding for a molecular container. + + Args: + molecular_container: the molecular container in question + Returns: + BondMaker object + """ + # Protonate atoms + protonate_30_style(molecular_container) + # make bonds + bond_maker = BondMaker() + bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + return bond_maker + + +def protonate_30_style(molecular_container): + """Protonate the molecule. + + Args: + molecular_container: molecule + """ + for name in molecular_container.conformation_names: + info('Now protonating', name) + # split atom into residues + curres = -1000000 + residue = [] + o_atom = None + c_atom = None + for atom in molecular_container.conformations[name].atoms: + if atom.res_num != curres: + curres = atom.res_num + if len(residue) > 0: + #backbone + [o_atom, c_atom] = add_backbone_hydrogen( + residue, o_atom, c_atom) + #arginine + if residue[0].res_name == 'ARG': + add_arg_hydrogen(residue) + #histidine + if residue[0].res_name == 'HIS': + add_his_hydrogen(residue) + #tryptophan + if residue[0].res_name == 'TRP': + add_trp_hydrogen(residue) + #amides + if residue[0].res_name in ['GLN', 'ASN']: + add_amd_hydrogen(residue) + residue = [] + if atom.type == 'atom': + residue.append(atom) + + +def set_ligand_atom_names(molecular_container): + """Set names for ligands in molecular container. + + Args: + molecular_container: molecular container for ligand names + """ + for name in molecular_container.conformation_names: + molecular_container.conformations[name].set_ligand_atom_names() + + +def add_arg_hydrogen(residue): + """Adds Arg hydrogen atoms to residues according to the 'old way'. + + Args: + residue: arginine residue to protonate + Returns: + list of hydrogen atoms + """ + #info('Adding arg H',residue) + for atom in residue: + if atom.name == "CD": + cd_atom = atom + elif atom.name == "CZ": + cz_atom = atom + elif atom.name == "NE": + ne_atom = atom + elif atom.name == "NH1": + nh1_atom = atom + elif atom.name == "NH2": + nh2_atom = atom + h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom) + h1_atom.name = "HE" + h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom) + h2_atom.name = "HN1" + h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom) + h3_atom.name = "HN2" + h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom) + h4_atom.name = "HN3" + h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom) + h5_atom.name = "HN4" + return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom] + + +def add_his_hydrogen(residue): + """Adds His hydrogen atoms to residues according to the 'old way'. + + Args: + residue: histidine residue to protonate + """ + for atom in residue: + if atom.name == "CG": + cg_atom = atom + elif atom.name == "ND1": + nd_atom = atom + elif atom.name == "CD2": + cd_atom = atom + elif atom.name == "CE1": + ce_atom = atom + elif atom.name == "NE2": + ne_atom = atom + hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom) + hd_atom.name = "HND" + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" + + +def add_trp_hydrogen(residue): + """Adds Trp hydrogen atoms to residues according to the 'old way'. + + Args: + residue: tryptophan residue to protonate + """ + cd_atom = None + ne_atom = None + for atom in residue: + if atom.name == "CD1": + cd_atom = atom + elif atom.name == "NE1": + ne_atom = atom + elif atom.name == "CE2": + ce_atom = atom + if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): + str_ = "Unable to find all atoms for {0:s} {1:s}".format( + residue[0].res_name, residue[0].res_num) + raise ValueError(str_) + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" + + +def add_amd_hydrogen(residue): + """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. + + Args: + residue: glutamine or asparagine residue to protonate + """ + c_atom = None + o_atom = None + n_atom = None + for atom in residue: + if ((atom.res_name == "GLN" and atom.name == "CD") + or (atom.res_name == "ASN" and atom.name == "CG")): + c_atom = atom + elif ((atom.res_name == "GLN" and atom.name == "OE1") + or (atom.res_name == "ASN" and atom.name == "OD1")): + o_atom = atom + elif ((atom.res_name == "GLN" and atom.name == "NE2") + or (atom.res_name == "ASN" and atom.name == "ND2")): + n_atom = atom + if (c_atom is None) or (o_atom is None) or (n_atom is None): + str_ = "Unable to find all atoms for {0:s} {1:s}".format( + residue[0].res_name, residue[0].res_num) + raise ValueError(str_) + h1_atom = protonate_direction(n_atom, o_atom, c_atom) + h1_atom.name = "HN1" + h2_atom = protonate_average_direction(n_atom, c_atom, o_atom) + h2_atom.name = "HN2" + + +def add_backbone_hydrogen(residue, o_atom, c_atom): + """Adds hydrogen backbone atoms to residues according to the old way. + + dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect + anything at the moment. Could be improved, but works for now. + + Args: + residue: residue to protonate + o_atom: backbone oxygen atom + c_atom: backbone carbon atom + Returns: + [new backbone oxygen atom, new backbone carbon atom] + """ + new_c_atom = None + new_o_atom = None + n_atom = None + for atom in residue: + if atom.name == "N": + n_atom = atom + if atom.name == "C": + new_c_atom = atom + if atom.name == "O": + new_o_atom = atom + if None in [c_atom, o_atom, n_atom]: + return [new_o_atom, new_c_atom] + if n_atom.res_name == "PRO": + # PRO doesn't have an H-atom; do nothing + pass + else: + h_atom = protonate_direction(n_atom, o_atom, c_atom) + h_atom.name = "H" + return [new_o_atom, new_c_atom] + + +def protonate_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. + + New direction for x1_atom proton is (x2_atom -> x3_atom). + + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom + """ + dx = (x3_atom.x - x2_atom.x) + dy = (x3_atom.y - x2_atom.y) + dz = (x3_atom.z - x2_atom.z) + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom + + +def protonate_average_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. + + New direction for x1_atom is (x1_atom/x2_atom -> x3_atom). + Note, this one uses the average of x1_atom & x2_atom (N & O) unlike + the previous N - C = O + + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom + """ + dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x + dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y + dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom + + +def protonate_sp2(x1_atom, x2_atom, x3_atom): + """Protonates a SP2 atom, given a list of atoms + + Args: + x1_atom: atom to set direction + x2_atom: atom to be protonated + x3_atom: other atom to set direction + Returns: + new hydrogen atom + """ + dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x + dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y + dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x2_atom.x - dx/length + y = x2_atom.y - dy/length + z = x2_atom.z - dz/length + h_atom = make_new_h(x2_atom, x, y, z) + h_atom.name = "H" + return h_atom + + +def make_new_h(atom, x, y, z): + """Add a new hydrogen to an atom at the specified position. + + Args: + atom: atom to protonate + x: x position of hydrogen + y: y position of hydrogen + z: z position of hydrogen + Returns: + new hydrogen atom + """ + new_h = Atom() + new_h.set_property( + numb=None, name='H{0:s}'.format(atom.name[1:]), + res_name=atom.res_name, chain_id=atom.chain_id, + res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None) + new_h.element = 'H' + new_h.bonded_atoms = [atom] + new_h.charge = 0 + new_h.steric_number = 0 + new_h.number_of_lone_pairs = 0 + new_h.number_of_protons_to_add = 0 + new_h.num_pi_elec_2_3_bonds = 0 + atom.bonded_atoms.append(new_h) + atom.conformation_container.add_atom(new_h) + return new_h + + diff --git a/propka/version.py b/propka/version.py index 9ff5d87..0eb0737 100644 --- a/propka/version.py +++ b/propka/version.py @@ -3,7 +3,13 @@ TODO - this module unnecessarily confuses the code. Can we eliminate it? """ from propka.lib import info -import propka.calculations as calcs +from propka.hydrogens import setup_bonding_and_protonation, setup_bonding +from propka.hydrogens import setup_bonding_and_protonation_30_style +from propka.energy import radial_volume_desolvation, calculate_pair_weight +from propka.energy import hydrogen_bond_energy, hydrogen_bond_interaction +from propka.energy import electrostatic_interaction, check_coulomb_pair +from propka.energy import coulomb_energy, check_exceptions +from propka.energy import backbone_reorganization class Version: @@ -79,8 +85,7 @@ def check_exceptions(self, group1, group2): def setup_bonding_and_protonation(self, molecular_container): """Setup bonding and protonation using assigned model.""" - return self.molecular_preparation_method( - self.parameters, molecular_container) + return self.molecular_preparation_method(molecular_container) def setup_bonding(self, molecular_container): """Setup bonding using assigned model.""" @@ -94,18 +99,18 @@ def __init__(self, parameters): """Initialize object with parameters.""" # set the calculation rutines used in this version super().__init__(parameters) - self.molecular_preparation_method = calcs.setup_bonding_and_protonation - self.prepare_bonds = calcs.setup_bonding - self.desolvation_model = calcs.radial_volume_desolvation - self.weight_pair_method = calcs.calculate_pair_weight - self.sidechain_interaction_model = calcs.hydrogen_bond_energy - self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction - self.electrostatic_interaction_model = calcs.electrostatic_interaction - self.check_coulomb_pair_method = calcs.check_coulomb_pair - self.coulomb_interaction_model = calcs.coulomb_energy - self.backbone_interaction_model = calcs.hydrogen_bond_energy - self.backbone_reorganisation_method = calcs.backbone_reorganization - self.exception_check_method = calcs.check_exceptions + self.molecular_preparation_method = setup_bonding_and_protonation + self.prepare_bonds = setup_bonding + self.desolvation_model = radial_volume_desolvation + self.weight_pair_method = calculate_pair_weight + self.sidechain_interaction_model = hydrogen_bond_energy + self.hydrogen_bond_interaction_model = hydrogen_bond_interaction + self.electrostatic_interaction_model = electrostatic_interaction + self.check_coulomb_pair_method = check_coulomb_pair + self.coulomb_interaction_model = coulomb_energy + self.backbone_interaction_model = hydrogen_bond_energy + self.backbone_reorganisation_method = backbone_reorganization + self.exception_check_method = check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): """Get hydrogen bond parameters for two atoms. @@ -265,14 +270,14 @@ def __init__(self, parameters): # set the calculation routines used in this version super().__init__(parameters) self.molecular_preparation_method = ( - calcs.setup_bonding_and_protonation_30_style) - self.desolvation_model = calcs.radial_volume_desolvation - self.weight_pair_method = calcs.calculate_pair_weight - self.sidechain_interaction_model = calcs.hydrogen_bond_energy - self.check_coulomb_pair_method = calcs.check_coulomb_pair - self.coulomb_interaction_model = calcs.coulomb_energy - self.backbone_reorganisation_method = calcs.backbone_reorganization - self.exception_check_method = calcs.check_exceptions + setup_bonding_and_protonation_30_style) + self.desolvation_model = radial_volume_desolvation + self.weight_pair_method = calculate_pair_weight + self.sidechain_interaction_model = hydrogen_bond_energy + self.check_coulomb_pair_method = check_coulomb_pair + self.coulomb_interaction_model = coulomb_energy + self.backbone_reorganisation_method = backbone_reorganization + self.exception_check_method = check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): """Get hydrogen bond parameters for two atoms. From 397d5e10aa02595b6f11a61ed762a1c94e7c8f59 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 09:26:13 -0700 Subject: [PATCH 2/6] Remove cyclic import based on atom.py Partially addresses https://github.com/jensengroup/propka-3.1/issues/49 --- propka/atom.py | 32 ++++----- propka/calculations.py | 3 - propka/group.py | 13 ++++ propka/ligand_pka_values.py | 7 +- propka/molecular_container.py | 16 ++--- propka/output.py | 121 ++++++++++++++++++++++++++++++++-- propka/pdb.py | 116 +------------------------------- 7 files changed, 155 insertions(+), 153 deletions(-) diff --git a/propka/atom.py b/propka/atom.py index 1e87af2..3bb3f12 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -1,8 +1,7 @@ """Atom class - contains all atom information found in the PDB file""" import string -import propka.lib -import propka.group from . import hybrid36 +from propka.lib import make_tidy_atom_label # Format strings that get used in multiple places (or are very complex) @@ -50,6 +49,9 @@ def __init__(self, line=None): self.z = None self.group = None self.group_type = None + self.group_label = None + self.group_model_pka = None + self.group_model_pka_set = None self.number_of_bonded_elements = {} self.cysteine_bridge = False self.bonded_atoms = [] @@ -267,7 +269,7 @@ def make_input_line(self): model_pka = PKA_FMT.format(self.group.model_pka) str_ = INPUT_LINE_FMT.format( type=self.type.upper(), r=self, - atom_label=propka.lib.make_tidy_atom_label(self.name, self.element), + atom_label=make_tidy_atom_label(self.name, self.element), group=group, pka=model_pka) return str_ @@ -313,21 +315,11 @@ def get_input_parameters(self): self.occ = self.occ.replace('ALG', 'titratable_ligand') self.occ = self.occ.replace('BLG', 'titratable_ligand') self.occ = self.occ.replace('LG', 'non_titratable_ligand') - # try to initialise the group - try: - group_attr = "{0:s}_group".format(self.occ) - group_attr = getattr(propka.group, group_attr) - self.group = group_attr(self) - except: - # TODO - be more specific with expection handling here - str_ = ( - '{0:s} in input_file is not recognized as a group'.format( - self.occ)) - raise Exception(str_) + self.group_label = "{0:s}_group".format(self.occ) # set the model pKa value if self.beta != '-': - self.group.model_pka = float(self.beta) - self.group.model_pka_set = True + self.group_model_pka = float(self.beta) + self.group_model_pka_set = True # set occ and beta to standard values self.occ = '1.00' self.beta = '0.00' @@ -344,7 +336,7 @@ def make_pdb_line(self): """ str_ = PDB_LINE_FMT1.format( type=self.type.upper(), r=self, - atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) + atom_label=make_tidy_atom_label(self.name, self.element)) return str_ def make_mol2_line(self, id_): @@ -359,7 +351,7 @@ def make_mol2_line(self, id_): """ str_ = MOL2_LINE_FMT.format( id=id_, r=self, - atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) + atom_label=make_tidy_atom_label(self.name, self.element)) return str_ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, @@ -397,7 +389,7 @@ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, str_ = PDB_LINE_FMT2.format( numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num, x=x, y=y, z=z, occ=occ, beta=beta, - atom_label=propka.lib.make_tidy_atom_label(name, self.element) + atom_label=make_tidy_atom_label(name, self.element) ) return str_ @@ -408,7 +400,7 @@ def get_tidy_label(self): Returns: String with label""" - return propka.lib.make_tidy_atom_label(self.name, self.element) + return make_tidy_atom_label(self.name, self.element) def __str__(self): """Return an undefined-format string version of this atom.""" diff --git a/propka/calculations.py b/propka/calculations.py index df8db5d..915b713 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -1,8 +1,5 @@ """PROPKA calculations.""" import math -import propka.protonate -import propka.bonds -from propka.lib import warning, info # TODO - this file should be broken into three separate files: diff --git a/propka/group.py b/propka/group.py index 465c195..96c0a30 100644 --- a/propka/group.py +++ b/propka/group.py @@ -6,6 +6,7 @@ from propka.determinant import Determinant from propka.lib import info, warning + # Constants that start with "UNK_" are a mystery to me UNK_PKA_SCALING = -1.36 PROTONATOR = propka.protonate.Protonate(verbose=False) @@ -1417,3 +1418,15 @@ def is_ion_group(parameters, atom): if atom.res_name.strip() in parameters.ions.keys(): return IonGroup(atom) return None + +def initialize_atom_group(atom): + """Initialize an atom group. + + Args: + atom: atom to initialize + """ + # try to initialise the group + group_attr = globals()[atom.group_label] + atom.group = group_attr(atom) + atom.group.model_pka = atom.group_model_pka + atom.group.model_pka_set = atom.group_model_pka_set diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index d7a5423..abfc3e4 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -6,7 +6,7 @@ import propka.calculations import propka.parameters import propka.pdb -import propka.lib +from propka.output import write_mol2_for_atoms from propka.lib import info, warning @@ -133,7 +133,7 @@ def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', """ # print out structure unless we are using user-modified structure if not reuse: - propka.pdb.write_mol2_for_atoms(atoms, filename) + write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): errstr = ( @@ -141,7 +141,7 @@ def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', "- generating one".format( filename)) warning(errstr) - propka.pdb.write_mol2_for_atoms(atoms, filename) + write_mol2_for_atoms(atoms, filename) # Marvin calculate pKa values fmt = ( 'pka -a {num1} -b {num2} --min {min_ph} ' @@ -197,3 +197,4 @@ def extract_pkas(output): if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') return indices, values, types + diff --git a/propka/molecular_container.py b/propka/molecular_container.py index 3d2409f..d99c2c0 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,13 +1,11 @@ """Molecular container for storing all contents of PDB files.""" import os import sys -import propka.pdb import propka.version -import propka.output -import propka.group -import propka.lib +from propka.pdb import read_input +from propka.output import write_input from propka.conformation_container import ConformationContainer -from propka.lib import info, warning +from propka.lib import info, warning, make_grid # TODO - these are constants whose origins are a little murky @@ -78,7 +76,7 @@ def __init__(self, input_file, options=None): self.find_covalently_coupled_groups() # write out the input file filename = self.file.replace(input_file_extension, '.propka_input') - propka.pdb.write_input(self, filename) + write_input(self, filename) elif input_file_extension == '.propka_input': #input is a propka_input file [self.conformations, self.conformation_names] = ( @@ -155,7 +153,7 @@ def average_of_conformations(self): else: str_ = ( 'Group {0:s} could not be found in ' - 'conformation {0:s}.'.format( + 'conformation {1:s}.'.format( group.atom.residue_label, name)) warning(str_) # ... and store the average value @@ -214,7 +212,7 @@ def get_folding_profile(self, conformation='AVR', reference="neutral", """ # calculate stability profile profile = [] - for ph in propka.lib.make_grid(*grid): + for ph in make_grid(*grid): conf = self.conformations[conformation] ddg = conf.calculate_folding_energy(ph=ph, reference=reference) profile.append([ph, ddg]) @@ -244,7 +242,7 @@ def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]): list of charge state values """ charge_profile = [] - for ph in propka.lib.make_grid(*grid): + for ph in make_grid(*grid): conf = self.conformations[conformation] q_unfolded, q_folded = conf.calculate_charge( self.version.parameters, ph=ph) diff --git a/propka/output.py b/propka/output.py index 61f001b..61f8a68 100644 --- a/propka/output.py +++ b/propka/output.py @@ -1,6 +1,6 @@ """Output routines.""" from datetime import date -from propka.lib import info +from propka.lib import info, open_file_for_writing def print_header(): @@ -11,8 +11,8 @@ def print_header(): info(str_) -def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False, - _=None): +def write_pdb_for_protein( + protein, pdbfile=None, filename=None, include_hydrogens=False, _=None): """Write a residue to the new PDB file. Args: @@ -50,6 +50,16 @@ def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False, pdbfile.close() +def write_pdb_for_conformation(conformation, filename): + """Write PDB conformation to a file. + + Args: + conformation: conformation container + filename: filename for output + """ + write_pdb_for_atoms(conformation.atoms, filename) + + def write_pka(protein, parameters, filename=None, conformation='1A', reference="neutral", _="folding", verbose=False, __=None): @@ -204,8 +214,8 @@ def get_summary_section(protein, conformation, parameters): def get_folding_profile_section( - protein, conformation='AVR', direction="folding", reference="neutral", - window=[0., 14., 1.0], _=False, __=None): + protein, conformation='AVR', direction="folding", reference="neutral", + window=[0., 14., 1.0], _=False, __=None): """Returns string with the folding profile section of the results. Args: @@ -461,3 +471,104 @@ def make_interaction_map(name, list_, interaction): tag = ' X ' res += '{0:>10s}| '.format(tag) return res + + +def write_pdb_for_atoms(atoms, filename, make_conect_section=False): + """Write out PDB file for atoms. + + Args: + atoms: list of atoms + filename: name of file + make_conect_section: generate a CONECT PDB section + """ + out = open_file_for_writing(filename) + for atom in atoms: + out.write(atom.make_pdb_line()) + if make_conect_section: + for atom in atoms: + out.write(atom.make_conect_line()) + out.close() + + +def get_bond_order(atom1, atom2): + """Get the order of a bond between two atoms. + + Args: + atom1: first atom in bond + atom2: second atom in bond + Returns: + string with bond type + """ + type_ = '1' + pi_electrons1 = atom1.num_pi_elec_2_3_bonds + pi_electrons2 = atom2.num_pi_elec_2_3_bonds + if '.ar' in atom1.sybyl_type: + pi_electrons1 -= 1 + if '.ar' in atom2.sybyl_type: + pi_electrons2 -= 1 + if pi_electrons1 > 0 and pi_electrons2 > 0: + type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1) + if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: + type_ = 'ar' + return type_ + + +def write_mol2_for_atoms(atoms, filename): + """Write out MOL2 file for atoms. + + Args: + atoms: list of atoms + filename: name of file + """ + # TODO - header needs to be converted to format string + header = '@MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n' + atoms_section = '@ATOM\n' + for i, atom in enumerate(atoms): + atoms_section += atom.make_mol2_line(i+1) + bonds_section = '@BOND\n' + id_ = 1 + for i, atom1 in enumerate(atoms): + for j, atom2 in enumerate(atoms, i+1): + if atom1 in atom2.bonded_atoms: + type_ = get_bond_order(atom1, atom2) + bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format( + id_, i+1, j+1, type_) + id_ += 1 + substructure_section = '@SUBSTRUCTURE\n\n' + if len(atoms) > 0: + substructure_section = ( + '@SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format( + atoms[0].res_num, atoms[0].res_name, atoms[0].numb)) + out = open_file_for_writing(filename) + out.write(header.format(natom=len(atoms), id=id_-1)) + out.write(atoms_section) + out.write(bonds_section) + out.write(substructure_section) + out.close() + +def write_input(molecular_container, filename): + """Write PROPKA input file for molecular container. + + Args: + molecular_container: molecular container + filename: output file name + """ + out = open_file_for_writing(filename) + for conformation_name in molecular_container.conformation_names: + out.write('MODEL {0:s}\n'.format(conformation_name)) + # write atoms + for atom in molecular_container.conformations[conformation_name].atoms: + out.write(atom.make_input_line()) + # write bonds + for atom in molecular_container.conformations[conformation_name].atoms: + out.write(atom.make_conect_line()) + # write covalently coupled groups + for group in ( + molecular_container.conformations[conformation_name].groups): + out.write(group.make_covalently_coupled_line()) + # write non-covalently coupled groups + for group in ( + molecular_container.conformations[conformation_name].groups): + out.write(group.make_non_covalently_coupled_line()) + out.write('ENDMDL\n') + out.close() diff --git a/propka/pdb.py b/propka/pdb.py index 0074938..f1eaf16 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -1,7 +1,8 @@ -"""PDB parsing functionality.""" +"""Read and parse PDB-like input files.""" import propka.lib from propka.lib import warning from propka.atom import Atom +from propka.group import initialize_atom_group from propka.conformation_container import ConformationContainer @@ -158,118 +159,6 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, terminal = None -def write_pdb(conformation, filename): - """Write PDB conformation to a file. - - Args: - conformation: conformation container - filename: filename for output - """ - write_pdb_for_atoms(conformation.atoms, filename) - - -def write_pdb_for_atoms(atoms, filename, make_conect_section=False): - """Write out PDB file for atoms. - - Args: - atoms: list of atoms - filename: name of file - make_conect_section: generate a CONECT PDB section - """ - out = propka.lib.open_file_for_writing(filename) - for atom in atoms: - out.write(atom.make_pdb_line()) - if make_conect_section: - for atom in atoms: - out.write(atom.make_conect_line()) - out.close() - - -def write_mol2_for_atoms(atoms, filename): - """Write out MOL2 file for atoms. - - Args: - atoms: list of atoms - filename: name of file - """ - # TODO - header needs to be converted to format string - header = '@MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n' - atoms_section = '@ATOM\n' - for i, atom in enumerate(atoms): - atoms_section += atom.make_mol2_line(i+1) - bonds_section = '@BOND\n' - id_ = 1 - for i, atom1 in enumerate(atoms): - for j, atom2 in enumerate(atoms, i+1): - if atom1 in atom2.bonded_atoms: - type_ = get_bond_order(atom1, atom2) - bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format( - id_, i+1, j+1, type_) - id_ += 1 - substructure_section = '@SUBSTRUCTURE\n\n' - if len(atoms) > 0: - substructure_section = ( - '@SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format( - atoms[0].res_num, atoms[0].res_name, atoms[0].numb)) - out = propka.lib.open_file_for_writing(filename) - out.write(header.format(natom=len(atoms), id=id_-1)) - out.write(atoms_section) - out.write(bonds_section) - out.write(substructure_section) - out.close() - - -def get_bond_order(atom1, atom2): - """Get the order of a bond between two atoms. - - Args: - atom1: first atom in bond - atom2: second atom in bond - Returns: - string with bond type - """ - type_ = '1' - pi_electrons1 = atom1.num_pi_elec_2_3_bonds - pi_electrons2 = atom2.num_pi_elec_2_3_bonds - if '.ar' in atom1.sybyl_type: - pi_electrons1 -= 1 - if '.ar' in atom2.sybyl_type: - pi_electrons2 -= 1 - if pi_electrons1 > 0 and pi_electrons2 > 0: - type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1) - if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: - type_ = 'ar' - return type_ - - -def write_input(molecular_container, filename): - """Write PROPKA input file for molecular container. - - Args: - molecular_container: molecular container - filename: output file name - """ - out = propka.lib.open_file_for_writing(filename) - for conformation_name in molecular_container.conformation_names: - out.write('MODEL {0:s}\n'.format(conformation_name)) - # write atoms - for atom in molecular_container.conformations[conformation_name].atoms: - out.write(atom.make_input_line()) - # write bonds - for atom in molecular_container.conformations[conformation_name].atoms: - out.write(atom.make_conect_line()) - # write covalently coupled groups - for group in ( - molecular_container.conformations[conformation_name].groups): - out.write(group.make_covalently_coupled_line()) - # write non-covalently coupled groups - for group in ( - molecular_container.conformations[conformation_name].groups): - out.write(group.make_non_covalently_coupled_line()) - out.write('ENDMDL\n') - out.close() - - def read_input(input_file, parameters, molecule): """Read PROPKA input file for molecular container. @@ -316,6 +205,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): if tag in tags: atom = Atom(line=line) atom.get_input_parameters() + initialize_atom_group(atom) atom.groups_extracted = 1 atom.is_protonated = True atoms[atom.numb] = atom From b597a6f25752cf3b5f19fe101c30115088cf779f Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 10:00:31 -0700 Subject: [PATCH 3/6] Remove cyclic import based on I/O in pdb.py. Partially addresses https://github.com/jensengroup/propka-3.1/issues/49 --- propka/{pdb.py => input.py} | 151 ++++++++++++++++------------------ propka/lib.py | 106 +++++++++++------------- propka/ligand_pka_values.py | 1 - propka/molecular_container.py | 23 +++--- propka/output.py | 38 ++++++++- propka/parameters.py | 18 +--- 6 files changed, 171 insertions(+), 166 deletions(-) rename propka/{pdb.py => input.py} (72%) diff --git a/propka/pdb.py b/propka/input.py similarity index 72% rename from propka/pdb.py rename to propka/input.py index f1eaf16..e8c7f07 100644 --- a/propka/pdb.py +++ b/propka/input.py @@ -1,96 +1,58 @@ -"""Read and parse PDB-like input files.""" -import propka.lib -from propka.lib import warning +"""Input routines.""" +from pkg_resources import resource_filename from propka.atom import Atom -from propka.group import initialize_atom_group from propka.conformation_container import ConformationContainer +from propka.group import initialize_atom_group -EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, - 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, - 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, - 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} - +def open_file_for_reading(input_file): + """Open file or file-like stream for reading. -def read_pdb(pdb_file, parameters, molecule): - """Parse a PDB file. + TODO - convert this to a context manager Args: - pdb_file: file to read - parameters: parameters to guide parsing - molecule: molecular container - Returns: - list with elements: - 1. list of conformations - 2. list of names + input_file: path to file or file-like object. If file-like object, + then will attempt fseek(0). """ - conformations = {} - # read in all atoms in the file - lines = get_atom_lines_from_pdb( - pdb_file, ignore_residues=parameters.ignore_residues, - keep_protons=molecule.options.keep_protons, - chains=molecule.options.chains) - for (name, atom) in lines: - if not name in conformations.keys(): - conformations[name] = ConformationContainer( - name=name, parameters=parameters, molecular_container=molecule) - conformations[name].add_atom(atom) - # make a sorted list of conformation names - names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] + try: + input_file.fseek(0) + return input_file + except AttributeError: + pass + try: + file_ = open(input_file, 'rt') + except: + raise IOError('Cannot find file {0:s}'.format(input_file)) + return file_ -def protein_precheck(conformations, names): - """Check protein for correct number of atoms, etc. + +def read_parameter_file(input_file, parameters): + """Read a parameter file. Args: - names: conformation names to check + input_file: input file to read + parameters: Parameters object + Returns: + updated Parameters object """ - for name in names: - atoms = conformations[name].atoms - # Group the atoms by their residue: - atoms_by_residue = {} - for atom in atoms: - if atom.element != 'H': - res_id = resid_from_atom(atom) - try: - atoms_by_residue[res_id].append(atom) - except KeyError: - atoms_by_residue[res_id] = [atom] - for res_id, res_atoms in atoms_by_residue.items(): - res_name = res_atoms[0].res_name - residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) - # ignore ligand residues - if res_name not in EXPECTED_ATOM_NUMBERS: - continue - # check for c-terminal - if 'C-' in [a.terminal for a in res_atoms]: - if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: - str_ = ("Unexpected number ({num:d}) of atoms in residue " - "{res:s} in conformation {conf:s}".format( - num=len(res_atoms), res=residue_label, - conf=name)) - warning(str_) - continue - # check number of atoms in residue - if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: - str_ = ("Unexpected number ({num:d}) of atoms in residue " - "{res:s} in conformation {conf:s}".format( - num=len(res_atoms), res=residue_label, - conf=name)) - warning(str_) + # try to locate the parameter file + try: + ifile = resource_filename(__name__, input_file) + input_ = open_file_for_reading(ifile) + except (IOError, FileNotFoundError, ValueError): + input_ = open_file_for_reading(input_file) + for line in input_: + parameters.parse_line(line) + return parameters -def resid_from_atom(atom): - """Return string with atom residue information. - Args: - atom: atom to generate string for - Returns - string - """ - return '{0:>4d} {1:s} {2:s}'.format( - atom.res_num, atom.chain_id, atom.icode) +def conformation_sorter(conf): + """TODO - figure out what this function does.""" + model = int(conf[:-1]) + altloc = conf[-1:] + return model*100+ord(altloc) def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, @@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, tags: tags of lines that include atoms chains: list of chains """ - lines = propka.lib.open_file_for_reading(pdb_file).readlines() + lines = open_file_for_reading(pdb_file).readlines() nterm_residue = 'next_residue' old_residue = None terminal = None @@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule): molecular_container=molecule) conformations[name].add_atom(atom) # make a sorted list of conformation names - names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) + names = sorted(conformations.keys(), key=conformation_sorter) return [conformations, names] @@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): Yields: conformation container, list of atoms """ - lines = propka.lib.open_file_for_reading(input_file).readlines() + lines = open_file_for_reading(input_file).readlines() conformation = '' atoms = {} numbers = [] @@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): # prepare for next conformation atoms = {} numbers = [] + +def read_pdb(pdb_file, parameters, molecule): + """Parse a PDB file. + + Args: + pdb_file: file to read + parameters: parameters to guide parsing + molecule: molecular container + Returns: + list with elements: + 1. list of conformations + 2. list of names + """ + conformations = {} + # read in all atoms in the file + lines = get_atom_lines_from_pdb( + pdb_file, ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) + for (name, atom) in lines: + if not name in conformations.keys(): + conformations[name] = ConformationContainer( + name=name, parameters=parameters, molecular_container=molecule) + conformations[name].add_atom(atom) + # make a sorted list of conformation names + names = sorted(conformations.keys(), key=conformation_sorter) + return [conformations, names] + + diff --git a/propka/lib.py b/propka/lib.py index 48eab4e..518f7e6 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -11,56 +11,63 @@ _LOGGER.addHandler(_STDOUT_HANDLER) -def open_file_for_reading(input_file): - """Open file or file-like stream for reading. +EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, + 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, + 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, + 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} - TODO - convert this to a context manager + +def protein_precheck(conformations, names): + """Check protein for correct number of atoms, etc. Args: - input_file: path to file or file-like object. If file-like object, - then will attempt fseek(0). + names: conformation names to check """ - try: - input_file.fseek(0) - return input_file - except AttributeError: - pass - - try: - file_ = open(input_file, 'rt') - except: - raise IOError('Cannot find file {0:s}'.format(input_file)) - return file_ - - -def open_file_for_writing(input_file): - """Open file or file-like stream for writing. - - TODO - convert this to a context manager. + for name in names: + atoms = conformations[name].atoms + # Group the atoms by their residue: + atoms_by_residue = {} + for atom in atoms: + if atom.element != 'H': + res_id = resid_from_atom(atom) + try: + atoms_by_residue[res_id].append(atom) + except KeyError: + atoms_by_residue[res_id] = [atom] + for res_id, res_atoms in atoms_by_residue.items(): + res_name = res_atoms[0].res_name + residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) + # ignore ligand residues + if res_name not in EXPECTED_ATOM_NUMBERS: + continue + # check for c-terminal + if 'C-' in [a.terminal for a in res_atoms]: + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) + continue + # check number of atoms in residue + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) + + +def resid_from_atom(atom): + """Return string with atom residue information. Args: - input_file: path to file or file-like object. If file-like object, - then will attempt to get file mode. + atom: atom to generate string for + Returns + string """ - try: - mode = input_file.mode - if not ("w" in mode or "a" in mode or "+" in mode): - raise IOError("File/stream not open for writing") - return input_file - except AttributeError: - pass - try: - file_ = open(input_file, 'wt') - except FileNotFoundError: - raise Exception('Could not open {0:s}'.format(input_file)) - return file_ - - -def conformation_sorter(conf): - """TODO - figure out what this function does.""" - model = int(conf[:-1]) - altloc = conf[-1:] - return model*100+ord(altloc) + return '{0:>4d} {1:s} {2:s}'.format( + atom.res_num, atom.chain_id, atom.icode) def split_atoms_into_molecules(atoms): @@ -354,19 +361,6 @@ def configuration_compare(conf): return 100*int(conf[1:-2]) + ord(conf[-1]) -def write_file(filename, lines): - """Writes a new file. - - Args: - filename: name of file - lines: lines to write to file - """ - file_ = open_file_for_writing(filename) - for line in lines: - file_.write("{0:s}\n".format(line)) - file_.close() - - def _args_to_str(arg_list): """Summarize list of arguments in string. diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index abfc3e4..ab7fa28 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -5,7 +5,6 @@ import propka.molecular_container import propka.calculations import propka.parameters -import propka.pdb from propka.output import write_mol2_for_atoms from propka.lib import info, warning diff --git a/propka/molecular_container.py b/propka/molecular_container.py index d99c2c0..ae82fb9 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -2,10 +2,11 @@ import os import sys import propka.version -from propka.pdb import read_input +from propka.input import read_pdb, read_input, read_parameter_file +from propka.parameters import Parameters from propka.output import write_input from propka.conformation_container import ConformationContainer -from propka.lib import info, warning, make_grid +from propka.lib import info, warning, protein_precheck, make_grid # TODO - these are constants whose origins are a little murky @@ -38,11 +39,12 @@ def __init__(self, input_file, options=None): self.file = os.path.split(input_file)[1] self.name = self.file[0:self.file.rfind('.')] input_file_extension = input_file[input_file.rfind('.'):] - # set the version + parameters = Parameters() if options: - parameters = propka.parameters.Parameters(self.options.parameters) + parameters = read_parameter_file( + self.options.parameters, parameters) else: - parameters = propka.parameters.Parameters('propka.cfg') + parameters = read_parameter_file('propka.cfg', parameters) try: version_class = getattr(propka.version, parameters.version) self.version = version_class(parameters) @@ -56,15 +58,15 @@ def __init__(self, input_file, options=None): # input is a pdb file. read in atoms and top up containers to make # sure that all atoms are present in all conformations [self.conformations, self.conformation_names] = ( - propka.pdb.read_pdb(input_file, self.version.parameters, self)) + read_pdb(input_file, self.version.parameters, self)) if len(self.conformations) == 0: info('Error: The pdb file does not seems to contain any ' 'molecular conformations') sys.exit(-1) self.top_up_conformations() # make a structure precheck - propka.pdb.protein_precheck(self.conformations, - self.conformation_names) + protein_precheck( + self.conformations, self.conformation_names) # set up atom bonding and protonation self.version.setup_bonding_and_protonation(self) # Extract groups @@ -79,9 +81,8 @@ def __init__(self, input_file, options=None): write_input(self, filename) elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] = ( - propka.pdb.read_input(input_file, self.version.parameters, - self)) + [self.conformations, self.conformation_names] = read_input( + input_file, self.version.parameters, self) # Extract groups - this merely sets up the groups found in the # input file self.extract_groups() diff --git a/propka/output.py b/propka/output.py index 61f8a68..a72114e 100644 --- a/propka/output.py +++ b/propka/output.py @@ -1,6 +1,42 @@ """Output routines.""" from datetime import date -from propka.lib import info, open_file_for_writing +from propka.lib import info + + +def open_file_for_writing(input_file): + """Open file or file-like stream for writing. + + TODO - convert this to a context manager. + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt to get file mode. + """ + try: + mode = input_file.mode + if not ("w" in mode or "a" in mode or "+" in mode): + raise IOError("File/stream not open for writing") + return input_file + except AttributeError: + pass + try: + file_ = open(input_file, 'wt') + except FileNotFoundError: + raise Exception('Could not open {0:s}'.format(input_file)) + return file_ + + +def write_file(filename, lines): + """Writes a new file. + + Args: + filename: name of file + lines: lines to write to file + """ + file_ = open_file_for_writing(filename) + for line in lines: + file_.write("{0:s}\n".format(line)) + file_.close() def print_header(): diff --git a/propka/parameters.py b/propka/parameters.py index 960cb81..b708fce 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -35,7 +35,7 @@ class Parameters: """PROPKA parameter class.""" - def __init__(self, parameter_file): + def __init__(self): """Initialize parameter class. Args: @@ -52,22 +52,6 @@ def __init__(self, parameter_file): self.CYS_CYS_exception = None # These functions set up remaining data structures implicitly self.set_up_data_structures() - self.read_parameters(parameter_file) - - def read_parameters(self, file_): - """Read parameters from file. - - Args: - file_: file to read - """ - # try to locate the parameters file - try: - ifile = pkg_resources.resource_filename(__name__, file_) - input_ = lib.open_file_for_reading(ifile) - except (IOError, FileNotFoundError, ValueError): - input_ = lib.open_file_for_reading(file_) - for line in input_: - self.parse_line(line) def parse_line(self, line): """Parse parameter file line.""" From 84846aad8c71dd9bebb0b330f7958a2f26a399bc Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 12:01:30 -0700 Subject: [PATCH 4/6] Resolve cyclic import problem. Fixes https://github.com/jensengroup/propka-3.1/issues/49 Moved I/O into higher level of of code; should make issue https://github.com/jensengroup/propka-3.1/issues/51 easier to resolve --- propka/__init__.py | 6 +-- propka/atom.py | 4 +- propka/conformation_container.py | 1 - propka/input.py | 69 ++++++++++++++++++++++++++++++-- propka/ligand_pka_values.py | 14 +++---- propka/molecular_container.py | 66 ++++-------------------------- propka/output.py | 2 +- propka/parameters.py | 4 +- propka/run.py | 12 ++++-- scripts/propka31.py | 8 +++- tests/test_basic_regression.py | 13 +++--- 11 files changed, 107 insertions(+), 92 deletions(-) diff --git a/propka/__init__.py b/propka/__init__.py index 8d2447f..ecfacd3 100644 --- a/propka/__init__.py +++ b/propka/__init__.py @@ -15,6 +15,6 @@ """ __all__ = ["atom", "bonds", "calculations", "conformation_container", "coupled_groups", "determinant", "determinants", "group", - "hybrid36", "iterative", "lib", "ligand_pka_values", "ligand", - "molecular_container", "output", "parameters", "pdb", "protonate", - "run", "vector_algebra", "version"] + "hybrid36", "iterative", "input", "lib", "ligand_pka_values", + "ligand", "molecular_container", "output", "parameters", + "protonate", "run", "vector_algebra", "version"] diff --git a/propka/atom.py b/propka/atom.py index 3bb3f12..fcd6825 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -1,7 +1,7 @@ """Atom class - contains all atom information found in the PDB file""" import string -from . import hybrid36 from propka.lib import make_tidy_atom_label +from . import hybrid36 # Format strings that get used in multiple places (or are very complex) @@ -25,7 +25,7 @@ "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}") -class Atom(object): +class Atom: """Atom class - contains all atom information found in the PDB file""" def __init__(self, line=None): diff --git a/propka/conformation_container.py b/propka/conformation_container.py index d44e30c..d1e57ed 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -36,7 +36,6 @@ def __init__(self, name='', parameters=None, molecular_container=None): self.groups = [] self.chains = [] self.current_iter_item = 0 - # TODO - what is marvin_pkas_calculated? self.marvin_pkas_calculated = False self.non_covalently_coupled_groups = False diff --git a/propka/input.py b/propka/input.py index e8c7f07..675a770 100644 --- a/propka/input.py +++ b/propka/input.py @@ -1,5 +1,8 @@ """Input routines.""" +from pathlib import Path from pkg_resources import resource_filename +from propka.lib import protein_precheck +from propka.output import write_propka from propka.atom import Atom from propka.conformation_container import ConformationContainer from propka.group import initialize_atom_group @@ -27,6 +30,67 @@ def open_file_for_reading(input_file): return file_ +def read_molecule_file(input_file, mol_container): + """Read input file (PDB or PROPKA) for a molecular container + + Args + input_file: input file to read + mol_container: MolecularContainer object + Returns + updated MolecularContainer object + Raises + ValuError if invalid input given + """ + input_path = Path(input_file) + mol_container.name = input_path.stem + input_file_extension = input_path.suffix + if input_file_extension.lower() == '.pdb': + # input is a pdb file. read in atoms and top up containers to make + # sure that all atoms are present in all conformations + conformations, conformation_names = read_pdb( + input_path, mol_container.version.parameters, mol_container) + if len(conformations) == 0: + str_ = ('Error: The pdb file does not seems to contain any ' + 'molecular conformations') + raise ValueError(str_) + mol_container.conformations = conformations + mol_container.conformation_names = conformation_names + mol_container.top_up_conformations() + # make a structure precheck + protein_precheck( + mol_container.conformations, mol_container.conformation_names) + # set up atom bonding and protonation + mol_container.version.setup_bonding_and_protonation(mol_container) + # Extract groups + mol_container.extract_groups() + # sort atoms + for name in mol_container.conformation_names: + mol_container.conformations[name].sort_atoms() + # find coupled groups + mol_container.find_covalently_coupled_groups() + # write out the input file + # TODO - figure out why this I/O has to happen here + output_path = Path(input_path.name.replace( + input_file_extension, '.propka_input')) + write_propka(mol_container, output_path) + elif input_file_extension.lower() == '.propka_input': + # input is a propka_input file + conformations, conformation_names = read_propka( + input_file, mol_container.version.parameters, mol_container) + mol_container.conformations = conformations + mol_container.conformation_names = conformation_names + # Extract groups - this merely sets up the groups found in the + # input file + mol_container.extract_groups() + # do some additional set up + mol_container.additional_setup_when_reading_input_file() + else: + str_ = "Unknown input file type {0!s} for file {1!s}".format( + input_file_extension, input_path) + raise ValueError(str_) + return mol_container + + def read_parameter_file(input_file, parameters): """Read a parameter file. @@ -47,7 +111,6 @@ def read_parameter_file(input_file, parameters): return parameters - def conformation_sorter(conf): """TODO - figure out what this function does.""" model = int(conf[:-1]) @@ -121,7 +184,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, terminal = None -def read_input(input_file, parameters, molecule): +def read_propka(input_file, parameters, molecule): """Read PROPKA input file for molecular container. Args: @@ -235,5 +298,3 @@ def read_pdb(pdb_file, parameters, molecule): # make a sorted list of conformation names names = sorted(conformations.keys(), key=conformation_sorter) return [conformations, names] - - diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index ab7fa28..023845d 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -2,11 +2,8 @@ import os import subprocess import sys -import propka.molecular_container -import propka.calculations -import propka.parameters from propka.output import write_mol2_for_atoms -from propka.lib import info, warning +from propka.lib import info, warning, split_atoms_into_molecules class LigandPkaValues: @@ -47,17 +44,17 @@ def find_in_path(program): sys.exit(-1) return locs[0] - def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, - max_ph=20): + def get_marvin_pkas_for_pdb_file( + self, molecule, parameters, num_pkas=10, min_ph=-10, max_ph=20): """Use Marvin executables to get pKas for a PDB file. Args: pdbfile: PDB file + molecule: MolecularContainer object num_pkas: number of pKas to get min_ph: minimum pH value max_ph: maximum pH value """ - molecule = propka.molecular_container.Molecular_container(pdbfile) self.get_marvin_pkas_for_molecular_container( molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) @@ -110,7 +107,7 @@ def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, max_ph: maximum pH value """ # do one molecule at the time so we don't confuse marvin - molecules = propka.lib.split_atoms_into_molecules(atoms) + molecules = split_atoms_into_molecules(atoms) for i, molecule in enumerate(molecules): filename = '{0:s}_{1:d}.mol2'.format(name, i+1) self.get_marvin_pkas_for_molecule( @@ -196,4 +193,3 @@ def extract_pkas(output): if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') return indices, values, types - diff --git a/propka/molecular_container.py b/propka/molecular_container.py index ae82fb9..43ad372 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,12 +1,8 @@ """Molecular container for storing all contents of PDB files.""" import os -import sys import propka.version -from propka.input import read_pdb, read_input, read_parameter_file -from propka.parameters import Parameters -from propka.output import write_input from propka.conformation_container import ConformationContainer -from propka.lib import info, warning, protein_precheck, make_grid +from propka.lib import info, warning, make_grid # TODO - these are constants whose origins are a little murky @@ -15,36 +11,26 @@ MAX_ITERATION = 4 -class Molecular_container: +class MolecularContainer: """Container for storing molecular contents of PDB files. TODO - this class name does not conform to PEP8 but has external use. We should deprecate and change eventually. """ - def __init__(self, input_file, options=None): + def __init__(self, parameters, options=None): """Initialize molecular container. Args: - input_file: molecular input file + parameters: Parameters() object options: options object """ # printing out header before parsing input propka.output.print_header() - # set up some values + self.conformation_names = [] + self.conformations = {} self.options = options - self.input_file = input_file - # TODO - replace this indelicate os.path code with pathlib - self.dir = os.path.split(input_file)[0] - self.file = os.path.split(input_file)[1] - self.name = self.file[0:self.file.rfind('.')] - input_file_extension = input_file[input_file.rfind('.'):] - parameters = Parameters() - if options: - parameters = read_parameter_file( - self.options.parameters, parameters) - else: - parameters = read_parameter_file('propka.cfg', parameters) + self.name = None try: version_class = getattr(propka.version, parameters.version) self.version = version_class(parameters) @@ -53,44 +39,6 @@ def __init__(self, input_file, options=None): errstr = 'Error: Version {0:s} does not exist'.format( parameters.version) raise Exception(errstr) - # read the input file - if input_file_extension[0:4] == '.pdb': - # input is a pdb file. read in atoms and top up containers to make - # sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] = ( - read_pdb(input_file, self.version.parameters, self)) - if len(self.conformations) == 0: - info('Error: The pdb file does not seems to contain any ' - 'molecular conformations') - sys.exit(-1) - self.top_up_conformations() - # make a structure precheck - protein_precheck( - self.conformations, self.conformation_names) - # set up atom bonding and protonation - self.version.setup_bonding_and_protonation(self) - # Extract groups - self.extract_groups() - # sort atoms - for name in self.conformation_names: - self.conformations[name].sort_atoms() - # find coupled groups - self.find_covalently_coupled_groups() - # write out the input file - filename = self.file.replace(input_file_extension, '.propka_input') - write_input(self, filename) - elif input_file_extension == '.propka_input': - #input is a propka_input file - [self.conformations, self.conformation_names] = read_input( - input_file, self.version.parameters, self) - # Extract groups - this merely sets up the groups found in the - # input file - self.extract_groups() - # do some additional set up - self.additional_setup_when_reading_input_file() - else: - info('Unrecognized input file:{0:s}'.format(input_file)) - sys.exit(-1) def top_up_conformations(self): """Makes sure that all atoms are present in all conformations.""" diff --git a/propka/output.py b/propka/output.py index a72114e..2ee5f89 100644 --- a/propka/output.py +++ b/propka/output.py @@ -582,7 +582,7 @@ def write_mol2_for_atoms(atoms, filename): out.write(substructure_section) out.close() -def write_input(molecular_container, filename): +def write_propka(molecular_container, filename): """Write PROPKA input file for molecular container. Args: diff --git a/propka/parameters.py b/propka/parameters.py index b708fce..5b26dee 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -1,6 +1,4 @@ """Holds parameters and settings.""" -import pkg_resources -import propka.lib as lib from propka.lib import info, warning @@ -346,7 +344,7 @@ def print_interactions_latex(self): 'N1', 'O2', 'OP', 'SH'] lines = [ "", - "\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)), + "\\begin{{longtable}}{{{0:s}}}".format('l'*len(agroups)), ("\\caption{{Ligand interaction parameters. For interactions not " "listed, the default value of {0:s} is applied.}}").format( str(self.sidechain_cutoffs.default)), diff --git a/propka/run.py b/propka/run.py index e851309..633e2c5 100644 --- a/propka/run.py +++ b/propka/run.py @@ -1,7 +1,9 @@ """Entry point for PROPKA script.""" import logging from propka.lib import loadOptions -from propka.molecular_container import Molecular_container +from propka.input import read_parameter_file, read_molecule_file +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer _LOGGER = logging.getLogger("PROPKA") @@ -13,8 +15,10 @@ def main(optargs=None): optargs = optargs if optargs is not None else [] options = loadOptions(*optargs) pdbfiles = options.filenames + parameters = read_parameter_file(options.parameters, Parameters()) for pdbfile in pdbfiles: - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() @@ -33,9 +37,11 @@ def single(pdbfile, optargs=None): optargs = optargs if optargs is not None else [] options = loadOptions(*optargs) pdbfile = options.filenames.pop(0) + parameters = read_parameter_file(options.parameters, Parameters()) if len(options.filenames) > 0: _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() return my_molecule diff --git a/scripts/propka31.py b/scripts/propka31.py index d3459c8..8a90114 100755 --- a/scripts/propka31.py +++ b/scripts/propka31.py @@ -11,7 +11,9 @@ propka31.) """ from propka.lib import loadOptions -from propka.molecular_container import Molecular_container +from propka.input import read_parameter_file, read_molecule_file +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer def main(): @@ -19,9 +21,11 @@ def main(): # loading options, flaggs and arguments options = loadOptions([]) pdbfiles = options.filenames + parameters = read_parameter_file(options.parameters, Parameters()) for pdbfile in pdbfiles: - my_molecule = Molecular_container(pdbfile, options) + my_molecule = MolecularContainer(parameters, options) + my_molecule = read_molecule_file(pdbfile, my_molecule) my_molecule.calculate_pka() my_molecule.write_pka() diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index cd8036e..9a63854 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -5,8 +5,10 @@ from pathlib import Path import pytest from numpy.testing import assert_almost_equal -import propka.lib -import propka.molecular_container +from propka.parameters import Parameters +from propka.molecular_container import MolecularContainer +from propka.input import read_parameter_file, read_molecule_file +from propka.lib import loadOptions _LOGGER = logging.getLogger(__name__) @@ -64,15 +66,16 @@ def run_propka(options, pdb_path, tmp_path): tmp_path: path for working directory """ options += [str(pdb_path)] - args = propka.lib.loadOptions(options) + args = loadOptions(options) try: _LOGGER.warning( "Working in tmpdir {0:s} because of PROPKA file output; " "need to fix this.".format(str(tmp_path))) cwd = Path.cwd() os.chdir(tmp_path) - molecule = propka.molecular_container.Molecular_container( - str(pdb_path), args) + parameters = read_parameter_file(args.parameters, Parameters()) + molecule = MolecularContainer(parameters, args) + molecule = read_molecule_file(str(pdb_path), molecule) molecule.calculate_pka() molecule.write_pka() finally: From 1d17cb05ed695294956dc892205eb451a3d1cd8e Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Sat, 30 May 2020 13:15:45 -0700 Subject: [PATCH 5/6] Remove TODO for refactoring. Done already. --- propka/calculations.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/propka/calculations.py b/propka/calculations.py index 915b713..1c48384 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -2,15 +2,6 @@ import math -# TODO - this file should be broken into three separate files: -# * calculations.py - includes basic functions for calculating distances, etc. -# * hydrogen.py - includes bonding and protonation functions -# * energy.py - includes energy functions (dependent on distance functions) - - -# TODO - the next set of functions form a distinct "module" for distance calculation - - # Maximum distance used to bound calculations of smallest distance MAX_DISTANCE = 1e6 From 7085271ad5d5b09ae0ecc53b729a6b01defc9837 Mon Sep 17 00:00:00 2001 From: Nathan Baker Date: Wed, 3 Jun 2020 17:49:19 -0700 Subject: [PATCH 6/6] Simplify with .writable() method. Addresses https://github.com/jensengroup/propka-3.1/pull/50#discussion_r434349737 --- propka/output.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/propka/output.py b/propka/output.py index 2ee5f89..7353779 100644 --- a/propka/output.py +++ b/propka/output.py @@ -13,8 +13,7 @@ def open_file_for_writing(input_file): then will attempt to get file mode. """ try: - mode = input_file.mode - if not ("w" in mode or "a" in mode or "+" in mode): + if not input_file.writable(): raise IOError("File/stream not open for writing") return input_file except AttributeError: