From 461edda3b268772060954a609f23eb3f2571c006 Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 08:45:39 -0700
Subject: [PATCH 1/6] Break propka.calculations into parts.

Broken into three files to simplify import analysis:

* `calculations.py` - distance calculations and simple math
* `energy.py` - energy calculations
* `hydrogens.py` - hydrogen geometry calculations

Working towards addressing https://github.com/jensengroup/propka-3.1/issues/49
---
 propka/calculations.py | 877 -----------------------------------------
 propka/determinants.py |   2 +-
 propka/energy.py       | 531 +++++++++++++++++++++++++
 propka/hydrogens.py    | 346 ++++++++++++++++
 propka/version.py      |  51 +--
 5 files changed, 906 insertions(+), 901 deletions(-)
 create mode 100644 propka/energy.py
 create mode 100644 propka/hydrogens.py

diff --git a/propka/calculations.py b/propka/calculations.py
index 3770983..df8db5d 100644
--- a/propka/calculations.py
+++ b/propka/calculations.py
@@ -66,880 +66,3 @@ def get_smallest_distance(atoms1, atoms2):
                 res_atom1 = atom1
                 res_atom2 = atom2
     return [res_atom1, math.sqrt(res_dist), res_atom2]
-
-
-# TODO - the next set of functions form a distinct "module" for hydrogen addition
-
-
-def setup_bonding_and_protonation(parameters, molecular_container):
-    """Set up bonding and protonation for a molecule.
-
-    NOTE - the unused `parameters` argument is required for compatibility in version.py
-    TODO - figure out why there is a similar function in version.py
-
-    Args:
-        parameters:  not used
-        molecular_container:  molecule container.
-    """
-    # make bonds
-    my_bond_maker = setup_bonding(molecular_container)
-    # set up ligand atom names
-    set_ligand_atom_names(molecular_container)
-    # apply information on pi electrons
-    my_bond_maker.add_pi_electron_information(molecular_container)
-    # Protonate atoms
-    if molecular_container.options.protonate_all:
-        PROTONATOR = propka.protonate.Protonate(verbose=False)
-        PROTONATOR.protonate(molecular_container)
-
-
-def setup_bonding(molecular_container):
-    """Set up bonding for a molecular container.
-
-    Args:
-        molecular_container:  the molecular container in question
-    Returns:
-        BondMaker object
-    """
-    my_bond_maker = propka.bonds.BondMaker()
-    my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
-    return my_bond_maker
-
-
-def setup_bonding_and_protonation_30_style(parameters, molecular_container):
-    """Set up bonding for a molecular container.
-
-    Args:
-        parameters:  parameters for calculation
-        molecular_container:  the molecular container in question
-    Returns:
-        BondMaker object
-    """
-    # Protonate atoms
-    protonate_30_style(molecular_container)
-    # make bonds
-    bond_maker = propka.bonds.BondMaker()
-    bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
-    return bond_maker
-
-
-def protonate_30_style(molecular_container):
-    """Protonate the molecule.
-
-    Args:
-        molecular_container:  molecule
-    """
-    for name in molecular_container.conformation_names:
-        info('Now protonating', name)
-        # split atom into residues
-        curres = -1000000
-        residue = []
-        o_atom = None
-        c_atom = None
-        for atom in molecular_container.conformations[name].atoms:
-            if atom.res_num != curres:
-                curres = atom.res_num
-                if len(residue) > 0:
-                    #backbone
-                    [o_atom, c_atom] = add_backbone_hydrogen(
-                        residue, o_atom, c_atom)
-                    #arginine
-                    if residue[0].res_name == 'ARG':
-                        add_arg_hydrogen(residue)
-                    #histidine
-                    if residue[0].res_name == 'HIS':
-                        add_his_hydrogen(residue)
-                    #tryptophan
-                    if residue[0].res_name == 'TRP':
-                        add_trp_hydrogen(residue)
-                    #amides
-                    if residue[0].res_name in ['GLN', 'ASN']:
-                        add_amd_hydrogen(residue)
-                    residue = []
-            if atom.type == 'atom':
-                residue.append(atom)
-
-
-def set_ligand_atom_names(molecular_container):
-    """Set names for ligands in molecular container.
-
-    Args:
-        molecular_container:  molecular container for ligand names
-    """
-    for name in molecular_container.conformation_names:
-        molecular_container.conformations[name].set_ligand_atom_names()
-
-
-def add_arg_hydrogen(residue):
-    """Adds Arg hydrogen atoms to residues according to the 'old way'.
-
-    Args:
-        residue:  arginine residue to protonate
-    Returns:
-        list of hydrogen atoms
-    """
-    #info('Adding arg H',residue)
-    for atom in residue:
-        if atom.name == "CD":
-            cd_atom = atom
-        elif atom.name == "CZ":
-            cz_atom = atom
-        elif atom.name == "NE":
-            ne_atom = atom
-        elif atom.name == "NH1":
-            nh1_atom = atom
-        elif atom.name == "NH2":
-            nh2_atom = atom
-    h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom)
-    h1_atom.name = "HE"
-    h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom)
-    h2_atom.name = "HN1"
-    h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom)
-    h3_atom.name = "HN2"
-    h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom)
-    h4_atom.name = "HN3"
-    h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom)
-    h5_atom.name = "HN4"
-    return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom]
-
-
-def add_his_hydrogen(residue):
-    """Adds His hydrogen atoms to residues according to the 'old way'.
-
-    Args:
-        residue:  histidine residue to protonate
-    """
-    for atom in residue:
-        if atom.name == "CG":
-            cg_atom = atom
-        elif atom.name == "ND1":
-            nd_atom = atom
-        elif atom.name == "CD2":
-            cd_atom = atom
-        elif atom.name == "CE1":
-            ce_atom = atom
-        elif atom.name == "NE2":
-            ne_atom = atom
-    hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom)
-    hd_atom.name = "HND"
-    he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom)
-    he_atom.name = "HNE"
-
-
-def add_trp_hydrogen(residue):
-    """Adds Trp hydrogen atoms to residues according to the 'old way'.
-
-    Args:
-        residue:  tryptophan residue to protonate
-    """
-    cd_atom = None
-    ne_atom = None
-    for atom in residue:
-        if atom.name == "CD1":
-            cd_atom = atom
-        elif atom.name == "NE1":
-            ne_atom = atom
-        elif atom.name == "CE2":
-            ce_atom = atom
-    if (cd_atom is None) or (ne_atom is None) or (ce_atom is None):
-        str_ = "Unable to find all atoms for {0:s} {1:s}".format(
-            residue[0].res_name, residue[0].res_num)
-        raise ValueError(str_)
-    he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom)
-    he_atom.name = "HNE"
-
-
-def add_amd_hydrogen(residue):
-    """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'.
-
-    Args:
-        residue:  glutamine or asparagine residue to protonate
-    """
-    c_atom = None
-    o_atom = None
-    n_atom = None
-    for atom in residue:
-        if ((atom.res_name == "GLN" and atom.name == "CD")
-                or (atom.res_name == "ASN" and atom.name == "CG")):
-            c_atom = atom
-        elif ((atom.res_name == "GLN" and atom.name == "OE1")
-              or (atom.res_name == "ASN" and atom.name == "OD1")):
-            o_atom = atom
-        elif ((atom.res_name == "GLN" and atom.name == "NE2")
-              or (atom.res_name == "ASN" and atom.name == "ND2")):
-            n_atom = atom
-    if (c_atom is None) or (o_atom is None) or (n_atom is None):
-        str_ = "Unable to find all atoms for {0:s} {1:s}".format(
-            residue[0].res_name, residue[0].res_num)
-        raise ValueError(str_)
-    h1_atom = protonate_direction(n_atom, o_atom, c_atom)
-    h1_atom.name = "HN1"
-    h2_atom = protonate_average_direction(n_atom, c_atom, o_atom)
-    h2_atom.name = "HN2"
-
-
-def add_backbone_hydrogen(residue, o_atom, c_atom):
-    """Adds hydrogen backbone atoms to residues according to the old way.
-
-    dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect
-    anything at the moment. Could be improved, but works for now.
-
-    Args:
-        residue:  residue to protonate
-        o_atom:  backbone oxygen atom
-        c_atom:  backbone carbon atom
-    Returns:
-        [new backbone oxygen atom, new backbone carbon atom]
-    """
-    new_c_atom = None
-    new_o_atom = None
-    n_atom = None
-    for atom in residue:
-        if atom.name == "N":
-            n_atom = atom
-        if atom.name == "C":
-            new_c_atom = atom
-        if atom.name == "O":
-            new_o_atom = atom
-    if None in [c_atom, o_atom, n_atom]:
-        return [new_o_atom, new_c_atom]
-    if n_atom.res_name == "PRO":
-        # PRO doesn't have an H-atom; do nothing
-        pass
-    else:
-        h_atom = protonate_direction(n_atom, o_atom, c_atom)
-        h_atom.name = "H"
-    return [new_o_atom, new_c_atom]
-
-
-def protonate_direction(x1_atom, x2_atom, x3_atom):
-    """Protonates an atom, x1_atom, given a direction.
-
-    New direction for x1_atom proton is (x2_atom -> x3_atom).
-
-    Args:
-        x1_atom:  atom to be protonated
-        x2_atom:  atom for direction
-        x3_atom:  other atom for direction
-    Returns:
-        new hydrogen atom
-    """
-    dx = (x3_atom.x - x2_atom.x)
-    dy = (x3_atom.y - x2_atom.y)
-    dz = (x3_atom.z - x2_atom.z)
-    length = math.sqrt(dx*dx + dy*dy + dz*dz)
-    x = x1_atom.x + dx/length
-    y = x1_atom.y + dy/length
-    z = x1_atom.z + dz/length
-    h_atom = make_new_h(x1_atom, x, y, z)
-    h_atom.name = "H"
-    return h_atom
-
-
-def protonate_average_direction(x1_atom, x2_atom, x3_atom):
-    """Protonates an atom, x1_atom, given a direction.
-
-    New direction for x1_atom is (x1_atom/x2_atom -> x3_atom).
-    Note, this one uses the average of x1_atom & x2_atom (N & O) unlike
-    the previous N - C = O
-
-    Args:
-        x1_atom:  atom to be protonated
-        x2_atom:  atom for direction
-        x3_atom:  other atom for direction
-    Returns:
-        new hydrogen atom
-    """
-    dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x
-    dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y
-    dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z
-    length = math.sqrt(dx*dx + dy*dy + dz*dz)
-    x = x1_atom.x + dx/length
-    y = x1_atom.y + dy/length
-    z = x1_atom.z + dz/length
-    h_atom = make_new_h(x1_atom, x, y, z)
-    h_atom.name = "H"
-    return h_atom
-
-
-def protonate_sp2(x1_atom, x2_atom, x3_atom):
-    """Protonates a SP2 atom, given a list of atoms
-
-    Args:
-        x1_atom:  atom to set direction
-        x2_atom:  atom to be protonated
-        x3_atom:  other atom to set direction
-    Returns:
-        new hydrogen atom
-    """
-    dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x
-    dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y
-    dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z
-    length = math.sqrt(dx*dx + dy*dy + dz*dz)
-    x = x2_atom.x - dx/length
-    y = x2_atom.y - dy/length
-    z = x2_atom.z - dz/length
-    h_atom = make_new_h(x2_atom, x, y, z)
-    h_atom.name = "H"
-    return h_atom
-
-
-def make_new_h(atom, x, y, z):
-    """Add a new hydrogen to an atom at the specified position.
-
-    Args:
-        atom:  atom to protonate
-        x:  x position of hydrogen
-        y:  y position of hydrogen
-        z:  z position of hydrogen
-    Returns:
-        new hydrogen atom
-    """
-    new_h = propka.atom.Atom()
-    new_h.set_property(
-        numb=None, name='H{0:s}'.format(atom.name[1:]),
-        res_name=atom.res_name, chain_id=atom.chain_id,
-        res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None)
-    new_h.element = 'H'
-    new_h.bonded_atoms = [atom]
-    new_h.charge = 0
-    new_h.steric_number = 0
-    new_h.number_of_lone_pairs = 0
-    new_h.number_of_protons_to_add = 0
-    new_h.num_pi_elec_2_3_bonds = 0
-    atom.bonded_atoms.append(new_h)
-    atom.conformation_container.add_atom(new_h)
-    return new_h
-
-
-# TODO - the remaining functions form a distinct "module" for desolvation
-
-
-# TODO - I have no idea what these constants mean so I labeled them "UNK_"
-UNK_MIN_DISTANCE = 2.75
-MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4)
-UNK_DIELECTRIC1 = 160
-UNK_DIELECTRIC2 = 30
-UNK_PKA_SCALING1 = 244.12
-UNK_BACKBONE_DISTANCE1 = 6.0
-UNK_BACKBONE_DISTANCE2 = 3.0
-UNK_FANGLE_MIN = 0.001
-UNK_PKA_SCALING2 = 0.8
-COMBINED_NUM_BURIED_MAX = 900
-SEPARATE_NUM_BURIED_MAX = 400
-
-
-def radial_volume_desolvation(parameters, group):
-    """Calculate desolvation terms for group.
-
-    Args:
-        parameters:  parameters for desolvation calculation
-        group:  group of atoms for calculation
-    """
-    all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms()
-    volume = 0.0
-    # TODO - Nathan really wants to rename the num_volume variable.
-    # He had to re-read the original paper to figure out what it was.
-    # A better name would be num_volume.
-    group.num_volume = 0
-    min_dist_4th = MIN_DISTANCE_4TH
-    for atom in all_atoms:
-        # ignore atoms in the same residue
-        if (atom.res_num == group.atom.res_num
-                and atom.chain_id == group.atom.chain_id):
-            continue
-        sq_dist = squared_distance(group, atom)
-        # desolvation
-        if sq_dist < parameters.desolv_cutoff_squared:
-            # use a default relative volume of 1.0 if the volume of the element
-            # is not found in parameters
-            # insert check for methyl groups
-            if atom.element == 'C' and atom.name not in ['CA', 'C']:
-                dvol = parameters.VanDerWaalsVolume['C4']
-            else:
-                dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0)
-            dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist)
-            volume += dv_inc
-        # buried
-        if sq_dist < parameters.buried_cutoff_squared:
-            group.num_volume += 1
-    group.buried = calculate_weight(parameters, group.num_volume)
-    scale_factor = calculate_scale_factor(parameters, group.buried)
-    volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance)
-    group.energy_volume = (
-        group.charge * parameters.desolvationPrefactor 
-        * volume_after_allowance * scale_factor)
-
-
-def calculate_scale_factor(parameters, weight):
-    """Calculate desolvation scaling factor.
-
-    Args:
-        parameters:  parameters for desolvation calculation
-        weight:  weight for scaling factor
-    Returns:
-        scaling factor
-    """
-    scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight)
-    return scale_factor
-
-
-def calculate_weight(parameters, num_volume):
-    """Calculate the atom-based desolvation weight.
-
-    TODO - figure out why a similar function exists in version.py
-
-    Args:
-        parameters:  parameters for desolvation calculation
-        num_volume:  number of heavy atoms within desolvation calculation volume
-    Returns:
-        desolvation weight
-    """
-    weight = (
-        float(num_volume - parameters.Nmin) 
-        / float(parameters.Nmax - parameters.Nmin))
-    weight = min(1.0, weight)
-    weight = max(0.0, weight)
-    return weight
-
-
-def calculate_pair_weight(parameters, num_volume1, num_volume2):
-    """Calculate the atom-pair based desolvation weight.
-
-    Args:
-        num_volume1:  number of heavy atoms within first desolvation volume
-        num_volume2:  number of heavy atoms within second desolvation volume
-    Returns:
-        desolvation weight
-    """
-    num_volume = num_volume1 + num_volume2
-    num_min = 2*parameters.Nmin
-    num_max = 2*parameters.Nmax
-    weight = float(num_volume - num_min)/float(num_max - num_min)
-    weight = min(1.0, weight)
-    weight = max(0.0, weight)
-    return weight
-
-
-def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0):
-    """Calculate hydrogen-bond interaction pKa shift.
-
-    Args:
-        dist:  distance for hydrogen bond
-        dpka_max:  maximum pKa value shift
-        cutoffs:  array with max and min distance values
-        f_angle:  angle scaling factor
-    Returns:
-        pKa shift value
-    """
-    if dist < cutoffs[0]:
-        value = 1.00
-    elif dist > cutoffs[1]:
-        value = 0.00
-    else:
-        value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0])
-    dpka = dpka_max*value*f_angle
-    return abs(dpka)
-
-
-def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None):
-    """Calculate distance and angle factors for three atoms for backbone interactions.
-
-    NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at
-    return: [O1 -- H2-N3].
-
-    Also generalized to be able to be used for residue 'centers' for C=O COO interactions.
-
-    Args:
-        atom1:  first atom for calculation (could be None)
-        atom2:  second atom for calculation
-        atom3:  third atom for calculation
-        center:  center point between atoms 1 and 2
-    Returns
-        [distance factor between atoms 1 and 2,
-         angle factor,
-         distance factor between atoms 2 and 3]
-    """
-    # The angle factor
-    #
-    #  ---closest_atom1/2
-    #                     .
-    #                      .
-    #                       the_hydrogen---closest_atom2/1---
-    dx_32 = atom2.x - atom3.x
-    dy_32 = atom2.y - atom3.y
-    dz_32 = atom2.z - atom3.z
-    dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32)
-    dx_32 = dx_32/dist_23
-    dy_32 = dy_32/dist_23
-    dz_32 = dz_32/dist_23
-    if atom1 is None:
-        dx_21 = center[0] - atom2.x
-        dy_21 = center[1] - atom2.y
-        dz_21 = center[2] - atom2.z
-    else:
-        dx_21 = atom1.x - atom2.x
-        dy_21 = atom1.y - atom2.y
-        dz_21 = atom1.z - atom2.z
-    dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21)
-    dx_21 = dx_21/dist_12
-    dy_21 = dy_21/dist_12
-    dz_21 = dz_21/dist_12
-    f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32
-    return dist_12, f_angle, dist_23
-
-
-def hydrogen_bond_interaction(group1, group2, version):
-    """Calculate energy for hydrogen bond interactions between two groups.
-
-    Args:
-        group1:  first interacting group
-        group2:  second interacting group
-        version:  an object that contains version-specific parameters
-    Returns:
-        hydrogen bond interaction energy
-    """
-    # find the smallest distance between interacting atoms
-    atoms1 = group1.get_interaction_atoms(group2)
-    atoms2 = group2.get_interaction_atoms(group1)
-    [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2)
-    if None in [closest_atom1, closest_atom2]:
-        warning(
-            'Side chain interaction failed for {0:s} and {1:s}'.format(
-                group1.label, group2.label))
-        return None
-    # get the parameters
-    [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,
-                                                              closest_atom2)
-    if (dpka_max is None) or (None in cutoff):
-        return None
-    # check that the closest atoms are close enough
-    if dist >= cutoff[1]:
-        return None
-    # check that bond distance criteria is met
-    min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds
-    if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1):
-        return None
-    # set angle factor
-    f_angle = 1.0
-    if group2.type in version.parameters.angular_dependent_sidechain_interactions:
-        if closest_atom2.element == 'H':
-            heavy_atom = closest_atom2.bonded_atoms[0]
-            hydrogen = closest_atom2
-            dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen,
-                                                      heavy_atom)
-        else:
-            # Either the structure is corrupt (no hydrogen), or the heavy atom
-            # is closer to the titratable atom than the hydrogen. In either
-            # case, we set the angle factor to 0
-            f_angle = 0.0
-    elif group1.type in version.parameters.angular_dependent_sidechain_interactions:
-        if closest_atom1.element == 'H':
-            heavy_atom = closest_atom1.bonded_atoms[0]
-            hydrogen = closest_atom1
-            dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen,
-                                                      heavy_atom)
-        else:
-            # Either the structure is corrupt (no hydrogen), or the heavy atom
-            # is closer to the titratable atom than the hydrogen. In either
-            # case, we set the angle factor to 0
-            f_angle = 0.0
-    weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume)
-    exception, value = version.check_exceptions(group1, group2)
-    if exception:
-        # Do nothing, value should have been assigned.
-        pass
-    else:
-        value = version.calculate_side_chain_energy(
-            dist, dpka_max, cutoff, weight, f_angle)
-    return value
-
-
-def electrostatic_interaction(group1, group2, dist, version):
-    """Calculate electrostatic interaction betwee two groups.
-
-    Args:
-        group1:  first interacting group
-        group2:  second interacting group
-        dist:  distance between groups
-        version:  version-specific object with parameters and functions
-    Returns:
-        electrostatic interaction energy or None (if no interaction is appropriate)
-    """
-    # check if we should do coulomb interaction at all
-    if not version.check_coulomb_pair(group1, group2, dist):
-        return None
-    weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume)
-    value = version.calculate_coulomb_energy(dist, weight)
-    return value
-
-
-def check_coulomb_pair(parameters, group1, group2, dist):
-    """Checks if this Coulomb interaction should be done.
-
-    NOTE - this is a propka2.0 hack
-    TODO - figure out why a similar function exists in version.py
-
-    Args:
-        parameters:  parameters for Coulomb calculations
-        group1:  first interacting group
-        group2:  second interacting group
-        dist:  distance between groups
-    Returns:
-        Boolean
-    """
-    num_volume = group1.num_volume + group2.num_volume
-    do_coulomb = True
-    # check if both groups are titratable (ions are taken care of in
-    # determinants::set_ion_determinants)
-    if not (group1.titratable and group2.titratable):
-        do_coulomb = False
-    # check if the distance is not too big
-    if dist > parameters.coulomb_cutoff2:
-        do_coulomb = False
-    # check that desolvation is ok
-    if num_volume < parameters.Nmin:
-        do_coulomb = False
-    return do_coulomb
-
-
-def coulomb_energy(dist, weight, parameters):
-    """Calculates the Coulomb interaction pKa shift based on Coulomb's law.
-
-    Args:
-        dist:  distance for electrostatic interaction
-        weight:  scaling of dielectric constant
-        parameters:  parameter object for calculation
-    Returns:
-        pKa shift
-    """
-    diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight
-    dist = max(dist, parameters.coulomb_cutoff1)
-    scale = (
-        (dist - parameters.coulomb_cutoff2)
-        / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2))
-    scale = max(0.0, scale)
-    scale = min(1.0, scale)
-    dpka = UNK_PKA_SCALING1/(diel*dist)*scale
-    return abs(dpka)
-
-
-def backbone_reorganization(_, conformation):
-    """Perform calculations related to backbone reorganizations.
-
-    NOTE - this was described in the code as "adding test stuff"
-    NOTE - this function does not appear to be used
-    TODO - figure out why a similar function exists in version.py
-
-    Args:
-        _:  not used
-        conformation:  specific molecule conformation
-    """
-    titratable_groups = conformation.get_backbone_reorganisation_groups()
-    bbc_groups = conformation.get_backbone_co_groups()
-
-    for titratable_group in titratable_groups:
-        weight = titratable_group.buried
-        dpka = 0.00
-        for bbc_group in bbc_groups:
-            center = [titratable_group.x, titratable_group.y, titratable_group.z]
-            atom2 = bbc_group.get_interaction_atoms(titratable_group)[0]
-            dist, f_angle, _ = angle_distance_factors(atom2=atom2,
-                                                      atom3=bbc_group.atom,
-                                                      center=center)
-            if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN:
-                value = (
-                    1.0 - (dist-UNK_BACKBONE_DISTANCE2)
-                    / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2))
-                dpka += UNK_PKA_SCALING2 * min(1.0, value)
-        titratable_group.energy_local = dpka*weight
-
-
-def check_exceptions(version, group1, group2):
-    """Checks for atypical behavior in interactions between two groups.
-    Checks are made based on group type.
-
-    TODO - figure out why a similar function exists in version.py
-
-    Args:
-        version:  version object
-        group1:  first group for check
-        group2:  second group for check
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    res_type1 = group1.type
-    res_type2 = group2.type
-    if (res_type1 == "COO") and (res_type2 == "ARG"):
-        exception, value = check_coo_arg_exception(group1, group2, version)
-    elif (res_type1 == "ARG") and (res_type2 == "COO"):
-        exception, value = check_coo_arg_exception(group2, group1, version)
-    elif (res_type1 == "COO") and (res_type2 == "COO"):
-        exception, value = check_coo_coo_exception(group1, group2, version)
-    elif (res_type1 == "CYS") and (res_type2 == "CYS"):
-        exception, value = check_cys_cys_exception(group1, group2, version)
-    elif ((res_type1 == "COO") and (res_type2 == "HIS")
-          or (res_type1 == "HIS") and (res_type2 == "COO")):
-        exception, value = check_coo_his_exception(group1, group2, version)
-    elif ((res_type1 == "OCO") and (res_type2 == "HIS")
-          or (res_type1 == "HIS") and (res_type2 == "OCO")):
-        exception, value = check_oco_his_exception(group1, group2, version)
-    elif ((res_type1 == "CYS") and (res_type2 == "HIS")
-          or (res_type1 == "HIS") and (res_type2 == "CYS")):
-        exception, value = check_cys_his_exception(group1, group2, version)
-    else:
-        # do nothing, no exception for this pair
-        exception = False
-        value = None
-    return exception, value
-
-
-def check_coo_arg_exception(group_coo, group_arg, version):
-    """Check for COO-ARG interaction atypical behavior.
-
-    Uses the two shortest unique distances (involving 2+2 atoms)
-
-    Args:
-        group_coo:  COO group
-        group_arg:  ARG group
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = True
-    value_tot = 0.00
-    # needs to be this way since you want to find shortest distance first
-    atoms_coo = []
-    atoms_coo.extend(group_coo.get_interaction_atoms(group_arg))
-    atoms_arg = []
-    atoms_arg.extend(group_arg.get_interaction_atoms(group_coo))
-    for _ in ["shortest", "runner-up"]:
-        # find the closest interaction pair
-        [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo,
-                                                                           atoms_arg)
-        [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom,
-                                                                  closest_arg_atom)
-        # calculate and sum up interaction energy
-        f_angle = 1.00
-        if group_arg.type in version.parameters.angular_dependent_sidechain_interactions:
-            atom3 = closest_arg_atom.bonded_atoms[0]
-            dist, f_angle, _ = angle_distance_factors(closest_coo_atom,
-                                                      closest_arg_atom,
-                                                      atom3)
-        value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle)
-        value_tot += value
-        # remove closest atoms before we attemp to find the runner-up pair
-        atoms_coo.remove(closest_coo_atom)
-        atoms_arg.remove(closest_arg_atom)
-    return exception, value_tot
-
-
-def check_coo_coo_exception(group1, group2, version):
-    """Check for COO-COO hydrogen-bond atypical interaction behavior.
-
-    Args:
-        group1:  first group for check
-        group2:  second group for check
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = True
-    interact_groups12 = group1.get_interaction_atoms(group2)
-    interact_groups21 = group2.get_interaction_atoms(group1)
-    [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12,
-                                                                 interact_groups21)
-    [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,
-                                                              closest_atom2)
-    f_angle = 1.00
-    value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle)
-    weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume)
-    value = value * (1.0 + weight)
-    return exception, value
-
-
-def check_coo_his_exception(group1, group2, version):
-    """Check for COO-HIS atypical interaction behavior
-
-    Args:
-        group1:  first group for check
-        group2:  second group for check
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = False
-    if check_buried(group1.num_volume, group2.num_volume):
-        exception = True
-    return exception, version.parameters.COO_HIS_exception
-
-
-def check_oco_his_exception(group1, group2, version):
-    """Check for OCO-HIS atypical interaction behavior
-
-    Args:
-        group1:  first group for check
-        group2:  second group for check
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = False
-    if check_buried(group1.num_volume, group2.num_volume):
-        exception = True
-    return exception, version.parameters.OCO_HIS_exception
-
-
-def check_cys_his_exception(group1, group2, version):
-    """Check for CYS-HIS atypical interaction behavior
-
-    Args:
-        group1:  first group for check
-        group2:  second group for check
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = False
-    if check_buried(group1.num_volume, group2.num_volume):
-        exception = True
-    return exception, version.parameters.CYS_HIS_exception
-
-
-def check_cys_cys_exception(group1, group2, version):
-    """Check for CYS-CYS atypical interaction behavior
-
-    Args:
-        group1:  first group for check
-        group2:  second group for check
-        version:  version object
-    Returns:
-        1. Boolean indicating atypical behavior,
-        2. value associated with atypical interaction (None if Boolean is False)
-    """
-    exception = False
-    if check_buried(group1.num_volume, group2.num_volume):
-        exception = True
-    return exception, version.parameters.CYS_CYS_exception
-
-
-def check_buried(num_volume1, num_volume2):
-    """Check to see if an interaction is buried
-
-    Args:
-        num_volume1:  number of buried heavy atoms in volume 1
-        num_volume2:  number of buried heavy atoms in volume 2
-    Returns:
-        True if interaction is buried, False otherwise
-    """
-    if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX)
-            and (num_volume1 <= SEPARATE_NUM_BURIED_MAX
-                 or num_volume2 <= SEPARATE_NUM_BURIED_MAX)):
-        return False
-    return True
diff --git a/propka/determinants.py b/propka/determinants.py
index 9a82e33..7dc8ee3 100644
--- a/propka/determinants.py
+++ b/propka/determinants.py
@@ -8,7 +8,7 @@
 import propka.lib
 import propka.vector_algebra
 from propka.calculations import squared_distance, get_smallest_distance
-from propka.calculations import angle_distance_factors, hydrogen_bond_energy
+from propka.energy import angle_distance_factors, hydrogen_bond_energy
 from propka.determinant import Determinant
 
 
diff --git a/propka/energy.py b/propka/energy.py
new file mode 100644
index 0000000..631c1fc
--- /dev/null
+++ b/propka/energy.py
@@ -0,0 +1,531 @@
+"""Energy calculations."""
+import math
+from propka.lib import warning
+from propka.calculations import squared_distance, get_smallest_distance
+
+
+# TODO - I have no idea what these constants mean so I labeled them "UNK_"
+UNK_MIN_DISTANCE = 2.75
+MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4)
+UNK_DIELECTRIC1 = 160
+UNK_DIELECTRIC2 = 30
+UNK_PKA_SCALING1 = 244.12
+UNK_BACKBONE_DISTANCE1 = 6.0
+UNK_BACKBONE_DISTANCE2 = 3.0
+UNK_FANGLE_MIN = 0.001
+UNK_PKA_SCALING2 = 0.8
+COMBINED_NUM_BURIED_MAX = 900
+SEPARATE_NUM_BURIED_MAX = 400
+
+
+def radial_volume_desolvation(parameters, group):
+    """Calculate desolvation terms for group.
+
+    Args:
+        parameters:  parameters for desolvation calculation
+        group:  group of atoms for calculation
+    """
+    all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms()
+    volume = 0.0
+    group.num_volume = 0
+    min_dist_4th = MIN_DISTANCE_4TH
+    for atom in all_atoms:
+        # ignore atoms in the same residue
+        if (atom.res_num == group.atom.res_num
+                and atom.chain_id == group.atom.chain_id):
+            continue
+        sq_dist = squared_distance(group, atom)
+        # desolvation
+        if sq_dist < parameters.desolv_cutoff_squared:
+            # use a default relative volume of 1.0 if the volume of the element
+            # is not found in parameters
+            # insert check for methyl groups
+            if atom.element == 'C' and atom.name not in ['CA', 'C']:
+                dvol = parameters.VanDerWaalsVolume['C4']
+            else:
+                dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0)
+            dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist)
+            volume += dv_inc
+        # buried
+        if sq_dist < parameters.buried_cutoff_squared:
+            group.num_volume += 1
+    group.buried = calculate_weight(parameters, group.num_volume)
+    scale_factor = calculate_scale_factor(parameters, group.buried)
+    volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance)
+    group.energy_volume = (
+        group.charge * parameters.desolvationPrefactor
+        * volume_after_allowance * scale_factor)
+
+
+def calculate_scale_factor(parameters, weight):
+    """Calculate desolvation scaling factor.
+
+    Args:
+        parameters:  parameters for desolvation calculation
+        weight:  weight for scaling factor
+    Returns:
+        scaling factor
+    """
+    scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight)
+    return scale_factor
+
+
+def calculate_weight(parameters, num_volume):
+    """Calculate the atom-based desolvation weight.
+
+    TODO - figure out why a similar function exists in version.py
+
+    Args:
+        parameters:  parameters for desolvation calculation
+        num_volume:  number of heavy atoms within desolvation calculation volume
+    Returns:
+        desolvation weight
+    """
+    weight = (
+        float(num_volume - parameters.Nmin)
+        / float(parameters.Nmax - parameters.Nmin))
+    weight = min(1.0, weight)
+    weight = max(0.0, weight)
+    return weight
+
+
+def calculate_pair_weight(parameters, num_volume1, num_volume2):
+    """Calculate the atom-pair based desolvation weight.
+
+    Args:
+        num_volume1:  number of heavy atoms within first desolvation volume
+        num_volume2:  number of heavy atoms within second desolvation volume
+    Returns:
+        desolvation weight
+    """
+    num_volume = num_volume1 + num_volume2
+    num_min = 2*parameters.Nmin
+    num_max = 2*parameters.Nmax
+    weight = float(num_volume - num_min)/float(num_max - num_min)
+    weight = min(1.0, weight)
+    weight = max(0.0, weight)
+    return weight
+
+
+def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0):
+    """Calculate hydrogen-bond interaction pKa shift.
+
+    Args:
+        dist:  distance for hydrogen bond
+        dpka_max:  maximum pKa value shift
+        cutoffs:  array with max and min distance values
+        f_angle:  angle scaling factor
+    Returns:
+        pKa shift value
+    """
+    if dist < cutoffs[0]:
+        value = 1.00
+    elif dist > cutoffs[1]:
+        value = 0.00
+    else:
+        value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0])
+    dpka = dpka_max*value*f_angle
+    return abs(dpka)
+
+
+def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None):
+    """Calculate distance and angle factors for three atoms for backbone interactions.
+
+    NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at
+    return: [O1 -- H2-N3].
+
+    Also generalized to be able to be used for residue 'centers' for C=O COO interactions.
+
+    Args:
+        atom1:  first atom for calculation (could be None)
+        atom2:  second atom for calculation
+        atom3:  third atom for calculation
+        center:  center point between atoms 1 and 2
+    Returns
+        [distance factor between atoms 1 and 2,
+         angle factor,
+         distance factor between atoms 2 and 3]
+    """
+    # The angle factor
+    #
+    #  ---closest_atom1/2
+    #                     .
+    #                      .
+    #                       the_hydrogen---closest_atom2/1---
+    dx_32 = atom2.x - atom3.x
+    dy_32 = atom2.y - atom3.y
+    dz_32 = atom2.z - atom3.z
+    dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32)
+    dx_32 = dx_32/dist_23
+    dy_32 = dy_32/dist_23
+    dz_32 = dz_32/dist_23
+    if atom1 is None:
+        dx_21 = center[0] - atom2.x
+        dy_21 = center[1] - atom2.y
+        dz_21 = center[2] - atom2.z
+    else:
+        dx_21 = atom1.x - atom2.x
+        dy_21 = atom1.y - atom2.y
+        dz_21 = atom1.z - atom2.z
+    dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21)
+    dx_21 = dx_21/dist_12
+    dy_21 = dy_21/dist_12
+    dz_21 = dz_21/dist_12
+    f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32
+    return dist_12, f_angle, dist_23
+
+
+def hydrogen_bond_interaction(group1, group2, version):
+    """Calculate energy for hydrogen bond interactions between two groups.
+
+    Args:
+        group1:  first interacting group
+        group2:  second interacting group
+        version:  an object that contains version-specific parameters
+    Returns:
+        hydrogen bond interaction energy
+    """
+    # find the smallest distance between interacting atoms
+    atoms1 = group1.get_interaction_atoms(group2)
+    atoms2 = group2.get_interaction_atoms(group1)
+    [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2)
+    if None in [closest_atom1, closest_atom2]:
+        warning(
+            'Side chain interaction failed for {0:s} and {1:s}'.format(
+                group1.label, group2.label))
+        return None
+    # get the parameters
+    [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,
+                                                              closest_atom2)
+    if (dpka_max is None) or (None in cutoff):
+        return None
+    # check that the closest atoms are close enough
+    if dist >= cutoff[1]:
+        return None
+    # check that bond distance criteria is met
+    min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds
+    if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1):
+        return None
+    # set angle factor
+    f_angle = 1.0
+    if group2.type in version.parameters.angular_dependent_sidechain_interactions:
+        if closest_atom2.element == 'H':
+            heavy_atom = closest_atom2.bonded_atoms[0]
+            hydrogen = closest_atom2
+            dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen,
+                                                      heavy_atom)
+        else:
+            # Either the structure is corrupt (no hydrogen), or the heavy atom
+            # is closer to the titratable atom than the hydrogen. In either
+            # case, we set the angle factor to 0
+            f_angle = 0.0
+    elif group1.type in version.parameters.angular_dependent_sidechain_interactions:
+        if closest_atom1.element == 'H':
+            heavy_atom = closest_atom1.bonded_atoms[0]
+            hydrogen = closest_atom1
+            dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen,
+                                                      heavy_atom)
+        else:
+            # Either the structure is corrupt (no hydrogen), or the heavy atom
+            # is closer to the titratable atom than the hydrogen. In either
+            # case, we set the angle factor to 0
+            f_angle = 0.0
+    weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume)
+    exception, value = version.check_exceptions(group1, group2)
+    if exception:
+        # Do nothing, value should have been assigned.
+        pass
+    else:
+        value = version.calculate_side_chain_energy(
+            dist, dpka_max, cutoff, weight, f_angle)
+    return value
+
+
+def electrostatic_interaction(group1, group2, dist, version):
+    """Calculate electrostatic interaction betwee two groups.
+
+    Args:
+        group1:  first interacting group
+        group2:  second interacting group
+        dist:  distance between groups
+        version:  version-specific object with parameters and functions
+    Returns:
+        electrostatic interaction energy or None (if no interaction is appropriate)
+    """
+    # check if we should do coulomb interaction at all
+    if not version.check_coulomb_pair(group1, group2, dist):
+        return None
+    weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume)
+    value = version.calculate_coulomb_energy(dist, weight)
+    return value
+
+
+def check_coulomb_pair(parameters, group1, group2, dist):
+    """Checks if this Coulomb interaction should be done.
+
+    NOTE - this is a propka2.0 hack
+    TODO - figure out why a similar function exists in version.py
+
+    Args:
+        parameters:  parameters for Coulomb calculations
+        group1:  first interacting group
+        group2:  second interacting group
+        dist:  distance between groups
+    Returns:
+        Boolean
+    """
+    num_volume = group1.num_volume + group2.num_volume
+    do_coulomb = True
+    # check if both groups are titratable (ions are taken care of in
+    # determinants::set_ion_determinants)
+    if not (group1.titratable and group2.titratable):
+        do_coulomb = False
+    # check if the distance is not too big
+    if dist > parameters.coulomb_cutoff2:
+        do_coulomb = False
+    # check that desolvation is ok
+    if num_volume < parameters.Nmin:
+        do_coulomb = False
+    return do_coulomb
+
+
+def coulomb_energy(dist, weight, parameters):
+    """Calculates the Coulomb interaction pKa shift based on Coulomb's law.
+
+    Args:
+        dist:  distance for electrostatic interaction
+        weight:  scaling of dielectric constant
+        parameters:  parameter object for calculation
+    Returns:
+        pKa shift
+    """
+    diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight
+    dist = max(dist, parameters.coulomb_cutoff1)
+    scale = (
+        (dist - parameters.coulomb_cutoff2)
+        / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2))
+    scale = max(0.0, scale)
+    scale = min(1.0, scale)
+    dpka = UNK_PKA_SCALING1/(diel*dist)*scale
+    return abs(dpka)
+
+
+def backbone_reorganization(_, conformation):
+    """Perform calculations related to backbone reorganizations.
+
+    NOTE - this was described in the code as "adding test stuff"
+    NOTE - this function does not appear to be used
+    TODO - figure out why a similar function exists in version.py
+
+    Args:
+        _:  not used
+        conformation:  specific molecule conformation
+    """
+    titratable_groups = conformation.get_backbone_reorganisation_groups()
+    bbc_groups = conformation.get_backbone_co_groups()
+
+    for titratable_group in titratable_groups:
+        weight = titratable_group.buried
+        dpka = 0.00
+        for bbc_group in bbc_groups:
+            center = [titratable_group.x, titratable_group.y, titratable_group.z]
+            atom2 = bbc_group.get_interaction_atoms(titratable_group)[0]
+            dist, f_angle, _ = angle_distance_factors(atom2=atom2,
+                                                      atom3=bbc_group.atom,
+                                                      center=center)
+            if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN:
+                value = (
+                    1.0 - (dist-UNK_BACKBONE_DISTANCE2)
+                    / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2))
+                dpka += UNK_PKA_SCALING2 * min(1.0, value)
+        titratable_group.energy_local = dpka*weight
+
+
+def check_exceptions(version, group1, group2):
+    """Checks for atypical behavior in interactions between two groups.
+    Checks are made based on group type.
+
+    TODO - figure out why a similar function exists in version.py
+
+    Args:
+        version:  version object
+        group1:  first group for check
+        group2:  second group for check
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    res_type1 = group1.type
+    res_type2 = group2.type
+    if (res_type1 == "COO") and (res_type2 == "ARG"):
+        exception, value = check_coo_arg_exception(group1, group2, version)
+    elif (res_type1 == "ARG") and (res_type2 == "COO"):
+        exception, value = check_coo_arg_exception(group2, group1, version)
+    elif (res_type1 == "COO") and (res_type2 == "COO"):
+        exception, value = check_coo_coo_exception(group1, group2, version)
+    elif (res_type1 == "CYS") and (res_type2 == "CYS"):
+        exception, value = check_cys_cys_exception(group1, group2, version)
+    elif ((res_type1 == "COO") and (res_type2 == "HIS")
+          or (res_type1 == "HIS") and (res_type2 == "COO")):
+        exception, value = check_coo_his_exception(group1, group2, version)
+    elif ((res_type1 == "OCO") and (res_type2 == "HIS")
+          or (res_type1 == "HIS") and (res_type2 == "OCO")):
+        exception, value = check_oco_his_exception(group1, group2, version)
+    elif ((res_type1 == "CYS") and (res_type2 == "HIS")
+          or (res_type1 == "HIS") and (res_type2 == "CYS")):
+        exception, value = check_cys_his_exception(group1, group2, version)
+    else:
+        # do nothing, no exception for this pair
+        exception = False
+        value = None
+    return exception, value
+
+
+def check_coo_arg_exception(group_coo, group_arg, version):
+    """Check for COO-ARG interaction atypical behavior.
+
+    Uses the two shortest unique distances (involving 2+2 atoms)
+
+    Args:
+        group_coo:  COO group
+        group_arg:  ARG group
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = True
+    value_tot = 0.00
+    # needs to be this way since you want to find shortest distance first
+    atoms_coo = []
+    atoms_coo.extend(group_coo.get_interaction_atoms(group_arg))
+    atoms_arg = []
+    atoms_arg.extend(group_arg.get_interaction_atoms(group_coo))
+    for _ in ["shortest", "runner-up"]:
+        # find the closest interaction pair
+        [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo,
+                                                                           atoms_arg)
+        [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom,
+                                                                  closest_arg_atom)
+        # calculate and sum up interaction energy
+        f_angle = 1.00
+        if group_arg.type in version.parameters.angular_dependent_sidechain_interactions:
+            atom3 = closest_arg_atom.bonded_atoms[0]
+            dist, f_angle, _ = angle_distance_factors(closest_coo_atom,
+                                                      closest_arg_atom,
+                                                      atom3)
+        value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle)
+        value_tot += value
+        # remove closest atoms before we attemp to find the runner-up pair
+        atoms_coo.remove(closest_coo_atom)
+        atoms_arg.remove(closest_arg_atom)
+    return exception, value_tot
+
+
+def check_coo_coo_exception(group1, group2, version):
+    """Check for COO-COO hydrogen-bond atypical interaction behavior.
+
+    Args:
+        group1:  first group for check
+        group2:  second group for check
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = True
+    interact_groups12 = group1.get_interaction_atoms(group2)
+    interact_groups21 = group2.get_interaction_atoms(group1)
+    [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12,
+                                                                 interact_groups21)
+    [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,
+                                                              closest_atom2)
+    f_angle = 1.00
+    value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle)
+    weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume)
+    value = value * (1.0 + weight)
+    return exception, value
+
+
+def check_coo_his_exception(group1, group2, version):
+    """Check for COO-HIS atypical interaction behavior
+
+    Args:
+        group1:  first group for check
+        group2:  second group for check
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = False
+    if check_buried(group1.num_volume, group2.num_volume):
+        exception = True
+    return exception, version.parameters.COO_HIS_exception
+
+
+def check_oco_his_exception(group1, group2, version):
+    """Check for OCO-HIS atypical interaction behavior
+
+    Args:
+        group1:  first group for check
+        group2:  second group for check
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = False
+    if check_buried(group1.num_volume, group2.num_volume):
+        exception = True
+    return exception, version.parameters.OCO_HIS_exception
+
+
+def check_cys_his_exception(group1, group2, version):
+    """Check for CYS-HIS atypical interaction behavior
+
+    Args:
+        group1:  first group for check
+        group2:  second group for check
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = False
+    if check_buried(group1.num_volume, group2.num_volume):
+        exception = True
+    return exception, version.parameters.CYS_HIS_exception
+
+
+def check_cys_cys_exception(group1, group2, version):
+    """Check for CYS-CYS atypical interaction behavior
+
+    Args:
+        group1:  first group for check
+        group2:  second group for check
+        version:  version object
+    Returns:
+        1. Boolean indicating atypical behavior,
+        2. value associated with atypical interaction (None if Boolean is False)
+    """
+    exception = False
+    if check_buried(group1.num_volume, group2.num_volume):
+        exception = True
+    return exception, version.parameters.CYS_CYS_exception
+
+
+def check_buried(num_volume1, num_volume2):
+    """Check to see if an interaction is buried
+
+    Args:
+        num_volume1:  number of buried heavy atoms in volume 1
+        num_volume2:  number of buried heavy atoms in volume 2
+    Returns:
+        True if interaction is buried, False otherwise
+    """
+    if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX)
+            and (num_volume1 <= SEPARATE_NUM_BURIED_MAX
+                 or num_volume2 <= SEPARATE_NUM_BURIED_MAX)):
+        return False
+    return True
diff --git a/propka/hydrogens.py b/propka/hydrogens.py
new file mode 100644
index 0000000..1427758
--- /dev/null
+++ b/propka/hydrogens.py
@@ -0,0 +1,346 @@
+"""Calculations related to hydrogen placement."""
+import math
+from propka.lib import info
+from propka.protonate import Protonate
+from propka.bonds import BondMaker
+from propka.atom import Atom
+
+
+
+def setup_bonding_and_protonation(molecular_container):
+    """Set up bonding and protonation for a molecule.
+
+    Args:
+        parameters:  not used
+        molecular_container:  molecule container.
+    """
+    # make bonds
+    my_bond_maker = setup_bonding(molecular_container)
+    # set up ligand atom names
+    set_ligand_atom_names(molecular_container)
+    # apply information on pi electrons
+    my_bond_maker.add_pi_electron_information(molecular_container)
+    # Protonate atoms
+    if molecular_container.options.protonate_all:
+        protonator = Protonate(verbose=False)
+        protonator.protonate(molecular_container)
+
+
+def setup_bonding(molecular_container):
+    """Set up bonding for a molecular container.
+
+    Args:
+        molecular_container:  the molecular container in question
+    Returns:
+        BondMaker object
+    """
+    my_bond_maker = BondMaker()
+    my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
+    return my_bond_maker
+
+
+def setup_bonding_and_protonation_30_style(molecular_container):
+    """Set up bonding for a molecular container.
+
+    Args:
+        molecular_container:  the molecular container in question
+    Returns:
+        BondMaker object
+    """
+    # Protonate atoms
+    protonate_30_style(molecular_container)
+    # make bonds
+    bond_maker = BondMaker()
+    bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
+    return bond_maker
+
+
+def protonate_30_style(molecular_container):
+    """Protonate the molecule.
+
+    Args:
+        molecular_container:  molecule
+    """
+    for name in molecular_container.conformation_names:
+        info('Now protonating', name)
+        # split atom into residues
+        curres = -1000000
+        residue = []
+        o_atom = None
+        c_atom = None
+        for atom in molecular_container.conformations[name].atoms:
+            if atom.res_num != curres:
+                curres = atom.res_num
+                if len(residue) > 0:
+                    #backbone
+                    [o_atom, c_atom] = add_backbone_hydrogen(
+                        residue, o_atom, c_atom)
+                    #arginine
+                    if residue[0].res_name == 'ARG':
+                        add_arg_hydrogen(residue)
+                    #histidine
+                    if residue[0].res_name == 'HIS':
+                        add_his_hydrogen(residue)
+                    #tryptophan
+                    if residue[0].res_name == 'TRP':
+                        add_trp_hydrogen(residue)
+                    #amides
+                    if residue[0].res_name in ['GLN', 'ASN']:
+                        add_amd_hydrogen(residue)
+                    residue = []
+            if atom.type == 'atom':
+                residue.append(atom)
+
+
+def set_ligand_atom_names(molecular_container):
+    """Set names for ligands in molecular container.
+
+    Args:
+        molecular_container:  molecular container for ligand names
+    """
+    for name in molecular_container.conformation_names:
+        molecular_container.conformations[name].set_ligand_atom_names()
+
+
+def add_arg_hydrogen(residue):
+    """Adds Arg hydrogen atoms to residues according to the 'old way'.
+
+    Args:
+        residue:  arginine residue to protonate
+    Returns:
+        list of hydrogen atoms
+    """
+    #info('Adding arg H',residue)
+    for atom in residue:
+        if atom.name == "CD":
+            cd_atom = atom
+        elif atom.name == "CZ":
+            cz_atom = atom
+        elif atom.name == "NE":
+            ne_atom = atom
+        elif atom.name == "NH1":
+            nh1_atom = atom
+        elif atom.name == "NH2":
+            nh2_atom = atom
+    h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom)
+    h1_atom.name = "HE"
+    h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom)
+    h2_atom.name = "HN1"
+    h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom)
+    h3_atom.name = "HN2"
+    h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom)
+    h4_atom.name = "HN3"
+    h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom)
+    h5_atom.name = "HN4"
+    return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom]
+
+
+def add_his_hydrogen(residue):
+    """Adds His hydrogen atoms to residues according to the 'old way'.
+
+    Args:
+        residue:  histidine residue to protonate
+    """
+    for atom in residue:
+        if atom.name == "CG":
+            cg_atom = atom
+        elif atom.name == "ND1":
+            nd_atom = atom
+        elif atom.name == "CD2":
+            cd_atom = atom
+        elif atom.name == "CE1":
+            ce_atom = atom
+        elif atom.name == "NE2":
+            ne_atom = atom
+    hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom)
+    hd_atom.name = "HND"
+    he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom)
+    he_atom.name = "HNE"
+
+
+def add_trp_hydrogen(residue):
+    """Adds Trp hydrogen atoms to residues according to the 'old way'.
+
+    Args:
+        residue:  tryptophan residue to protonate
+    """
+    cd_atom = None
+    ne_atom = None
+    for atom in residue:
+        if atom.name == "CD1":
+            cd_atom = atom
+        elif atom.name == "NE1":
+            ne_atom = atom
+        elif atom.name == "CE2":
+            ce_atom = atom
+    if (cd_atom is None) or (ne_atom is None) or (ce_atom is None):
+        str_ = "Unable to find all atoms for {0:s} {1:s}".format(
+            residue[0].res_name, residue[0].res_num)
+        raise ValueError(str_)
+    he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom)
+    he_atom.name = "HNE"
+
+
+def add_amd_hydrogen(residue):
+    """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'.
+
+    Args:
+        residue:  glutamine or asparagine residue to protonate
+    """
+    c_atom = None
+    o_atom = None
+    n_atom = None
+    for atom in residue:
+        if ((atom.res_name == "GLN" and atom.name == "CD")
+                or (atom.res_name == "ASN" and atom.name == "CG")):
+            c_atom = atom
+        elif ((atom.res_name == "GLN" and atom.name == "OE1")
+              or (atom.res_name == "ASN" and atom.name == "OD1")):
+            o_atom = atom
+        elif ((atom.res_name == "GLN" and atom.name == "NE2")
+              or (atom.res_name == "ASN" and atom.name == "ND2")):
+            n_atom = atom
+    if (c_atom is None) or (o_atom is None) or (n_atom is None):
+        str_ = "Unable to find all atoms for {0:s} {1:s}".format(
+            residue[0].res_name, residue[0].res_num)
+        raise ValueError(str_)
+    h1_atom = protonate_direction(n_atom, o_atom, c_atom)
+    h1_atom.name = "HN1"
+    h2_atom = protonate_average_direction(n_atom, c_atom, o_atom)
+    h2_atom.name = "HN2"
+
+
+def add_backbone_hydrogen(residue, o_atom, c_atom):
+    """Adds hydrogen backbone atoms to residues according to the old way.
+
+    dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect
+    anything at the moment. Could be improved, but works for now.
+
+    Args:
+        residue:  residue to protonate
+        o_atom:  backbone oxygen atom
+        c_atom:  backbone carbon atom
+    Returns:
+        [new backbone oxygen atom, new backbone carbon atom]
+    """
+    new_c_atom = None
+    new_o_atom = None
+    n_atom = None
+    for atom in residue:
+        if atom.name == "N":
+            n_atom = atom
+        if atom.name == "C":
+            new_c_atom = atom
+        if atom.name == "O":
+            new_o_atom = atom
+    if None in [c_atom, o_atom, n_atom]:
+        return [new_o_atom, new_c_atom]
+    if n_atom.res_name == "PRO":
+        # PRO doesn't have an H-atom; do nothing
+        pass
+    else:
+        h_atom = protonate_direction(n_atom, o_atom, c_atom)
+        h_atom.name = "H"
+    return [new_o_atom, new_c_atom]
+
+
+def protonate_direction(x1_atom, x2_atom, x3_atom):
+    """Protonates an atom, x1_atom, given a direction.
+
+    New direction for x1_atom proton is (x2_atom -> x3_atom).
+
+    Args:
+        x1_atom:  atom to be protonated
+        x2_atom:  atom for direction
+        x3_atom:  other atom for direction
+    Returns:
+        new hydrogen atom
+    """
+    dx = (x3_atom.x - x2_atom.x)
+    dy = (x3_atom.y - x2_atom.y)
+    dz = (x3_atom.z - x2_atom.z)
+    length = math.sqrt(dx*dx + dy*dy + dz*dz)
+    x = x1_atom.x + dx/length
+    y = x1_atom.y + dy/length
+    z = x1_atom.z + dz/length
+    h_atom = make_new_h(x1_atom, x, y, z)
+    h_atom.name = "H"
+    return h_atom
+
+
+def protonate_average_direction(x1_atom, x2_atom, x3_atom):
+    """Protonates an atom, x1_atom, given a direction.
+
+    New direction for x1_atom is (x1_atom/x2_atom -> x3_atom).
+    Note, this one uses the average of x1_atom & x2_atom (N & O) unlike
+    the previous N - C = O
+
+    Args:
+        x1_atom:  atom to be protonated
+        x2_atom:  atom for direction
+        x3_atom:  other atom for direction
+    Returns:
+        new hydrogen atom
+    """
+    dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x
+    dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y
+    dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z
+    length = math.sqrt(dx*dx + dy*dy + dz*dz)
+    x = x1_atom.x + dx/length
+    y = x1_atom.y + dy/length
+    z = x1_atom.z + dz/length
+    h_atom = make_new_h(x1_atom, x, y, z)
+    h_atom.name = "H"
+    return h_atom
+
+
+def protonate_sp2(x1_atom, x2_atom, x3_atom):
+    """Protonates a SP2 atom, given a list of atoms
+
+    Args:
+        x1_atom:  atom to set direction
+        x2_atom:  atom to be protonated
+        x3_atom:  other atom to set direction
+    Returns:
+        new hydrogen atom
+    """
+    dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x
+    dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y
+    dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z
+    length = math.sqrt(dx*dx + dy*dy + dz*dz)
+    x = x2_atom.x - dx/length
+    y = x2_atom.y - dy/length
+    z = x2_atom.z - dz/length
+    h_atom = make_new_h(x2_atom, x, y, z)
+    h_atom.name = "H"
+    return h_atom
+
+
+def make_new_h(atom, x, y, z):
+    """Add a new hydrogen to an atom at the specified position.
+
+    Args:
+        atom:  atom to protonate
+        x:  x position of hydrogen
+        y:  y position of hydrogen
+        z:  z position of hydrogen
+    Returns:
+        new hydrogen atom
+    """
+    new_h = Atom()
+    new_h.set_property(
+        numb=None, name='H{0:s}'.format(atom.name[1:]),
+        res_name=atom.res_name, chain_id=atom.chain_id,
+        res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None)
+    new_h.element = 'H'
+    new_h.bonded_atoms = [atom]
+    new_h.charge = 0
+    new_h.steric_number = 0
+    new_h.number_of_lone_pairs = 0
+    new_h.number_of_protons_to_add = 0
+    new_h.num_pi_elec_2_3_bonds = 0
+    atom.bonded_atoms.append(new_h)
+    atom.conformation_container.add_atom(new_h)
+    return new_h
+
+
diff --git a/propka/version.py b/propka/version.py
index 9ff5d87..0eb0737 100644
--- a/propka/version.py
+++ b/propka/version.py
@@ -3,7 +3,13 @@
 TODO - this module unnecessarily confuses the code.  Can we eliminate it?
 """
 from propka.lib import info
-import propka.calculations as calcs
+from propka.hydrogens import setup_bonding_and_protonation, setup_bonding
+from propka.hydrogens import setup_bonding_and_protonation_30_style
+from propka.energy import radial_volume_desolvation, calculate_pair_weight
+from propka.energy import hydrogen_bond_energy, hydrogen_bond_interaction
+from propka.energy import electrostatic_interaction, check_coulomb_pair
+from propka.energy import coulomb_energy, check_exceptions
+from propka.energy import backbone_reorganization
 
 
 class Version:
@@ -79,8 +85,7 @@ def check_exceptions(self, group1, group2):
 
     def setup_bonding_and_protonation(self, molecular_container):
         """Setup bonding and protonation using assigned model."""
-        return self.molecular_preparation_method(
-            self.parameters, molecular_container)
+        return self.molecular_preparation_method(molecular_container)
 
     def setup_bonding(self, molecular_container):
         """Setup bonding using assigned model."""
@@ -94,18 +99,18 @@ def __init__(self, parameters):
         """Initialize object with parameters."""
         # set the calculation rutines used in this version
         super().__init__(parameters)
-        self.molecular_preparation_method = calcs.setup_bonding_and_protonation
-        self.prepare_bonds = calcs.setup_bonding
-        self.desolvation_model = calcs.radial_volume_desolvation
-        self.weight_pair_method = calcs.calculate_pair_weight
-        self.sidechain_interaction_model = calcs.hydrogen_bond_energy
-        self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction
-        self.electrostatic_interaction_model = calcs.electrostatic_interaction
-        self.check_coulomb_pair_method = calcs.check_coulomb_pair
-        self.coulomb_interaction_model = calcs.coulomb_energy
-        self.backbone_interaction_model = calcs.hydrogen_bond_energy
-        self.backbone_reorganisation_method = calcs.backbone_reorganization
-        self.exception_check_method = calcs.check_exceptions
+        self.molecular_preparation_method = setup_bonding_and_protonation
+        self.prepare_bonds = setup_bonding
+        self.desolvation_model = radial_volume_desolvation
+        self.weight_pair_method = calculate_pair_weight
+        self.sidechain_interaction_model = hydrogen_bond_energy
+        self.hydrogen_bond_interaction_model = hydrogen_bond_interaction
+        self.electrostatic_interaction_model = electrostatic_interaction
+        self.check_coulomb_pair_method = check_coulomb_pair
+        self.coulomb_interaction_model = coulomb_energy
+        self.backbone_interaction_model = hydrogen_bond_energy
+        self.backbone_reorganisation_method = backbone_reorganization
+        self.exception_check_method = check_exceptions
 
     def get_hydrogen_bond_parameters(self, atom1, atom2):
         """Get hydrogen bond parameters for two atoms.
@@ -265,14 +270,14 @@ def __init__(self, parameters):
         # set the calculation routines used in this version
         super().__init__(parameters)
         self.molecular_preparation_method = (
-            calcs.setup_bonding_and_protonation_30_style)
-        self.desolvation_model = calcs.radial_volume_desolvation
-        self.weight_pair_method = calcs.calculate_pair_weight
-        self.sidechain_interaction_model = calcs.hydrogen_bond_energy
-        self.check_coulomb_pair_method = calcs.check_coulomb_pair
-        self.coulomb_interaction_model = calcs.coulomb_energy
-        self.backbone_reorganisation_method = calcs.backbone_reorganization
-        self.exception_check_method = calcs.check_exceptions
+            setup_bonding_and_protonation_30_style)
+        self.desolvation_model = radial_volume_desolvation
+        self.weight_pair_method = calculate_pair_weight
+        self.sidechain_interaction_model = hydrogen_bond_energy
+        self.check_coulomb_pair_method = check_coulomb_pair
+        self.coulomb_interaction_model = coulomb_energy
+        self.backbone_reorganisation_method = backbone_reorganization
+        self.exception_check_method = check_exceptions
 
     def get_hydrogen_bond_parameters(self, atom1, atom2):
         """Get hydrogen bond parameters for two atoms.

From 397d5e10aa02595b6f11a61ed762a1c94e7c8f59 Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 09:26:13 -0700
Subject: [PATCH 2/6] Remove cyclic import based on atom.py

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
---
 propka/atom.py                |  32 ++++-----
 propka/calculations.py        |   3 -
 propka/group.py               |  13 ++++
 propka/ligand_pka_values.py   |   7 +-
 propka/molecular_container.py |  16 ++---
 propka/output.py              | 121 ++++++++++++++++++++++++++++++++--
 propka/pdb.py                 | 116 +-------------------------------
 7 files changed, 155 insertions(+), 153 deletions(-)

diff --git a/propka/atom.py b/propka/atom.py
index 1e87af2..3bb3f12 100644
--- a/propka/atom.py
+++ b/propka/atom.py
@@ -1,8 +1,7 @@
 """Atom class - contains all atom information found in the PDB file"""
 import string
-import propka.lib
-import propka.group
 from . import hybrid36
+from propka.lib import make_tidy_atom_label
 
 
 # Format strings that get used in multiple places (or are very complex)
@@ -50,6 +49,9 @@ def __init__(self, line=None):
         self.z = None
         self.group = None
         self.group_type = None
+        self.group_label = None
+        self.group_model_pka = None
+        self.group_model_pka_set = None
         self.number_of_bonded_elements = {}
         self.cysteine_bridge = False
         self.bonded_atoms = []
@@ -267,7 +269,7 @@ def make_input_line(self):
                 model_pka = PKA_FMT.format(self.group.model_pka)
         str_ = INPUT_LINE_FMT.format(
             type=self.type.upper(), r=self,
-            atom_label=propka.lib.make_tidy_atom_label(self.name, self.element),
+            atom_label=make_tidy_atom_label(self.name, self.element),
             group=group, pka=model_pka)
         return str_
 
@@ -313,21 +315,11 @@ def get_input_parameters(self):
             self.occ = self.occ.replace('ALG', 'titratable_ligand')
             self.occ = self.occ.replace('BLG', 'titratable_ligand')
             self.occ = self.occ.replace('LG', 'non_titratable_ligand')
-            # try to initialise the group
-            try:
-                group_attr = "{0:s}_group".format(self.occ)
-                group_attr = getattr(propka.group, group_attr)
-                self.group = group_attr(self)
-            except:
-                # TODO - be more specific with expection handling here
-                str_ = (
-                    '{0:s} in input_file is not recognized as a group'.format(
-                        self.occ))
-                raise Exception(str_)
+            self.group_label = "{0:s}_group".format(self.occ)
         # set the model pKa value
         if self.beta != '-':
-            self.group.model_pka = float(self.beta)
-            self.group.model_pka_set = True
+            self.group_model_pka = float(self.beta)
+            self.group_model_pka_set = True
         # set occ and beta to standard values
         self.occ = '1.00'
         self.beta = '0.00'
@@ -344,7 +336,7 @@ def make_pdb_line(self):
         """
         str_ = PDB_LINE_FMT1.format(
             type=self.type.upper(), r=self,
-            atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
+            atom_label=make_tidy_atom_label(self.name, self.element))
         return str_
 
     def make_mol2_line(self, id_):
@@ -359,7 +351,7 @@ def make_mol2_line(self, id_):
         """
         str_ = MOL2_LINE_FMT.format(
             id=id_, r=self,
-            atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
+            atom_label=make_tidy_atom_label(self.name, self.element))
         return str_
 
     def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None,
@@ -397,7 +389,7 @@ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None,
         str_ = PDB_LINE_FMT2.format(
             numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num,
             x=x, y=y, z=z, occ=occ, beta=beta,
-            atom_label=propka.lib.make_tidy_atom_label(name, self.element)
+            atom_label=make_tidy_atom_label(name, self.element)
         )
         return str_
 
@@ -408,7 +400,7 @@ def get_tidy_label(self):
 
         Returns:
             String with label"""
-        return propka.lib.make_tidy_atom_label(self.name, self.element)
+        return make_tidy_atom_label(self.name, self.element)
 
     def __str__(self):
         """Return an undefined-format string version of this atom."""
diff --git a/propka/calculations.py b/propka/calculations.py
index df8db5d..915b713 100644
--- a/propka/calculations.py
+++ b/propka/calculations.py
@@ -1,8 +1,5 @@
 """PROPKA calculations."""
 import math
-import propka.protonate
-import propka.bonds
-from propka.lib import warning, info
 
 
 # TODO - this file should be broken into three separate files:
diff --git a/propka/group.py b/propka/group.py
index 465c195..96c0a30 100644
--- a/propka/group.py
+++ b/propka/group.py
@@ -6,6 +6,7 @@
 from propka.determinant import Determinant
 from propka.lib import info, warning
 
+
 # Constants that start with "UNK_" are a mystery to me
 UNK_PKA_SCALING = -1.36
 PROTONATOR = propka.protonate.Protonate(verbose=False)
@@ -1417,3 +1418,15 @@ def is_ion_group(parameters, atom):
     if atom.res_name.strip() in parameters.ions.keys():
         return IonGroup(atom)
     return None
+
+def initialize_atom_group(atom):
+    """Initialize an atom group.
+
+    Args:
+        atom:  atom to initialize
+    """
+        # try to initialise the group
+    group_attr = globals()[atom.group_label]
+    atom.group = group_attr(atom)
+    atom.group.model_pka = atom.group_model_pka
+    atom.group.model_pka_set = atom.group_model_pka_set
diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py
index d7a5423..abfc3e4 100644
--- a/propka/ligand_pka_values.py
+++ b/propka/ligand_pka_values.py
@@ -6,7 +6,7 @@
 import propka.calculations
 import propka.parameters
 import propka.pdb
-import propka.lib
+from propka.output import write_mol2_for_atoms
 from propka.lib import info, warning
 
 
@@ -133,7 +133,7 @@ def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2',
         """
         # print out structure unless we are using user-modified structure
         if not reuse:
-            propka.pdb.write_mol2_for_atoms(atoms, filename)
+            write_mol2_for_atoms(atoms, filename)
         # check that we actually have a file to work with
         if not os.path.isfile(filename):
             errstr = (
@@ -141,7 +141,7 @@ def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2',
                 "- generating one".format(
                     filename))
             warning(errstr)
-            propka.pdb.write_mol2_for_atoms(atoms, filename)
+            write_mol2_for_atoms(atoms, filename)
         # Marvin calculate pKa values
         fmt = (
             'pka -a {num1} -b {num2} --min {min_ph} '
@@ -197,3 +197,4 @@ def extract_pkas(output):
         if len(indices) != len(values) != len(types):
             raise Exception('Lengths of atoms and pka values mismatch')
         return indices, values, types
+
diff --git a/propka/molecular_container.py b/propka/molecular_container.py
index 3d2409f..d99c2c0 100644
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -1,13 +1,11 @@
 """Molecular container for storing all contents of PDB files."""
 import os
 import sys
-import propka.pdb
 import propka.version
-import propka.output
-import propka.group
-import propka.lib
+from propka.pdb import read_input
+from propka.output import write_input
 from propka.conformation_container import ConformationContainer
-from propka.lib import info, warning
+from propka.lib import info, warning, make_grid
 
 
 # TODO - these are constants whose origins are a little murky
@@ -78,7 +76,7 @@ def __init__(self, input_file, options=None):
             self.find_covalently_coupled_groups()
             # write out the input file
             filename = self.file.replace(input_file_extension, '.propka_input')
-            propka.pdb.write_input(self, filename)
+            write_input(self, filename)
         elif input_file_extension == '.propka_input':
             #input is a propka_input file
             [self.conformations, self.conformation_names] = (
@@ -155,7 +153,7 @@ def average_of_conformations(self):
                 else:
                     str_ = (
                         'Group {0:s} could not be found in '
-                        'conformation {0:s}.'.format(
+                        'conformation {1:s}.'.format(
                             group.atom.residue_label, name))
                     warning(str_)
             # ... and store the average value
@@ -214,7 +212,7 @@ def get_folding_profile(self, conformation='AVR', reference="neutral",
         """
         # calculate stability profile
         profile = []
-        for ph in propka.lib.make_grid(*grid):
+        for ph in make_grid(*grid):
             conf = self.conformations[conformation]
             ddg = conf.calculate_folding_energy(ph=ph, reference=reference)
             profile.append([ph, ddg])
@@ -244,7 +242,7 @@ def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]):
             list of charge state values
         """
         charge_profile = []
-        for ph in propka.lib.make_grid(*grid):
+        for ph in make_grid(*grid):
             conf = self.conformations[conformation]
             q_unfolded, q_folded = conf.calculate_charge(
                 self.version.parameters, ph=ph)
diff --git a/propka/output.py b/propka/output.py
index 61f001b..61f8a68 100644
--- a/propka/output.py
+++ b/propka/output.py
@@ -1,6 +1,6 @@
 """Output routines."""
 from datetime import date
-from propka.lib import info
+from propka.lib import info, open_file_for_writing
 
 
 def print_header():
@@ -11,8 +11,8 @@ def print_header():
     info(str_)
 
 
-def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False,
-              _=None):
+def write_pdb_for_protein(
+        protein, pdbfile=None, filename=None, include_hydrogens=False, _=None):
     """Write a residue to the new PDB file.
 
     Args:
@@ -50,6 +50,16 @@ def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False,
         pdbfile.close()
 
 
+def write_pdb_for_conformation(conformation, filename):
+    """Write PDB conformation to a file.
+
+    Args:
+        conformation:  conformation container
+        filename:  filename for output
+    """
+    write_pdb_for_atoms(conformation.atoms, filename)
+
+
 def write_pka(protein, parameters, filename=None, conformation='1A',
               reference="neutral", _="folding", verbose=False,
               __=None):
@@ -204,8 +214,8 @@ def get_summary_section(protein, conformation, parameters):
 
 
 def get_folding_profile_section(
-    protein, conformation='AVR', direction="folding", reference="neutral",
-    window=[0., 14., 1.0], _=False, __=None):
+        protein, conformation='AVR', direction="folding", reference="neutral",
+        window=[0., 14., 1.0], _=False, __=None):
     """Returns string with the folding profile section of the results.
 
     Args:
@@ -461,3 +471,104 @@ def make_interaction_map(name, list_, interaction):
                 tag = '    X     '
             res += '{0:>10s}| '.format(tag)
     return res
+
+
+def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
+    """Write out PDB file for atoms.
+
+    Args:
+        atoms:  list of atoms
+        filename:  name of file
+        make_conect_section:  generate a CONECT PDB section
+    """
+    out = open_file_for_writing(filename)
+    for atom in atoms:
+        out.write(atom.make_pdb_line())
+    if make_conect_section:
+        for atom in atoms:
+            out.write(atom.make_conect_line())
+    out.close()
+
+
+def get_bond_order(atom1, atom2):
+    """Get the order of a bond between two atoms.
+
+    Args:
+        atom1:  first atom in bond
+        atom2:  second atom in bond
+    Returns:
+        string with bond type
+    """
+    type_ = '1'
+    pi_electrons1 = atom1.num_pi_elec_2_3_bonds
+    pi_electrons2 = atom2.num_pi_elec_2_3_bonds
+    if '.ar' in atom1.sybyl_type:
+        pi_electrons1 -= 1
+    if '.ar' in atom2.sybyl_type:
+        pi_electrons2 -= 1
+    if pi_electrons1 > 0 and pi_electrons2 > 0:
+        type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1)
+    if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type:
+        type_ = 'ar'
+    return type_
+
+
+def write_mol2_for_atoms(atoms, filename):
+    """Write out MOL2 file for atoms.
+
+    Args:
+        atoms:  list of atoms
+        filename:  name of file
+    """
+    # TODO - header needs to be converted to format string
+    header = '@<TRIPOS>MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n'
+    atoms_section = '@<TRIPOS>ATOM\n'
+    for i, atom in enumerate(atoms):
+        atoms_section += atom.make_mol2_line(i+1)
+    bonds_section = '@<TRIPOS>BOND\n'
+    id_ = 1
+    for i, atom1 in enumerate(atoms):
+        for j, atom2 in enumerate(atoms, i+1):
+            if atom1 in atom2.bonded_atoms:
+                type_ = get_bond_order(atom1, atom2)
+                bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format(
+                    id_, i+1, j+1, type_)
+                id_ += 1
+    substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n'
+    if len(atoms) > 0:
+        substructure_section = (
+            '@<TRIPOS>SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format(
+                atoms[0].res_num, atoms[0].res_name, atoms[0].numb))
+    out = open_file_for_writing(filename)
+    out.write(header.format(natom=len(atoms), id=id_-1))
+    out.write(atoms_section)
+    out.write(bonds_section)
+    out.write(substructure_section)
+    out.close()
+
+def write_input(molecular_container, filename):
+    """Write PROPKA input file for molecular container.
+
+    Args:
+        molecular_container:  molecular container
+        filename:  output file name
+    """
+    out = open_file_for_writing(filename)
+    for conformation_name in molecular_container.conformation_names:
+        out.write('MODEL {0:s}\n'.format(conformation_name))
+        # write atoms
+        for atom in molecular_container.conformations[conformation_name].atoms:
+            out.write(atom.make_input_line())
+        # write bonds
+        for atom in molecular_container.conformations[conformation_name].atoms:
+            out.write(atom.make_conect_line())
+        # write covalently coupled groups
+        for group in (
+                molecular_container.conformations[conformation_name].groups):
+            out.write(group.make_covalently_coupled_line())
+        # write non-covalently coupled groups
+        for group in (
+                molecular_container.conformations[conformation_name].groups):
+            out.write(group.make_non_covalently_coupled_line())
+        out.write('ENDMDL\n')
+    out.close()
diff --git a/propka/pdb.py b/propka/pdb.py
index 0074938..f1eaf16 100644
--- a/propka/pdb.py
+++ b/propka/pdb.py
@@ -1,7 +1,8 @@
-"""PDB parsing functionality."""
+"""Read and parse PDB-like input files."""
 import propka.lib
 from propka.lib import warning
 from propka.atom import Atom
+from propka.group import initialize_atom_group
 from propka.conformation_container import ConformationContainer
 
 
@@ -158,118 +159,6 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
             terminal = None
 
 
-def write_pdb(conformation, filename):
-    """Write PDB conformation to a file.
-
-    Args:
-        conformation:  conformation container
-        filename:  filename for output
-    """
-    write_pdb_for_atoms(conformation.atoms, filename)
-
-
-def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
-    """Write out PDB file for atoms.
-
-    Args:
-        atoms:  list of atoms
-        filename:  name of file
-        make_conect_section:  generate a CONECT PDB section
-    """
-    out = propka.lib.open_file_for_writing(filename)
-    for atom in atoms:
-        out.write(atom.make_pdb_line())
-    if make_conect_section:
-        for atom in atoms:
-            out.write(atom.make_conect_line())
-    out.close()
-
-
-def write_mol2_for_atoms(atoms, filename):
-    """Write out MOL2 file for atoms.
-
-    Args:
-        atoms:  list of atoms
-        filename:  name of file
-    """
-    # TODO - header needs to be converted to format string
-    header = '@<TRIPOS>MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n'
-    atoms_section = '@<TRIPOS>ATOM\n'
-    for i, atom in enumerate(atoms):
-        atoms_section += atom.make_mol2_line(i+1)
-    bonds_section = '@<TRIPOS>BOND\n'
-    id_ = 1
-    for i, atom1 in enumerate(atoms):
-        for j, atom2 in enumerate(atoms, i+1):
-            if atom1 in atom2.bonded_atoms:
-                type_ = get_bond_order(atom1, atom2)
-                bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format(
-                    id_, i+1, j+1, type_)
-                id_ += 1
-    substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n'
-    if len(atoms) > 0:
-        substructure_section = (
-            '@<TRIPOS>SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format(
-                atoms[0].res_num, atoms[0].res_name, atoms[0].numb))
-    out = propka.lib.open_file_for_writing(filename)
-    out.write(header.format(natom=len(atoms), id=id_-1))
-    out.write(atoms_section)
-    out.write(bonds_section)
-    out.write(substructure_section)
-    out.close()
-
-
-def get_bond_order(atom1, atom2):
-    """Get the order of a bond between two atoms.
-
-    Args:
-        atom1:  first atom in bond
-        atom2:  second atom in bond
-    Returns:
-        string with bond type
-    """
-    type_ = '1'
-    pi_electrons1 = atom1.num_pi_elec_2_3_bonds
-    pi_electrons2 = atom2.num_pi_elec_2_3_bonds
-    if '.ar' in atom1.sybyl_type:
-        pi_electrons1 -= 1
-    if '.ar' in atom2.sybyl_type:
-        pi_electrons2 -= 1
-    if pi_electrons1 > 0 and pi_electrons2 > 0:
-        type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1)
-    if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type:
-        type_ = 'ar'
-    return type_
-
-
-def write_input(molecular_container, filename):
-    """Write PROPKA input file for molecular container.
-
-    Args:
-        molecular_container:  molecular container
-        filename:  output file name
-    """
-    out = propka.lib.open_file_for_writing(filename)
-    for conformation_name in molecular_container.conformation_names:
-        out.write('MODEL {0:s}\n'.format(conformation_name))
-        # write atoms
-        for atom in molecular_container.conformations[conformation_name].atoms:
-            out.write(atom.make_input_line())
-        # write bonds
-        for atom in molecular_container.conformations[conformation_name].atoms:
-            out.write(atom.make_conect_line())
-        # write covalently coupled groups
-        for group in (
-                molecular_container.conformations[conformation_name].groups):
-            out.write(group.make_covalently_coupled_line())
-        # write non-covalently coupled groups
-        for group in (
-                molecular_container.conformations[conformation_name].groups):
-            out.write(group.make_non_covalently_coupled_line())
-        out.write('ENDMDL\n')
-    out.close()
-
-
 def read_input(input_file, parameters, molecule):
     """Read PROPKA input file for molecular container.
 
@@ -316,6 +205,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
         if tag in tags:
             atom = Atom(line=line)
             atom.get_input_parameters()
+            initialize_atom_group(atom)
             atom.groups_extracted = 1
             atom.is_protonated = True
             atoms[atom.numb] = atom

From b597a6f25752cf3b5f19fe101c30115088cf779f Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 10:00:31 -0700
Subject: [PATCH 3/6] Remove cyclic import based on I/O in pdb.py.

Partially addresses https://github.com/jensengroup/propka-3.1/issues/49
---
 propka/{pdb.py => input.py}   | 151 ++++++++++++++++------------------
 propka/lib.py                 | 106 +++++++++++-------------
 propka/ligand_pka_values.py   |   1 -
 propka/molecular_container.py |  23 +++---
 propka/output.py              |  38 ++++++++-
 propka/parameters.py          |  18 +---
 6 files changed, 171 insertions(+), 166 deletions(-)
 rename propka/{pdb.py => input.py} (72%)

diff --git a/propka/pdb.py b/propka/input.py
similarity index 72%
rename from propka/pdb.py
rename to propka/input.py
index f1eaf16..e8c7f07 100644
--- a/propka/pdb.py
+++ b/propka/input.py
@@ -1,96 +1,58 @@
-"""Read and parse PDB-like input files."""
-import propka.lib
-from propka.lib import warning
+"""Input routines."""
+from pkg_resources import resource_filename
 from propka.atom import Atom
-from propka.group import initialize_atom_group
 from propka.conformation_container import ConformationContainer
+from propka.group import initialize_atom_group
 
 
-EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
-                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
-                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
-                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
-
+def open_file_for_reading(input_file):
+    """Open file or file-like stream for reading.
 
-def read_pdb(pdb_file, parameters, molecule):
-    """Parse a PDB file.
+    TODO - convert this to a context manager
 
     Args:
-        pdb_file:  file to read
-        parameters:  parameters to guide parsing
-        molecule:  molecular container
-    Returns:
-        list with elements:
-            1. list of conformations
-            2. list of names
+        input_file: path to file or file-like object. If file-like object,
+        then will attempt fseek(0).
     """
-    conformations = {}
-    # read in all atoms in the file
-    lines = get_atom_lines_from_pdb(
-        pdb_file, ignore_residues=parameters.ignore_residues,
-        keep_protons=molecule.options.keep_protons,
-        chains=molecule.options.chains)
-    for (name, atom) in lines:
-        if not name in conformations.keys():
-            conformations[name] = ConformationContainer(
-                name=name, parameters=parameters, molecular_container=molecule)
-        conformations[name].add_atom(atom)
-    # make a sorted list of conformation names
-    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
-    return [conformations, names]
+    try:
+        input_file.fseek(0)
+        return input_file
+    except AttributeError:
+        pass
 
+    try:
+        file_ = open(input_file, 'rt')
+    except:
+        raise IOError('Cannot find file {0:s}'.format(input_file))
+    return file_
 
-def protein_precheck(conformations, names):
-    """Check protein for correct number of atoms, etc.
+
+def read_parameter_file(input_file, parameters):
+    """Read a parameter file.
 
     Args:
-        names:  conformation names to check
+        input_file:  input file to read
+        parameters:  Parameters object
+    Returns:
+        updated Parameters object
     """
-    for name in names:
-        atoms = conformations[name].atoms
-        # Group the atoms by their residue:
-        atoms_by_residue = {}
-        for atom in atoms:
-            if atom.element != 'H':
-                res_id = resid_from_atom(atom)
-                try:
-                    atoms_by_residue[res_id].append(atom)
-                except KeyError:
-                    atoms_by_residue[res_id] = [atom]
-        for res_id, res_atoms in atoms_by_residue.items():
-            res_name = res_atoms[0].res_name
-            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
-            # ignore ligand residues
-            if res_name not in EXPECTED_ATOM_NUMBERS:
-                continue
-            # check for c-terminal
-            if 'C-' in [a.terminal for a in res_atoms]:
-                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
-                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
-                            "{res:s} in conformation {conf:s}".format(
-                                num=len(res_atoms), res=residue_label,
-                                conf=name))
-                    warning(str_)
-                continue
-            # check number of atoms in residue
-            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
-                str_ = ("Unexpected number ({num:d}) of atoms in residue "
-                        "{res:s} in conformation {conf:s}".format(
-                            num=len(res_atoms), res=residue_label,
-                            conf=name))
-                warning(str_)
+    # try to locate the parameter file
+    try:
+        ifile = resource_filename(__name__, input_file)
+        input_ = open_file_for_reading(ifile)
+    except (IOError, FileNotFoundError, ValueError):
+        input_ = open_file_for_reading(input_file)
+    for line in input_:
+        parameters.parse_line(line)
+    return parameters
 
 
-def resid_from_atom(atom):
-    """Return string with atom residue information.
 
-    Args:
-        atom:  atom to generate string for
-    Returns
-        string
-    """
-    return '{0:>4d} {1:s} {2:s}'.format(
-        atom.res_num, atom.chain_id, atom.icode)
+def conformation_sorter(conf):
+    """TODO - figure out what this function does."""
+    model = int(conf[:-1])
+    altloc = conf[-1:]
+    return model*100+ord(altloc)
 
 
 def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
@@ -104,7 +66,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
         tags:  tags of lines that include atoms
         chains:  list of chains
     """
-    lines = propka.lib.open_file_for_reading(pdb_file).readlines()
+    lines = open_file_for_reading(pdb_file).readlines()
     nterm_residue = 'next_residue'
     old_residue = None
     terminal = None
@@ -179,7 +141,7 @@ def read_input(input_file, parameters, molecule):
                 molecular_container=molecule)
         conformations[name].add_atom(atom)
     # make a sorted list of conformation names
-    names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
+    names = sorted(conformations.keys(), key=conformation_sorter)
     return [conformations, names]
 
 
@@ -192,7 +154,7 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
     Yields:
         conformation container, list of atoms
     """
-    lines = propka.lib.open_file_for_reading(input_file).readlines()
+    lines = open_file_for_reading(input_file).readlines()
     conformation = ''
     atoms = {}
     numbers = []
@@ -246,3 +208,32 @@ def get_atom_lines_from_input(input_file, tags=['ATOM  ', 'HETATM']):
             # prepare for next conformation
             atoms = {}
             numbers = []
+
+def read_pdb(pdb_file, parameters, molecule):
+    """Parse a PDB file.
+
+    Args:
+        pdb_file:  file to read
+        parameters:  parameters to guide parsing
+        molecule:  molecular container
+    Returns:
+        list with elements:
+            1. list of conformations
+            2. list of names
+    """
+    conformations = {}
+    # read in all atoms in the file
+    lines = get_atom_lines_from_pdb(
+        pdb_file, ignore_residues=parameters.ignore_residues,
+        keep_protons=molecule.options.keep_protons,
+        chains=molecule.options.chains)
+    for (name, atom) in lines:
+        if not name in conformations.keys():
+            conformations[name] = ConformationContainer(
+                name=name, parameters=parameters, molecular_container=molecule)
+        conformations[name].add_atom(atom)
+    # make a sorted list of conformation names
+    names = sorted(conformations.keys(), key=conformation_sorter)
+    return [conformations, names]
+
+
diff --git a/propka/lib.py b/propka/lib.py
index 48eab4e..518f7e6 100644
--- a/propka/lib.py
+++ b/propka/lib.py
@@ -11,56 +11,63 @@
 _LOGGER.addHandler(_STDOUT_HANDLER)
 
 
-def open_file_for_reading(input_file):
-    """Open file or file-like stream for reading.
+EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
+                         'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
+                         'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
+                         'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
 
-    TODO - convert this to a context manager
+
+def protein_precheck(conformations, names):
+    """Check protein for correct number of atoms, etc.
 
     Args:
-        input_file: path to file or file-like object. If file-like object,
-        then will attempt fseek(0).
+        names:  conformation names to check
     """
-    try:
-        input_file.fseek(0)
-        return input_file
-    except AttributeError:
-        pass
-
-    try:
-        file_ = open(input_file, 'rt')
-    except:
-        raise IOError('Cannot find file {0:s}'.format(input_file))
-    return file_
-
-
-def open_file_for_writing(input_file):
-    """Open file or file-like stream for writing.
-
-    TODO - convert this to a context manager.
+    for name in names:
+        atoms = conformations[name].atoms
+        # Group the atoms by their residue:
+        atoms_by_residue = {}
+        for atom in atoms:
+            if atom.element != 'H':
+                res_id = resid_from_atom(atom)
+                try:
+                    atoms_by_residue[res_id].append(atom)
+                except KeyError:
+                    atoms_by_residue[res_id] = [atom]
+        for res_id, res_atoms in atoms_by_residue.items():
+            res_name = res_atoms[0].res_name
+            residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
+            # ignore ligand residues
+            if res_name not in EXPECTED_ATOM_NUMBERS:
+                continue
+            # check for c-terminal
+            if 'C-' in [a.terminal for a in res_atoms]:
+                if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
+                    str_ = ("Unexpected number ({num:d}) of atoms in residue "
+                            "{res:s} in conformation {conf:s}".format(
+                                num=len(res_atoms), res=residue_label,
+                                conf=name))
+                    warning(str_)
+                continue
+            # check number of atoms in residue
+            if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
+                str_ = ("Unexpected number ({num:d}) of atoms in residue "
+                        "{res:s} in conformation {conf:s}".format(
+                            num=len(res_atoms), res=residue_label,
+                            conf=name))
+                warning(str_)
+
+
+def resid_from_atom(atom):
+    """Return string with atom residue information.
 
     Args:
-        input_file: path to file or file-like object. If file-like object,
-        then will attempt to get file mode.
+        atom:  atom to generate string for
+    Returns
+        string
     """
-    try:
-        mode = input_file.mode
-        if not ("w" in mode or "a" in mode or "+" in mode):
-            raise IOError("File/stream not open for writing")
-        return input_file
-    except AttributeError:
-        pass
-    try:
-        file_ = open(input_file, 'wt')
-    except FileNotFoundError:
-        raise Exception('Could not open {0:s}'.format(input_file))
-    return file_
-
-
-def conformation_sorter(conf):
-    """TODO - figure out what this function does."""
-    model = int(conf[:-1])
-    altloc = conf[-1:]
-    return model*100+ord(altloc)
+    return '{0:>4d} {1:s} {2:s}'.format(
+        atom.res_num, atom.chain_id, atom.icode)
 
 
 def split_atoms_into_molecules(atoms):
@@ -354,19 +361,6 @@ def configuration_compare(conf):
     return 100*int(conf[1:-2]) + ord(conf[-1])
 
 
-def write_file(filename, lines):
-    """Writes a new file.
-
-    Args:
-        filename:  name of file
-        lines:  lines to write to file
-    """
-    file_ = open_file_for_writing(filename)
-    for line in lines:
-        file_.write("{0:s}\n".format(line))
-    file_.close()
-
-
 def _args_to_str(arg_list):
     """Summarize list of arguments in string.
 
diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py
index abfc3e4..ab7fa28 100644
--- a/propka/ligand_pka_values.py
+++ b/propka/ligand_pka_values.py
@@ -5,7 +5,6 @@
 import propka.molecular_container
 import propka.calculations
 import propka.parameters
-import propka.pdb
 from propka.output import write_mol2_for_atoms
 from propka.lib import info, warning
 
diff --git a/propka/molecular_container.py b/propka/molecular_container.py
index d99c2c0..ae82fb9 100644
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -2,10 +2,11 @@
 import os
 import sys
 import propka.version
-from propka.pdb import read_input
+from propka.input import read_pdb, read_input, read_parameter_file
+from propka.parameters import Parameters
 from propka.output import write_input
 from propka.conformation_container import ConformationContainer
-from propka.lib import info, warning, make_grid
+from propka.lib import info, warning, protein_precheck, make_grid
 
 
 # TODO - these are constants whose origins are a little murky
@@ -38,11 +39,12 @@ def __init__(self, input_file, options=None):
         self.file = os.path.split(input_file)[1]
         self.name = self.file[0:self.file.rfind('.')]
         input_file_extension = input_file[input_file.rfind('.'):]
-        # set the version
+        parameters = Parameters()
         if options:
-            parameters = propka.parameters.Parameters(self.options.parameters)
+            parameters = read_parameter_file(
+                self.options.parameters, parameters)
         else:
-            parameters = propka.parameters.Parameters('propka.cfg')
+            parameters = read_parameter_file('propka.cfg', parameters)
         try:
             version_class = getattr(propka.version, parameters.version)
             self.version = version_class(parameters)
@@ -56,15 +58,15 @@ def __init__(self, input_file, options=None):
             # input is a pdb file. read in atoms and top up containers to make
             # sure that all atoms are present in all conformations
             [self.conformations, self.conformation_names] = (
-                propka.pdb.read_pdb(input_file, self.version.parameters, self))
+                read_pdb(input_file, self.version.parameters, self))
             if len(self.conformations) == 0:
                 info('Error: The pdb file does not seems to contain any '
                      'molecular conformations')
                 sys.exit(-1)
             self.top_up_conformations()
             # make a structure precheck
-            propka.pdb.protein_precheck(self.conformations,
-                                        self.conformation_names)
+            protein_precheck(
+                self.conformations, self.conformation_names)
             # set up atom bonding and protonation
             self.version.setup_bonding_and_protonation(self)
             # Extract groups
@@ -79,9 +81,8 @@ def __init__(self, input_file, options=None):
             write_input(self, filename)
         elif input_file_extension == '.propka_input':
             #input is a propka_input file
-            [self.conformations, self.conformation_names] = (
-                propka.pdb.read_input(input_file, self.version.parameters,
-                                      self))
+            [self.conformations, self.conformation_names] = read_input(
+                input_file, self.version.parameters, self)
             # Extract groups - this merely sets up the groups found in the
             # input file
             self.extract_groups()
diff --git a/propka/output.py b/propka/output.py
index 61f8a68..a72114e 100644
--- a/propka/output.py
+++ b/propka/output.py
@@ -1,6 +1,42 @@
 """Output routines."""
 from datetime import date
-from propka.lib import info, open_file_for_writing
+from propka.lib import info
+
+
+def open_file_for_writing(input_file):
+    """Open file or file-like stream for writing.
+
+    TODO - convert this to a context manager.
+
+    Args:
+        input_file: path to file or file-like object. If file-like object,
+        then will attempt to get file mode.
+    """
+    try:
+        mode = input_file.mode
+        if not ("w" in mode or "a" in mode or "+" in mode):
+            raise IOError("File/stream not open for writing")
+        return input_file
+    except AttributeError:
+        pass
+    try:
+        file_ = open(input_file, 'wt')
+    except FileNotFoundError:
+        raise Exception('Could not open {0:s}'.format(input_file))
+    return file_
+
+
+def write_file(filename, lines):
+    """Writes a new file.
+
+    Args:
+        filename:  name of file
+        lines:  lines to write to file
+    """
+    file_ = open_file_for_writing(filename)
+    for line in lines:
+        file_.write("{0:s}\n".format(line))
+    file_.close()
 
 
 def print_header():
diff --git a/propka/parameters.py b/propka/parameters.py
index 960cb81..b708fce 100644
--- a/propka/parameters.py
+++ b/propka/parameters.py
@@ -35,7 +35,7 @@
 class Parameters:
     """PROPKA parameter class."""
 
-    def __init__(self, parameter_file):
+    def __init__(self):
         """Initialize parameter class.
 
         Args:
@@ -52,22 +52,6 @@ def __init__(self, parameter_file):
         self.CYS_CYS_exception = None
         # These functions set up remaining data structures implicitly
         self.set_up_data_structures()
-        self.read_parameters(parameter_file)
-
-    def read_parameters(self, file_):
-        """Read parameters from file.
-
-        Args:
-            file_:  file to read
-        """
-        # try to locate the parameters file
-        try:
-            ifile = pkg_resources.resource_filename(__name__, file_)
-            input_ = lib.open_file_for_reading(ifile)
-        except (IOError, FileNotFoundError, ValueError):
-            input_ = lib.open_file_for_reading(file_)
-        for line in input_:
-            self.parse_line(line)
 
     def parse_line(self, line):
         """Parse parameter file line."""

From 84846aad8c71dd9bebb0b330f7958a2f26a399bc Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 12:01:30 -0700
Subject: [PATCH 4/6] Resolve cyclic import problem.

Fixes https://github.com/jensengroup/propka-3.1/issues/49

Moved I/O into higher level of of code; should make
issue https://github.com/jensengroup/propka-3.1/issues/51
easier to resolve
---
 propka/__init__.py               |  6 +--
 propka/atom.py                   |  4 +-
 propka/conformation_container.py |  1 -
 propka/input.py                  | 69 ++++++++++++++++++++++++++++++--
 propka/ligand_pka_values.py      | 14 +++----
 propka/molecular_container.py    | 66 ++++--------------------------
 propka/output.py                 |  2 +-
 propka/parameters.py             |  4 +-
 propka/run.py                    | 12 ++++--
 scripts/propka31.py              |  8 +++-
 tests/test_basic_regression.py   | 13 +++---
 11 files changed, 107 insertions(+), 92 deletions(-)

diff --git a/propka/__init__.py b/propka/__init__.py
index 8d2447f..ecfacd3 100644
--- a/propka/__init__.py
+++ b/propka/__init__.py
@@ -15,6 +15,6 @@
 """
 __all__ = ["atom", "bonds", "calculations", "conformation_container",
            "coupled_groups", "determinant", "determinants", "group",
-           "hybrid36", "iterative", "lib", "ligand_pka_values", "ligand",
-           "molecular_container", "output", "parameters", "pdb", "protonate",
-           "run", "vector_algebra", "version"]
+           "hybrid36", "iterative", "input", "lib", "ligand_pka_values",
+           "ligand", "molecular_container", "output", "parameters",
+           "protonate", "run", "vector_algebra", "version"]
diff --git a/propka/atom.py b/propka/atom.py
index 3bb3f12..fcd6825 100644
--- a/propka/atom.py
+++ b/propka/atom.py
@@ -1,7 +1,7 @@
 """Atom class - contains all atom information found in the PDB file"""
 import string
-from . import hybrid36
 from propka.lib import make_tidy_atom_label
+from . import hybrid36
 
 
 # Format strings that get used in multiple places (or are very complex)
@@ -25,7 +25,7 @@
     "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}")
 
 
-class Atom(object):
+class Atom:
     """Atom class - contains all atom information found in the PDB file"""
 
     def __init__(self, line=None):
diff --git a/propka/conformation_container.py b/propka/conformation_container.py
index d44e30c..d1e57ed 100644
--- a/propka/conformation_container.py
+++ b/propka/conformation_container.py
@@ -36,7 +36,6 @@ def __init__(self, name='', parameters=None, molecular_container=None):
         self.groups = []
         self.chains = []
         self.current_iter_item = 0
-        # TODO - what is marvin_pkas_calculated?
         self.marvin_pkas_calculated = False
         self.non_covalently_coupled_groups = False
 
diff --git a/propka/input.py b/propka/input.py
index e8c7f07..675a770 100644
--- a/propka/input.py
+++ b/propka/input.py
@@ -1,5 +1,8 @@
 """Input routines."""
+from pathlib import Path
 from pkg_resources import resource_filename
+from propka.lib import protein_precheck
+from propka.output import write_propka
 from propka.atom import Atom
 from propka.conformation_container import ConformationContainer
 from propka.group import initialize_atom_group
@@ -27,6 +30,67 @@ def open_file_for_reading(input_file):
     return file_
 
 
+def read_molecule_file(input_file, mol_container):
+    """Read input file (PDB or PROPKA) for a molecular container
+
+    Args
+        input_file:  input file to read
+        mol_container:  MolecularContainer object
+    Returns
+        updated MolecularContainer object
+    Raises
+        ValuError if invalid input given
+    """
+    input_path = Path(input_file)
+    mol_container.name = input_path.stem
+    input_file_extension = input_path.suffix
+    if input_file_extension.lower() == '.pdb':
+        # input is a pdb file. read in atoms and top up containers to make
+        # sure that all atoms are present in all conformations
+        conformations, conformation_names = read_pdb(
+            input_path, mol_container.version.parameters, mol_container)
+        if len(conformations) == 0:
+            str_ = ('Error: The pdb file does not seems to contain any '
+                    'molecular conformations')
+            raise ValueError(str_)
+        mol_container.conformations = conformations
+        mol_container.conformation_names = conformation_names
+        mol_container.top_up_conformations()
+        # make a structure precheck
+        protein_precheck(
+            mol_container.conformations, mol_container.conformation_names)
+        # set up atom bonding and protonation
+        mol_container.version.setup_bonding_and_protonation(mol_container)
+        # Extract groups
+        mol_container.extract_groups()
+        # sort atoms
+        for name in mol_container.conformation_names:
+            mol_container.conformations[name].sort_atoms()
+        # find coupled groups
+        mol_container.find_covalently_coupled_groups()
+        # write out the input file
+        # TODO - figure out why this I/O has to happen here
+        output_path = Path(input_path.name.replace(
+            input_file_extension, '.propka_input'))
+        write_propka(mol_container, output_path)
+    elif input_file_extension.lower() == '.propka_input':
+        # input is a propka_input file
+        conformations, conformation_names = read_propka(
+            input_file, mol_container.version.parameters, mol_container)
+        mol_container.conformations = conformations
+        mol_container.conformation_names = conformation_names
+        # Extract groups - this merely sets up the groups found in the
+        # input file
+        mol_container.extract_groups()
+        # do some additional set up
+        mol_container.additional_setup_when_reading_input_file()
+    else:
+        str_ = "Unknown input file type {0!s} for file {1!s}".format(
+            input_file_extension, input_path)
+        raise ValueError(str_)
+    return mol_container
+
+
 def read_parameter_file(input_file, parameters):
     """Read a parameter file.
 
@@ -47,7 +111,6 @@ def read_parameter_file(input_file, parameters):
     return parameters
 
 
-
 def conformation_sorter(conf):
     """TODO - figure out what this function does."""
     model = int(conf[:-1])
@@ -121,7 +184,7 @@ def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
             terminal = None
 
 
-def read_input(input_file, parameters, molecule):
+def read_propka(input_file, parameters, molecule):
     """Read PROPKA input file for molecular container.
 
     Args:
@@ -235,5 +298,3 @@ def read_pdb(pdb_file, parameters, molecule):
     # make a sorted list of conformation names
     names = sorted(conformations.keys(), key=conformation_sorter)
     return [conformations, names]
-
-
diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py
index ab7fa28..023845d 100644
--- a/propka/ligand_pka_values.py
+++ b/propka/ligand_pka_values.py
@@ -2,11 +2,8 @@
 import os
 import subprocess
 import sys
-import propka.molecular_container
-import propka.calculations
-import propka.parameters
 from propka.output import write_mol2_for_atoms
-from propka.lib import info, warning
+from propka.lib import info, warning, split_atoms_into_molecules
 
 
 class LigandPkaValues:
@@ -47,17 +44,17 @@ def find_in_path(program):
             sys.exit(-1)
         return locs[0]
 
-    def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10,
-                                     max_ph=20):
+    def get_marvin_pkas_for_pdb_file(
+            self, molecule, parameters, num_pkas=10, min_ph=-10, max_ph=20):
         """Use Marvin executables to get pKas for a PDB file.
 
         Args:
             pdbfile:  PDB file
+            molecule:  MolecularContainer object
             num_pkas:  number of pKas to get
             min_ph:  minimum pH value
             max_ph:  maximum pH value
         """
-        molecule = propka.molecular_container.Molecular_container(pdbfile)
         self.get_marvin_pkas_for_molecular_container(
             molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
 
@@ -110,7 +107,7 @@ def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False,
             max_ph:  maximum pH value
         """
         # do one molecule at the time so we don't confuse marvin
-        molecules = propka.lib.split_atoms_into_molecules(atoms)
+        molecules = split_atoms_into_molecules(atoms)
         for i, molecule in enumerate(molecules):
             filename = '{0:s}_{1:d}.mol2'.format(name, i+1)
             self.get_marvin_pkas_for_molecule(
@@ -196,4 +193,3 @@ def extract_pkas(output):
         if len(indices) != len(values) != len(types):
             raise Exception('Lengths of atoms and pka values mismatch')
         return indices, values, types
-
diff --git a/propka/molecular_container.py b/propka/molecular_container.py
index ae82fb9..43ad372 100644
--- a/propka/molecular_container.py
+++ b/propka/molecular_container.py
@@ -1,12 +1,8 @@
 """Molecular container for storing all contents of PDB files."""
 import os
-import sys
 import propka.version
-from propka.input import read_pdb, read_input, read_parameter_file
-from propka.parameters import Parameters
-from propka.output import write_input
 from propka.conformation_container import ConformationContainer
-from propka.lib import info, warning, protein_precheck, make_grid
+from propka.lib import info, warning, make_grid
 
 
 # TODO - these are constants whose origins are a little murky
@@ -15,36 +11,26 @@
 MAX_ITERATION = 4
 
 
-class Molecular_container:
+class MolecularContainer:
     """Container for storing molecular contents of PDB files.
 
     TODO - this class name does not conform to PEP8 but has external use.
     We should deprecate and change eventually.
     """
 
-    def __init__(self, input_file, options=None):
+    def __init__(self, parameters, options=None):
         """Initialize molecular container.
 
         Args:
-            input_file:  molecular input file
+            parameters:  Parameters() object
             options:  options object
         """
         # printing out header before parsing input
         propka.output.print_header()
-        # set up some values
+        self.conformation_names = []
+        self.conformations = {}
         self.options = options
-        self.input_file = input_file
-        # TODO - replace this indelicate os.path code with pathlib
-        self.dir = os.path.split(input_file)[0]
-        self.file = os.path.split(input_file)[1]
-        self.name = self.file[0:self.file.rfind('.')]
-        input_file_extension = input_file[input_file.rfind('.'):]
-        parameters = Parameters()
-        if options:
-            parameters = read_parameter_file(
-                self.options.parameters, parameters)
-        else:
-            parameters = read_parameter_file('propka.cfg', parameters)
+        self.name = None
         try:
             version_class = getattr(propka.version, parameters.version)
             self.version = version_class(parameters)
@@ -53,44 +39,6 @@ def __init__(self, input_file, options=None):
             errstr = 'Error: Version {0:s} does not exist'.format(
                 parameters.version)
             raise Exception(errstr)
-        # read the input file
-        if input_file_extension[0:4] == '.pdb':
-            # input is a pdb file. read in atoms and top up containers to make
-            # sure that all atoms are present in all conformations
-            [self.conformations, self.conformation_names] = (
-                read_pdb(input_file, self.version.parameters, self))
-            if len(self.conformations) == 0:
-                info('Error: The pdb file does not seems to contain any '
-                     'molecular conformations')
-                sys.exit(-1)
-            self.top_up_conformations()
-            # make a structure precheck
-            protein_precheck(
-                self.conformations, self.conformation_names)
-            # set up atom bonding and protonation
-            self.version.setup_bonding_and_protonation(self)
-            # Extract groups
-            self.extract_groups()
-            # sort atoms
-            for name in self.conformation_names:
-                self.conformations[name].sort_atoms()
-            # find coupled groups
-            self.find_covalently_coupled_groups()
-            # write out the input file
-            filename = self.file.replace(input_file_extension, '.propka_input')
-            write_input(self, filename)
-        elif input_file_extension == '.propka_input':
-            #input is a propka_input file
-            [self.conformations, self.conformation_names] = read_input(
-                input_file, self.version.parameters, self)
-            # Extract groups - this merely sets up the groups found in the
-            # input file
-            self.extract_groups()
-            # do some additional set up
-            self.additional_setup_when_reading_input_file()
-        else:
-            info('Unrecognized input file:{0:s}'.format(input_file))
-            sys.exit(-1)
 
     def top_up_conformations(self):
         """Makes sure that all atoms are present in all conformations."""
diff --git a/propka/output.py b/propka/output.py
index a72114e..2ee5f89 100644
--- a/propka/output.py
+++ b/propka/output.py
@@ -582,7 +582,7 @@ def write_mol2_for_atoms(atoms, filename):
     out.write(substructure_section)
     out.close()
 
-def write_input(molecular_container, filename):
+def write_propka(molecular_container, filename):
     """Write PROPKA input file for molecular container.
 
     Args:
diff --git a/propka/parameters.py b/propka/parameters.py
index b708fce..5b26dee 100644
--- a/propka/parameters.py
+++ b/propka/parameters.py
@@ -1,6 +1,4 @@
 """Holds parameters and settings."""
-import pkg_resources
-import propka.lib as lib
 from propka.lib import info, warning
 
 
@@ -346,7 +344,7 @@ def print_interactions_latex(self):
                    'N1', 'O2', 'OP', 'SH']
         lines = [
             "",
-            "\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)),
+            "\\begin{{longtable}}{{{0:s}}}".format('l'*len(agroups)),
             ("\\caption{{Ligand interaction parameters. For interactions not "
              "listed, the default value of {0:s} is applied.}}").format(
                  str(self.sidechain_cutoffs.default)),
diff --git a/propka/run.py b/propka/run.py
index e851309..633e2c5 100644
--- a/propka/run.py
+++ b/propka/run.py
@@ -1,7 +1,9 @@
 """Entry point for PROPKA script."""
 import logging
 from propka.lib import loadOptions
-from propka.molecular_container import Molecular_container
+from propka.input import read_parameter_file, read_molecule_file
+from propka.parameters import Parameters
+from propka.molecular_container import MolecularContainer
 
 
 _LOGGER = logging.getLogger("PROPKA")
@@ -13,8 +15,10 @@ def main(optargs=None):
     optargs = optargs if optargs is not None else []
     options = loadOptions(*optargs)
     pdbfiles = options.filenames
+    parameters = read_parameter_file(options.parameters, Parameters())
     for pdbfile in pdbfiles:
-        my_molecule = Molecular_container(pdbfile, options)
+        my_molecule = MolecularContainer(parameters, options)
+        my_molecule = read_molecule_file(pdbfile, my_molecule)
         my_molecule.calculate_pka()
         my_molecule.write_pka()
 
@@ -33,9 +37,11 @@ def single(pdbfile, optargs=None):
     optargs = optargs if optargs is not None else []
     options = loadOptions(*optargs)
     pdbfile = options.filenames.pop(0)
+    parameters = read_parameter_file(options.parameters, Parameters())
     if len(options.filenames) > 0:
         _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames))
-    my_molecule = Molecular_container(pdbfile, options)
+    my_molecule = MolecularContainer(parameters, options)
+    my_molecule = read_molecule_file(pdbfile, my_molecule)
     my_molecule.calculate_pka()
     my_molecule.write_pka()
     return my_molecule
diff --git a/scripts/propka31.py b/scripts/propka31.py
index d3459c8..8a90114 100755
--- a/scripts/propka31.py
+++ b/scripts/propka31.py
@@ -11,7 +11,9 @@
 propka31.)
 """
 from propka.lib import loadOptions
-from propka.molecular_container import Molecular_container
+from propka.input import read_parameter_file, read_molecule_file
+from propka.parameters import Parameters
+from propka.molecular_container import MolecularContainer
 
 
 def main():
@@ -19,9 +21,11 @@ def main():
     # loading options, flaggs and arguments
     options = loadOptions([])
     pdbfiles = options.filenames
+    parameters = read_parameter_file(options.parameters, Parameters())
 
     for pdbfile in pdbfiles:
-        my_molecule = Molecular_container(pdbfile, options)
+        my_molecule = MolecularContainer(parameters, options)
+        my_molecule = read_molecule_file(pdbfile, my_molecule)
         my_molecule.calculate_pka()
         my_molecule.write_pka()
 
diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py
index cd8036e..9a63854 100644
--- a/tests/test_basic_regression.py
+++ b/tests/test_basic_regression.py
@@ -5,8 +5,10 @@
 from pathlib import Path
 import pytest
 from numpy.testing import assert_almost_equal
-import propka.lib
-import propka.molecular_container
+from propka.parameters import Parameters
+from propka.molecular_container import MolecularContainer
+from propka.input import read_parameter_file, read_molecule_file
+from propka.lib import loadOptions
 
 
 _LOGGER = logging.getLogger(__name__)
@@ -64,15 +66,16 @@ def run_propka(options, pdb_path, tmp_path):
         tmp_path:  path for working directory
     """
     options += [str(pdb_path)]
-    args = propka.lib.loadOptions(options)
+    args = loadOptions(options)
     try:
         _LOGGER.warning(
             "Working in tmpdir {0:s} because of PROPKA file output; "
             "need to fix this.".format(str(tmp_path)))
         cwd = Path.cwd()
         os.chdir(tmp_path)
-        molecule = propka.molecular_container.Molecular_container(
-            str(pdb_path), args)
+        parameters = read_parameter_file(args.parameters, Parameters())
+        molecule = MolecularContainer(parameters, args)
+        molecule = read_molecule_file(str(pdb_path), molecule)
         molecule.calculate_pka()
         molecule.write_pka()
     finally:

From 1d17cb05ed695294956dc892205eb451a3d1cd8e Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Sat, 30 May 2020 13:15:45 -0700
Subject: [PATCH 5/6] Remove TODO for refactoring.

Done already.
---
 propka/calculations.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/propka/calculations.py b/propka/calculations.py
index 915b713..1c48384 100644
--- a/propka/calculations.py
+++ b/propka/calculations.py
@@ -2,15 +2,6 @@
 import math
 
 
-# TODO - this file should be broken into three separate files:
-# * calculations.py - includes basic functions for calculating distances, etc.
-# * hydrogen.py - includes bonding and protonation functions
-# * energy.py - includes energy functions (dependent on distance functions)
-
-
-# TODO - the next set of functions form a distinct "module" for distance calculation
-
-
 # Maximum distance used to bound calculations of smallest distance
 MAX_DISTANCE = 1e6
 

From 7085271ad5d5b09ae0ecc53b729a6b01defc9837 Mon Sep 17 00:00:00 2001
From: Nathan Baker <nathanandrewbaker@gmail.com>
Date: Wed, 3 Jun 2020 17:49:19 -0700
Subject: [PATCH 6/6] Simplify with .writable() method.

Addresses https://github.com/jensengroup/propka-3.1/pull/50#discussion_r434349737
---
 propka/output.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/propka/output.py b/propka/output.py
index 2ee5f89..7353779 100644
--- a/propka/output.py
+++ b/propka/output.py
@@ -13,8 +13,7 @@ def open_file_for_writing(input_file):
         then will attempt to get file mode.
     """
     try:
-        mode = input_file.mode
-        if not ("w" in mode or "a" in mode or "+" in mode):
+        if not input_file.writable():
             raise IOError("File/stream not open for writing")
         return input_file
     except AttributeError: