Skip to content

Commit

Permalink
Introduce charge_at_pH functionality in ProteinAnalysis (and Isoelect…
Browse files Browse the repository at this point in the history
…ricPoint) (#2096)

* Introduce charge_at_pH functionality

* Change single to double quotes in IsoelectricPoint.py

* Update NEWS.rst

* D413 issue after solving merge conflict
  • Loading branch information
MarkusPiotrowski committed Oct 3, 2019
1 parent 4c7a031 commit 849dc45
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 56 deletions.
140 changes: 94 additions & 46 deletions Bio/SeqUtils/IsoelectricPoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,96 +31,144 @@


class IsoelectricPoint(object):
"""A class with one public method to calculate the pI of a protein.
"""A class for calculating the IEP or charge at given pH of a protein.
Parameters
----------
:protein_sequence: A ``Bio.Seq`` or string object containing a protein
sequence.
:aa_content: A dictionary with amino acid letters as keys and it's
occurences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
Default: ``None``. If ``None``, the dic will be calculated
from the given sequence.
Methods
-------
:charge_at_pH(pH): Calculates the charge of the protein for a given pH
:pi(): Calculates the isoelectric point
Examples
--------
The methods of this class can either be accessed from the class itself
or from a ``ProtParam.ProteinAnalysis`` object (with partially different
names):
>>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
>>> protein = IP("INGAR")
>>> print("IEP of peptide {} is {:.2f}"
... .format(protein.sequence, protein.pi()))
IEP of peptide INGAR is 9.75
>>> print("It's charge at pH 7 is {:.2f}"
... .format(protein.charge_at_pH(7.0)))
It's charge at pH 7 is 0.76
>>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
>>> protein = PA("PETER")
>>> print("IEP of {}: {:.2f}".format(protein.sequence,
... protein.isoelectric_point()))
IEP of PETER: 4.53
>>> print("Charge at pH 4.53: {:.2f}"
... .format(protein.charge_at_pH(4.53)))
Charge at pH 4.53: 0.00
Access this class through ProtParam.ProteinAnalysis class.
First make a ProteinAnalysis object and then call its isoelectric_point
method.
"""

def __init__(self, ProteinSequence, AminoAcidsContent):
def __init__(self, protein_sequence, aa_content=None):
"""Initialize the class."""
self.sequence = ProteinSequence
self.charged_aas_content = self._select_charged(AminoAcidsContent)
self.sequence = str(protein_sequence).upper()
if not aa_content:
from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA
aa_content = _PA(self.sequence).count_amino_acids()
self.charged_aas_content = self._select_charged(aa_content)

self.pos_pKs, self.neg_pKs = self._update_pKs_tables()

# This function creates a dictionary with the contents of each charged aa,
# plus Cterm and Nterm.
def _select_charged(self, AminoAcidsContent):
def _select_charged(self, aa_content):
charged = {}
for aa in charged_aas:
charged[aa] = float(AminoAcidsContent[aa])
charged[aa] = float(aa_content[aa])
charged["Nterm"] = 1.0
charged["Cterm"] = 1.0
return charged

def _update_pKs_tables(self):
"""Update pKs tables with seq specific values for N- and C-termini."""
pos_pKs = positive_pKs.copy()
neg_pKs = negative_pKs.copy()
nterm, cterm = self.sequence[0], self.sequence[-1]
if nterm in pKnterminal:
pos_pKs["Nterm"] = pKnterminal[nterm]
if cterm in pKcterminal:
neg_pKs["Cterm"] = pKcterminal[cterm]
return pos_pKs, neg_pKs

# This function calculates the total charge of the protein at a given pH.
def _chargeR(self, pH, pos_pKs, neg_pKs):
PositiveCharge = 0.0
for aa, pK in pos_pKs.items():
def _chargeR(self, pH):
positive_charge = 0.0
for aa, pK in self.pos_pKs.items():
CR = 10 ** (pK - pH)
partial_charge = CR / (CR + 1.0)
PositiveCharge += self.charged_aas_content[aa] * partial_charge
positive_charge += self.charged_aas_content[aa] * partial_charge

NegativeCharge = 0.0
for aa, pK in neg_pKs.items():
negative_charge = 0.0
for aa, pK in self.neg_pKs.items():
CR = 10 ** (pH - pK)
partial_charge = CR / (CR + 1.0)
NegativeCharge += self.charged_aas_content[aa] * partial_charge
negative_charge += self.charged_aas_content[aa] * partial_charge

return PositiveCharge - NegativeCharge
return positive_charge - negative_charge

def charge_at_pH(self, pH):
"""Calculate the charge of a protein at given pH."""
return self._chargeR(pH)

# This is the action function, it tries different pH until the charge of
# the protein is 0 (or close).
def pi(self):
"""Calculate and return the isoelectric point as float."""
pos_pKs = dict(positive_pKs)
neg_pKs = dict(negative_pKs)
nterm = self.sequence[0]
cterm = self.sequence[-1]
if nterm in pKnterminal:
pos_pKs["Nterm"] = pKnterminal[nterm]
if cterm in pKcterminal:
neg_pKs["Cterm"] = pKcterminal[cterm]

# Bracket between pH1 and pH2
pH = 7.0
Charge = self._chargeR(pH, pos_pKs, neg_pKs)
if Charge > 0.0:
pH = 7
charge = self._chargeR(pH)
if charge > 0.0:
pH1 = pH
Charge1 = Charge
while Charge1 > 0.0:
charge1 = charge
while charge1 > 0.0:
pH = pH1 + 1.0
Charge = self._chargeR(pH, pos_pKs, neg_pKs)
if Charge > 0.0:
charge = self._chargeR(pH)
if charge > 0.0:
pH1 = pH
Charge1 = Charge
charge1 = charge
else:
pH2 = pH
Charge2 = Charge
charge2 = charge
break
else:
pH2 = pH
Charge2 = Charge
while Charge2 < 0.0:
charge2 = charge
while charge2 < 0.0:
pH = pH2 - 1.0
Charge = self._chargeR(pH, pos_pKs, neg_pKs)
if Charge < 0.0:
charge = self._chargeR(pH)
if charge < 0.0:
pH2 = pH
Charge2 = Charge
charge2 = charge
else:
pH1 = pH
Charge1 = Charge
charge1 = charge
break

# Bisection
while pH2 - pH1 > 0.0001 and Charge != 0.0:
while pH2 - pH1 > 0.0001 and charge != 0.0:
pH = (pH1 + pH2) / 2.0
Charge = self._chargeR(pH, pos_pKs, neg_pKs)
if Charge > 0.0:
charge = self._chargeR(pH)
if charge > 0.0:
pH1 = pH
Charge1 = Charge
charge1 = charge
else:
pH2 = pH
Charge2 = Charge
charge2 = charge

return pH
7 changes: 7 additions & 0 deletions Bio/SeqUtils/ProtParam.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
- gravy
- protein_scale
- flexibility
- charge_at_pH
"""

Expand Down Expand Up @@ -296,6 +297,12 @@ def isoelectric_point(self):
ie_point = IsoelectricPoint.IsoelectricPoint(self.sequence, aa_content)
return ie_point.pi()

def charge_at_pH(self, pH):
"""Calculate the charge of a protein at given pH."""
aa_content = self.count_amino_acids()
charge = IsoelectricPoint.IsoelectricPoint(self.sequence, aa_content)
return charge.charge_at_pH(pH)

def secondary_structure_fraction(self):
"""Calculate fraction of helix, turn and sheet.
Expand Down
3 changes: 3 additions & 0 deletions NEWS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ pretty print method ``format_alignment`` to restore the output of local
alignments to the 'old' format (showing the whole sequences including the
un-aligned parts instead of only showing the aligned parts).

A new function ``charge_at_pH(pH)`` has been added to ``ProtParam`` and
``IsoelectricPoint`` in ``Bio.SeqUtils``.

As in recent releases, more of our code is now explicitly available under
either our original "Biopython License Agreement", or the very similar but
more commonly used "3-Clause BSD License". See the ``LICENSE.rst`` file for
Expand Down
26 changes: 16 additions & 10 deletions Tests/test_ProtParam.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,25 @@ def test_get_amino_acids_percent(self):

def test_get_molecular_weight(self):
"""Calculate protein molecular weight."""
self.assertAlmostEqual(round(self.analysis.molecular_weight(), 2),
17103.16)
self.assertAlmostEqual(self.analysis.molecular_weight(), 17103.16, 2)

def test_get_monoisotopic_molecular_weight(self):
"""Calculate monoisotopic molecular weight."""
self.analysis = ProtParam.ProteinAnalysis(self.seq_text, monoisotopic=True)
self.assertAlmostEqual(round(self.analysis.molecular_weight(), 2),
17092.61)
self.assertAlmostEqual(self.analysis.molecular_weight(), 17092.61, 2)

def test_get_molecular_weight_identical(self):
"""Confirm protein molecular weight agrees with calculation from Bio.SeqUtils."""
# This test is somehow useless, since ProteinAnalysis.molecular_weight
# is internally calling SeqUtils.molecular_weight.
mw_1 = self.analysis.molecular_weight()
mw_2 = molecular_weight(Seq(self.seq_text, IUPAC.protein))
self.assertAlmostEqual(mw_1, mw_2)

def test_get_monoisotopic_molecular_weight_identical(self):
"""Confirm protein molecular weight agrees with calculation from Bio.SeqUtils."""
# This test is somehow useless, since ProteinAnalysis.molecular_weight
# is internally calling SeqUtils.molecular_weight.
self.analysis = ProtParam.ProteinAnalysis(self.seq_text, monoisotopic=True)
mw_1 = self.analysis.molecular_weight()
mw_2 = molecular_weight(Seq(self.seq_text, IUPAC.protein), monoisotopic=True)
Expand All @@ -62,12 +64,12 @@ def test_get_monoisotopic_molecular_weight_identical(self):
def test_aromaticity(self):
"""Calculate protein aromaticity."""
# Old test used a number rounded to two digits, so use the same
self.assertEqual(round(self.analysis.aromaticity(), 2), 0.10)
self.assertAlmostEqual(self.analysis.aromaticity(), 0.10, 2)

def test_instability_index(self):
"""Calculate protein instability index."""
# Old test used a number rounded to two digits, so use the same
self.assertEqual(round(self.analysis.instability_index(), 2), 41.98)
self.assertAlmostEqual(self.analysis.instability_index(), 41.98, 2)

def test_flexibility(self):
"""Calculate protein flexibility."""
Expand Down Expand Up @@ -128,15 +130,19 @@ def test_flexibility(self):
def test_isoelectric_point(self):
"""Calculate the isoelectric point."""
# Old test used a number rounded to two digits, so use the same
self.assertAlmostEqual(round(self.analysis.isoelectric_point(), 2), 7.72)
self.assertAlmostEqual(self.analysis.isoelectric_point(), 7.72, 2)

def test_charge_at_pH(self):
"""Test charge_at_pH function."""
self.assertAlmostEqual(self.analysis.charge_at_pH(7.72), 0.00, 2)

def test_secondary_structure_fraction(self):
"""Calculate secondary structure fractions."""
helix, turn, sheet = self.analysis.secondary_structure_fraction()
# Old test used numbers rounded to two digits, so use the same
self.assertAlmostEqual(round(helix, 2), 0.28)
self.assertAlmostEqual(round(turn, 2), 0.26)
self.assertAlmostEqual(round(sheet, 2), 0.25)
self.assertAlmostEqual(helix, 0.28, 2)
self.assertAlmostEqual(turn, 0.26, 2)
self.assertAlmostEqual(sheet, 0.25, 2)

def test_protein_scale(self):
"""Calculate the Kite Doolittle scale."""
Expand Down

0 comments on commit 849dc45

Please sign in to comment.