In [26]:
# One-letter amino-acid codes grouped by side-chain class.
# Histidine ('H') is not a member of any of these groups.
negative_aa = {'D', 'E'}
positive_aa = {'R', 'K'}
nonpolar_aa = {'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W', 'G', 'C'}
polar_aa = {'N', 'Q', 'S', 'T', 'Y'}

In [34]:
def print_stats(seq):
    """Print the residue-class composition and a naive net charge of ``seq``.

    Every residue is classified by membership in the module-level sets
    ``negative_aa``, ``positive_aa``, ``nonpolar_aa`` and ``polar_aa``.
    Residues that belong to none of them (histidine, for example) are not
    tallied in any class but still count towards the sequence length.

    Parameters
    ----------
    seq : str
        Protein sequence in one-letter code. Matching is case-sensitive,
        so lower-case letters fall into no class.

    Returns
    -------
    None
        The statistics are written to standard output, one line per class
        followed by the net charge.

    Notes
    -----
    The reported net charge is simply ``positive count - negative count``;
    histidine and the chain termini are not taken into account.
    An empty sequence is reported with 0.0% for every class instead of
    raising ``ZeroDivisionError``.
    """
    total = len(seq)
    neg_counter = pos_counter = nonpolar_counter = polar_counter = 0
    for residue in seq:
        if residue in negative_aa:
            neg_counter += 1
        elif residue in positive_aa:
            pos_counter += 1
        elif residue in nonpolar_aa:
            nonpolar_counter += 1
        elif residue in polar_aa:
            polar_counter += 1

    def percent(count):
        # Same arithmetic order as before (count / total * 100) so the printed
        # floats are unchanged; an empty sequence yields 0.0 instead of a
        # division by zero.
        return count / total * 100 if total else 0.0

    print("Negatively charged: {0}/{1} ({2}%)".format(neg_counter, total, percent(neg_counter)))
    # NOTE(review): "Postively" is misspelled; the label is kept as-is so it
    # still matches the output already recorded in the notebook.
    print("Postively charged: {0}/{1} ({2}%)".format(pos_counter, total, percent(pos_counter)))
    print("Polar: {0}/{1} ({2}%)".format(polar_counter, total, percent(polar_counter)))
    print("Non-Polar: {0}/{1} ({2}%)".format(nonpolar_counter, total, percent(nonpolar_counter)))
    # Naive estimate: each R/K contributes +1 and each D/E contributes -1.
    print("Net charge at pH=7.4: {0}".format(pos_counter - neg_counter))

In [35]:
# Print the residue-class composition and the naive net charge (R/K count minus
# D/E count) for the protein sequence below.
print_stats("GSHMAARRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK")

Negatively charged: 29/215 (13.488372093023257%)
Postively charged: 30/215 (13.953488372093023%)
Polar: 39/215 (18.13953488372093%)
Non-Polar: 110/215 (51.162790697674424%)
Net charge at pH=7.4: 1


Histidine is generally considered neutral at pH=7.4. Taking the pKa of its imidazole side chain as roughly 6, this is backed by the Henderson-Hasselbalch equation shown below:

$$7.4=6+\log{\mathrm{\frac{[His]}{[His^+]}}}$$
$$1.4=\log{\mathrm{\frac{[His]}{[His^+]}}}$$
$$10^{1.4}\approx25\approx\mathrm{\frac{[His]}{[His^+]}}$$
$$\mathrm{[His]}=25\mathrm{[His^+]}$$

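Then the fraction of histidine that is protonated, and hence its average charge per residue, is roughly: $\frac{\mathrm{[His^+]}}{\mathrm{[His]}+\mathrm{[His^+]}}=\frac{1}{1+25}=0.038\approx0$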

In [2]:
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Wrap the sequence in Biopython's ProteinAnalysis, then print its charge at
# pH 7.4 followed by its isoelectric point (one value per line).
seq = ProteinAnalysis("GSHMAARRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK")
print(seq.charge_at_pH(7.4))
print(seq.isoelectric_point())

0.716780683040021
7.902744102478028


In [1]:
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Wrap the sequence in Biopython's ProteinAnalysis, then print its charge at
# pH 7.4 followed by its isoelectric point (one value per line).
# NOTE(review): this cell repeats the preceding ProteinAnalysis cell verbatim
# (same sequence, same printed values) — consider keeping only one of them.
seq = ProteinAnalysis("GSHMAARRGALIVLEGVDRAGKSTQSRKLVEALCAAGHRAELLRFPERSTEIGKLLSSYLQKKSDVEDHSVHLLFSANRWEQVPLIKEKLSQGVTLVVDRYAFSGVAFTGAKENFSLDWCKQPDVGLPKPDLVLFLQLQLADAAKRGAFGHERYENGAFQERALRCFHQLMKDTTLNWKMVDASKSIEAVHEDIRVLSEDAIATATEKPLGELWK")
print(seq.charge_at_pH(7.4))
print(seq.isoelectric_point())

0.716780683040021
7.902744102478028


In [2]:
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Run the same two calculations (charge at pH 7.4, then isoelectric point) for
# a second, 23-residue sequence.
# NOTE: this rebinds `seq`, replacing the ProteinAnalysis object created for
# the longer sequence in the earlier cells.
seq = ProteinAnalysis("GIGKFLHSAKKFGKAFVGEIMNS")
print(seq.charge_at_pH(7.4))
print(seq.isoelectric_point())

2.585177699013698
10.00138339996338
