# Sequencing proteins by mass spectrometry

Represent a peptide by a sequence of integers denoting the peptide’s constituent
amino acid masses.

In [1]:
# weights from textbook Figure 4.6
# G  A  S  P  V  T   C   I   L   N   D   K   Q   E   M   H   F   R   Y   W
# 57 71 87 97 99 101 103 113 113 114 115 128 128 129 131 137 147 156 163 186

def makeweights():
    table = {}
    aas     = "G  A  S  P  V  T   C   I   L   N   D   K   Q   E   M   H   F   R   Y   W".split()
    weights = list(map(int, "57 71 87 97 99 101 103 113 113 114 115 128 128 129 131 137 147 156 163 186".split()))
    for i in range(len(aas)):
        table[aas[i]] = weights[i]
    return table

In [2]:
aa_wt_table = makeweights()
aa_wt_table

{'G': 57,
 'A': 71,
 'S': 87,
 'P': 97,
 'V': 99,
 'T': 101,
 'C': 103,
 'I': 113,
 'L': 113,
 'N': 114,
 'D': 115,
 'K': 128,
 'Q': 128,
 'E': 129,
 'M': 131,
 'H': 137,
 'F': 147,
 'R': 156,
 'Y': 163,
 'W': 186}

In [3]:
def convert(sequence):
    return [ aa_wt_table[aa] for aa in sequence ]

In [4]:
convert("NQEL")

[114, 128, 129, 113]

In [5]:
def mass(sequence):
    weight = 0
    for aa in sequence:
        weight += aa
    return (weight)

In [6]:
def ParentMass(spectrum):
    return spectrum[-1]

In [7]:
# cyclospectrum (problem 4C)

def Cyclospectrum(peptide):
    # Generate all fragments
    fragments = [[], peptide] 
    for i in range(len(peptide)):
        for j in range(1,len(peptide)):
            start = i
            end = i + j
            
            fragment = (peptide+peptide)[start:end]
            fragments.append(fragment)

    # Calculate the spectrum
    spectrum = []
    for fragment in fragments:
        spectrum.append(mass(fragment))
    return sorted(spectrum)

In [11]:
# Linear spectrum 

def Linearspectrum(peptide):
    # Generate all fragments
    fragments = [[], peptide] 
    for i in range(len(peptide)-1):
        for j in range(i+1,len(peptide)):
            
            fragment = (peptide)[i:j]
            fragments.append(fragment)

    # Calculate the spectrum
    spectrum = []
    for fragment in fragments:
        spectrum.append(mass(fragment))
    return sorted(spectrum)

In [8]:
Cyclospectrum(convert("NQEL"))

[0, 113, 114, 128, 129, 227, 242, 242, 257, 355, 356, 370, 371, 484]

In [12]:
Linearspectrum(convert("NQEL"))

[0, 114, 128, 129, 242, 257, 371, 484]

```
CYCLOPEPTIDESEQUENCING(Spectrum)
  Peptides <- a set containing only the empty peptide
  while Peptides is nonempty
    Peptides <- EXPAND(Peptides)
    for each peptide Peptide in Peptides
      if MASS(Peptide) = PARENTMASS(Spectrum)
        if CYCLOSPECTRUM(Peptide) = Spectrum
          output Peptide
        remove Peptide from Peptides
      else if Peptide is not consistent with Spectrum
        remove Peptide from Peptides
```

In [30]:
def expand(peptides):
    aaws = list(set(aa_wt_table.values()))
    newcoll = []
    for peptide in peptides:
        # newset.add(peptide)
        for aa in aaws:
            newcoll.append(peptide + [aa])
    return newcoll

In [14]:
def consistent(Peptide, Spectrum):
    circular = Spectrum[:]
    linear = Linearspectrum(Peptide)
    for wt in linear:
        if wt not in circular:
            return False
        else:
            circular.remove(wt)
        
    return True

In [16]:
consistent((convert("NQEL")), Cyclospectrum(convert("NQEL")))

True

In [21]:
def output(peptide):
    print("-".join(list(map(str,peptide))))

In [27]:
def CycloPeptideSequencing(Spectrum):
    Peptides = [[]] # a list containing only the empty list
    while len(Peptides) > 0:
        Peptides = expand(Peptides)
        #print(len(Peptides))
        for Peptide in Peptides:
            if mass(Peptide) == ParentMass(Spectrum):
                if Spectrum == Cyclospectrum(Peptide):
                    output(Peptide)
                Peptides.remove(Peptide)
            elif not consistent(Peptide, Spectrum):
                #print(peptide, peptides)
                Peptides.remove(Peptide)

In [None]:
CycloPeptideSequencing(Cyclospectrum(convert("NQEL")))

128-114-113-129
129-128-114-113
129-113-114-128
113-114-128-129
114-128-129-113
114-113-129-128
