In [34]:
def expand(leaderboard, frequent_aa):
    """Extend every peptide on the leaderboard by one amino acid mass.

    Input: leaderboard, a collection of peptides (tuples of masses), and
     frequent_aa, the candidate masses to append.
     Output: a list with one new tuple per (peptide, mass) pair, grouped by
     peptide in leaderboard order and, within a peptide, in frequent_aa order."""
    extended = []
    for peptide in leaderboard:
        for mass in frequent_aa:
            # (mass,) is a one-element tuple, so + concatenates rather than adds.
            extended.append(peptide + (mass,))
    return extended

In [38]:
# Grow the empty peptide twice using six candidate masses.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
x = expand([()], [129, 137, 71, 99, 57, 194])
print(x)
print(expand(x, [129, 137, 71, 99, 57, 194]))

[(129,), (137,), (71,), (99,), (57,), (194,)]
[(129, 129), (129, 137), (129, 71), (129, 99), (129, 57), (129, 194), (137, 129), (137, 137), (137, 71), (137, 99), (137, 57), (137, 194), (71, 129), (71, 137), (71, 71), (71, 99), (71, 57), (71, 194), (99, 129), (99, 137), (99, 71), (99, 99), (99, 57), (99, 194), (57, 129), (57, 137), (57, 71), (57, 99), (57, 57), (57, 194), (194, 129), (194, 137), (194, 71), (194, 99), (194, 57), (194, 194)]


In [42]:
def cyclicSpectrum(peptide):
    """Input: A peptide given as a sequence of amino acid masses.
     Output: The cyclic spectrum of Peptide as a sorted list: 0, the mass of
     every contiguous subpeptide, and the mass of every subpeptide that wraps
     around the end of the cycle."""
    size = len(peptide)

    # prefix[k] holds the total mass of the first k residues.
    prefix = [0]
    for mass in peptide:
        prefix.append(prefix[-1] + mass)
    total = prefix[-1]

    masses = [0]
    for start in range(size):
        for end in range(start + 1, size + 1):
            fragment = prefix[end] - prefix[start]
            masses.append(fragment)
            # A fragment touching neither end of the linear order has a
            # wrap-around complement that no linear fragment accounts for.
            if start > 0 and end < size:
                masses.append(total - fragment)
    masses.sort()
    return masses

In [41]:
# Example: cyclic spectrum of the 4-residue peptide with masses 113, 129, 128, 114.
cyclicSpectrum((113,129,128,114))

[0, 113, 114, 128, 129, 227, 242, 242, 257, 355, 356, 370, 371, 484]

In [None]:
from collections import Counter
def score_peptide(peptide, spectrum):
    """Cyclopeptide Scoring Problem: score a cyclic peptide against a spectrum.
     Input: A peptide as a sequence of amino acid masses and a collection of integers Spectrum.
     Output: Score(Peptide, Spectrum), the number of masses shared between the
     cyclic spectrum of Peptide and Spectrum, counted with multiplicity."""
    theoretical = Counter(cyclicSpectrum(peptide))
    observed = Counter(spectrum)
    shared = 0
    for mass, multiplicity in theoretical.items():
        # A mass can only be matched as often as it occurs in both spectra.
        shared += min(multiplicity, observed[mass])
    return shared

In [44]:
# Example: score a 4-mass cyclic peptide against a 13-mass spectrum.
score_peptide((114,128,129,113), [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484])

11

In [45]:
# Edge case: the empty peptide has cyclic spectrum [0], so only the 0 mass can match.
score_peptide((), [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484])

1

In [None]:
def linearSpectrum(peptide):
    """Input: A peptide given as a sequence of amino acid masses.
     Output: The linear spectrum of Peptide as a sorted list: 0 followed by the
     mass of every contiguous subpeptide."""
    # prefixMass[k] is the total mass of the first k residues.
    prefixMass = [0]
    for mass in peptide:
        prefixMass.append(prefixMass[-1] + mass)
    linear_spectrum = [0]
    # range (rather than the Python-2-only xrange) keeps this runnable on both
    # Python 2 and Python 3.
    for i in range(len(prefixMass) - 1):
        for j in range(i + 1, len(prefixMass)):
            # Mass of the subpeptide covering residues i .. j-1.
            linear_spectrum.append(prefixMass[j] - prefixMass[i])
    return sorted(linear_spectrum)

def score_linear_peptide(peptide, spectrum):
    """Compute the score of a linear peptide with respect to a spectrum.
     Input: A peptide as a sequence of amino acid masses and a collection of integers Spectrum.
     Output: LinearScore(Peptide, Spectrum), the number of masses shared between
     the linear spectrum of Peptide and Spectrum, counted with multiplicity."""
    expected = Counter(linearSpectrum(peptide))
    experimental = Counter(spectrum)
    # Each mass contributes the smaller of its two multiplicities.
    return sum(
        min(count, experimental[mass]) for mass, count in expected.items()
    )


In [46]:
# Example: linear score of a 4-mass peptide against a 13-mass spectrum.
score_linear_peptide((114,128,129,113), [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484])

8

In [None]:
def trim_leaderboard(leaderboard, spectrum, N):
    """Input: A collection of peptides Leaderboard, a collection of integers Spectrum, and an integer N.
     Output: The N highest-scoring linear peptides on Leaderboard with respect to Spectrum,
     in descending score order. Peptides tied with the N-th best score are kept as well,
     so more than N peptides may be returned. A non-positive N yields an empty list."""
    keep = int(N)
    if keep < 1:
        # Without this guard the index keep - 1 would be negative and silently
        # take its cutoff from the low-scoring end of the ranking.
        return []

    # Pair each peptide with its score; sorting the pairs in reverse puts the
    # best scores first (ties are ordered by the peptides themselves).
    ranked = sorted(
        ([score_linear_peptide(peptide, spectrum), peptide] for peptide in leaderboard),
        reverse=True)
    if len(ranked) <= keep:
        return [peptide for _, peptide in ranked]

    # Look the cutoff up once instead of once per element of the comprehension.
    cutoff = ranked[keep - 1][0]
    return [peptide for score, peptide in ranked if score >= cutoff]

In [None]:
# NOTE(review): these peptides are letter strings, but linearSpectrum adds each
# peptide element to an integer prefix mass, so this call looks like it would raise
# TypeError as written (no output is recorded for this cell) — confirm, and pass
# tuples of masses instead if so.
trim_leaderboard(['LAST', 'ALST', 'TLLT', 'TQAS'], [0, 71, 87, 101, 113, 158, 184, 188, 259, 271, 372], 2)

In [1]:
def convolution(spectrum):
    """Input: A collection of integers Spectrum.
     Output: A list of every positive pairwise difference between elements of
     Spectrum. A difference produced by k different pairs appears k times."""
    ordered = sorted(spectrum)
    differences = []
    for larger in ordered:
        for smaller in ordered:
            gap = larger - smaller
            # Zero and negative differences are not part of the convolution.
            if gap > 0:
                differences.append(gap)
    return differences

In [26]:
from collections import Counter
def convolution_cyclopeptide_sequencing(M, N, spectrum):
    """Select candidate amino acid masses from the convolution of a spectrum.

    Input: An integer M, an integer N, and a collection of integers Spectrum.
     Output: The distinct convolution elements between 57 and 200 (inclusive) that
     are among the M most frequent, most frequent first. Every element tied with
     the M-th most frequent one is kept as well, so more than M masses may be
     returned. A non-positive M yields an empty list.

    N is accepted but is not used by the code in this function."""
    if M < 1:
        # c[M-1] would wrap around to the least frequent element and keep everything.
        return []

    # Get the sorted convolution
    convoluted_spectrum = sorted(convolution(spectrum))

    # Select elements from the convolution between 57 and 200
    aa = [i for i in convoluted_spectrum if 57 <= i <= 200]

    # Count each distinct mass once, most frequent first
    c = Counter(aa).most_common()

    # Fewer than M distinct masses: keep them all. Comparing len(aa) to M instead
    # would return duplicated masses when aa is short, and raise IndexError at
    # c[M-1] when aa is long but holds fewer than M distinct values.
    if len(c) <= M:
        return [k for (k, v) in c]

    # Select the M most frequent elements between 57 and 200, with ties
    threshold = c[M - 1][1]
    return [k for (k, v) in c if v >= threshold]

In [27]:
# Example run with M=20 and N=60. print(...) with a single argument behaves the
# same on Python 2 and Python 3.
# NOTE(review): the recorded output below starts with a list of (mass, count)
# pairs that the function as shown never prints — presumably left over from an
# earlier version of the cell; confirm before relying on it.
print(convolution_cyclopeptide_sequencing(20, 60, [57, 57, 71, 99, 129, 137, 170, 186, 194, 208, 228, 265, 285, 299, 307, 323, 356, 364, 394, 422, 493]))

[(129, 7), (137, 7), (71, 7), (99, 7), (57, 6), (194, 5), (170, 5), (186, 5), (58, 4), (79, 4), (91, 4), (95, 4), (113, 4), (115, 4), (128, 2), (136, 2), (148, 2), (151, 2), (156, 2), (157, 2), (162, 2), (166, 2), (171, 2), (200, 2), (178, 2), (65, 2), (66, 2), (72, 2), (80, 2), (87, 2), (109, 2), (123, 2), (153, 1), (77, 1), (105, 1), (121, 1)]
[129, 137, 71, 99, 57, 194, 170, 186, 58, 79, 91, 95, 113, 115, 128, 136, 148, 151, 156, 157, 162, 166, 171, 200, 178, 65, 66, 72, 80, 87, 109, 123]


In [32]:
# Scratch check: concatenating a one-element tuple onto the empty tuple, the same
# tuple + (mass,) operation that expand() performs.
() + (65,)

(65,)

In [31]:
# Scratch check: iterating over a string yields one character at a time.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for i in '6566':
    print(i)

6
5
6
6
