In [6]:
aminoacidMass = {'G':57, 'A':71, 'S':87, 'P':97, 'V':99, 'T':101, 'C':103, 'I':113, 'L':113, 'N':114, 'D':115, 'K':128, 'Q':128, 'E':129, 'M':131, 'H':137, 'F':147, 'R':156, 'Y':163, 'W':186}
def linearSpectrum(peptide):
    """Compute the linear spectrum of a peptide.

     Input: An amino acid string Peptide whose letters are keys of aminoacidMass.
     Output: The linear spectrum of Peptide: a sorted list containing 0 and the
     integer mass of every contiguous subpeptide (duplicates are kept).
     Raises: KeyError if Peptide contains a letter that is not in aminoacidMass."""
    # prefixMass[k] is the total mass of the first k residues, so the mass of the
    # subpeptide peptide[i:j] is prefixMass[j] - prefixMass[i].
    prefixMass = [0]
    for residue in peptide:
        prefixMass.append(prefixMass[-1] + aminoacidMass[residue])
    # Start with the mass of the empty peptide, then add one entry per (i, j) pair
    # with i < j. Plain iteration is used instead of xrange so the function runs
    # unchanged on both Python 2 and Python 3.
    linear_spectrum = [0]
    for i, start_mass in enumerate(prefixMass):
        for end_mass in prefixMass[i + 1:]:
            linear_spectrum.append(end_mass - start_mass)
    return sorted(linear_spectrum)

In [7]:
from collections import Counter
def score_linear_peptide(peptide, spectrum):
    """Score a linear peptide against a spectrum.

     Input: An amino acid string Peptide and a collection of integers Spectrum.
     Output: LinearScore(Peptide, Spectrum): the number of masses shared by the linear
     spectrum of Peptide and Spectrum, counted with multiplicity."""
    theoretical_counts = Counter(linearSpectrum(peptide))
    observed_counts = Counter(spectrum)
    # A mass contributes as many matches as the smaller of its two multiplicities;
    # looking up a mass that is absent from a Counter yields 0.
    shared = 0
    for mass, count in theoretical_counts.items():
        shared += min(count, observed_counts[mass])
    return shared

In [41]:
# Small worked example; the output recorded for this cell is 8.
score_linear_peptide('NKEL', [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484])

8

In [9]:
# Second worked example, with a repeated residue and repeated masses in the spectrum;
# the output recorded for this cell is 7.
score_linear_peptide('PEEP', [0, 97, 129, 129, 129, 194, 226, 323, 323, 355, 452])

7

In [44]:
#Read the data containing peptide and spectrum to compute linear score wrt spectrum.
# The unpacking requires the file to hold exactly two lines: the peptide string,
# then the whitespace-separated integer masses of the spectrum.
# The with-block closes the file handle once the lines are read, and list(map(...))
# gives a real list on both Python 2 and Python 3.
with open('input/rosalind_ba4k.txt') as f:
    peptide2, spectrum2 = [line.strip() if i==0 else list(map(int, line.split())) for i, line in enumerate(f)]

In [45]:
#Call function on the peptide and spectrum read from the input file.
score_linear_peptide(peptide2, spectrum2)

544

In [12]:
def trim_leaderboard(leaderboard, spectrum, N):
    """Keep the top-scoring peptides of a leaderboard, ties included.

     Input: A collection of peptides Leaderboard, a collection of integers Spectrum, and an integer N.
     Output: The N highest-scoring linear peptides on Leaderboard with respect to Spectrum,
     ordered from highest to lowest score. Peptides tied with the N-th best score are kept
     too, so more than N peptides may be returned. A non-positive N yields an empty list."""
    # Normalise once so the size check and the cutoff index agree on N's type.
    N = int(N)
    if N <= 0:
        # Without this guard the index N-1 would wrap around to the end of the list
        # and the cutoff would be taken from the wrong entry.
        return []
    scores = [[score_linear_peptide(peptide, spectrum), peptide] for peptide in leaderboard]
    # Reverse-sorting [score, peptide] pairs puts the best score first; equal scores
    # fall back to reverse peptide order.
    sorted_scores = sorted(scores, reverse = True)
    if len(sorted_scores) <= N:
        return [entry[1] for entry in sorted_scores]
    # Look the cutoff up once instead of on every iteration of the filter below.
    cutoff = sorted_scores[N - 1][0]
    return [entry[1] for entry in sorted_scores if entry[0] >= cutoff]

In [13]:
# Small worked example with N = 2; the final recorded result is ['LAST', 'ALST'].
# NOTE(review): the recorded output also shows two lists of [score, peptide] pairs that
# the trim_leaderboard definition in this notebook does not print -- presumably left over
# from an earlier version with debug prints; re-run the cell to refresh the output.
trim_leaderboard(['LAST', 'ALST', 'TLLT', 'TQAS'], [0, 71, 87, 101, 113, 158, 184, 188, 259, 271, 372], 2)

[[11, 'LAST'], [9, 'ALST'], [3, 'TLLT'], [5, 'TQAS']]
[[11, 'LAST'], [9, 'ALST'], [5, 'TQAS'], [3, 'TLLT']]


['LAST', 'ALST']

### Final algorithm for trimming the peptide leaderboard

In [49]:
aminoacidMass = {'G':57, 'A':71, 'S':87, 'P':97, 'V':99, 'T':101, 'C':103, 'I':113, 'L':113, 'N':114, 'D':115, 'K':128, 'Q':128, 'E':129, 'M':131, 'H':137, 'F':147, 'R':156, 'Y':163, 'W':186}
def linearSpectrum(peptide):
    """Compute the linear spectrum of a peptide.

     Input: An amino acid string Peptide whose letters are keys of aminoacidMass.
     Output: The linear spectrum of Peptide: a sorted list containing 0 and the
     integer mass of every contiguous subpeptide (duplicates are kept).
     Raises: KeyError if Peptide contains a letter that is not in aminoacidMass."""
    # prefixMass[k] is the total mass of the first k residues, so the mass of the
    # subpeptide peptide[i:j] is prefixMass[j] - prefixMass[i].
    prefixMass = [0]
    for residue in peptide:
        prefixMass.append(prefixMass[-1] + aminoacidMass[residue])
    # Start with the mass of the empty peptide, then add one entry per (i, j) pair
    # with i < j. Plain iteration is used instead of xrange so the function runs
    # unchanged on both Python 2 and Python 3.
    linear_spectrum = [0]
    for i, start_mass in enumerate(prefixMass):
        for end_mass in prefixMass[i + 1:]:
            linear_spectrum.append(end_mass - start_mass)
    return sorted(linear_spectrum)

from collections import Counter
def score_linear_peptide(peptide, spectrum):
    """Score a linear peptide against a spectrum.

     Input: An amino acid string Peptide and a collection of integers Spectrum.
     Output: LinearScore(Peptide, Spectrum): the number of masses shared by the linear
     spectrum of Peptide and Spectrum, counted with multiplicity."""
    theoretical_counts = Counter(linearSpectrum(peptide))
    observed_counts = Counter(spectrum)
    # A mass contributes as many matches as the smaller of its two multiplicities;
    # looking up a mass that is absent from a Counter yields 0.
    shared = 0
    for mass, count in theoretical_counts.items():
        shared += min(count, observed_counts[mass])
    return shared

def trim_leaderboard(leaderboard, spectrum, N):
    """Keep the top-scoring peptides of a leaderboard, ties included.

     Input: A collection of peptides Leaderboard, a collection of integers Spectrum, and an integer N.
     Output: The N highest-scoring linear peptides on Leaderboard with respect to Spectrum,
     ordered from highest to lowest score. Peptides tied with the N-th best score are kept
     too, so more than N peptides may be returned. A non-positive N yields an empty list."""
    # Normalise once so the size check and the cutoff index agree on N's type.
    N = int(N)
    if N <= 0:
        # Without this guard the index N-1 would wrap around to the end of the list
        # and the cutoff would be taken from the wrong entry.
        return []
    scores = [[score_linear_peptide(peptide, spectrum), peptide] for peptide in leaderboard]
    # Reverse-sorting [score, peptide] pairs puts the best score first; equal scores
    # fall back to reverse peptide order.
    sorted_scores = sorted(scores, reverse = True)
    if len(sorted_scores) <= N:
        return [entry[1] for entry in sorted_scores]
    # Look the cutoff up once instead of on every iteration of the filter below.
    cutoff = sorted_scores[N - 1][0]
    return [entry[1] for entry in sorted_scores if entry[0] >= cutoff]

#Reading the input from .txt file
# The unpacking requires exactly three lines, each split on whitespace: the peptides
# of the leaderboard, the masses of the spectrum, and a line holding N.
# The with-block closes the file handle once the lines are read.
with open('input/rosalind_ba4l.txt', 'r') as f:
    leaderboard, spectrum, n = [line.split() for line in f]
# list(...) keeps spectrum a reusable list on Python 3 as well, where a bare map object
# would be exhausted after the first peptide is scored.
spectrum = list(map(int, spectrum))
# n is the token list of the third line; its last token is converted to the integer N.
N = int(n.pop())

#Calling the function
ans = trim_leaderboard(leaderboard, spectrum, N)

#Print the output in desired format: one retained peptide per line
# print(...) with a single argument produces the same output on Python 2 and Python 3,
# whereas the statement form is a syntax error on Python 3.
for i in ans:
    print(i)

LTACRNHQLANICHAKPFRLTQMSYCHYRNTNGRHW
QVDATLPIACGMFHSWHDAWSHPMKKDHCPIIGMKQ
MKCEPISVEKVVPMMDTPPLERRDSFYDRWSIDYVK
HFNLTPCQNNVIAWFGRKITYPHEHSMDQYYEGVWD
IFFFRVDTYGYSRQHNYCAGYMSACGPENCLIVWLC
WHIFFLDVNFCMMPVNCGQGVAYIWNPQEIIGAKAL
KWNRAVQIAKWTITNMIFPHHVVITLIQSRGSRVRM
PIKTLNPMMEIWIAGPWDMDGVSVIMWSPIAIREKK
KFDNGWNGGWEWCLIFEQWQLCAIVRIPTYQDGHDA
TMPHISIRFGTYYHYRVMVLRIRGSDNGRFHKLNHH
KCDMATDQYWRGGEFPGFTMEIHHCWIFQWSPCKPA
GYYYMWPVQANTKNNIFWLWAFMAKRRLSTYWAIYL
STENTCTSVPEQESQCCHENFVMPGCRQWDLGRESD
NILYRSAQGKFHGANSHFMWWRLLPYWGAGFLLIPQ
GFVKITARLVCYGYEQRWEFCLYGVGWCYKTSTQHD


In [46]:
# Number of peptides currently held in leaderboard; the output recorded for this cell is 15.
len(leaderboard)

15