In [1]:
aminoacidMass = {'G':57, 'A':71, 'S':87, 'P':97, 'V':99, 'T':101, 'C':103, 'L':113, 'N':114, 'D':115, 'K':128, 'E':129, 'M':131, 'H':137, 'F':147, 'R':156, 'Y':163, 'W':186}
def expand(leaderboard):
    expanded = []
    for i in leaderboard:
        expanded += [i+j for j in aminoacidMass.keys()]
    return expanded   

In [2]:
print expand(['K', 'L', 'W'])

['KA', 'KC', 'KE', 'KD', 'KG', 'KF', 'KH', 'KK', 'KM', 'KL', 'KN', 'KP', 'KS', 'KR', 'KT', 'KW', 'KV', 'KY', 'LA', 'LC', 'LE', 'LD', 'LG', 'LF', 'LH', 'LK', 'LM', 'LL', 'LN', 'LP', 'LS', 'LR', 'LT', 'LW', 'LV', 'LY', 'WA', 'WC', 'WE', 'WD', 'WG', 'WF', 'WH', 'WK', 'WM', 'WL', 'WN', 'WP', 'WS', 'WR', 'WT', 'WW', 'WV', 'WY']


In [3]:
def cyclicSpectrum(peptide):
    """Input: An amino acid string Peptide.
     Output: The cyclic spectrum of Peptide."""
    prefixMass = [0]*((len(peptide)+1))
    for i in range(len(peptide)):
        prefixMass[i+1] = prefixMass[i] + aminoacidMass[peptide[i]]
    peptideMass = prefixMass[len(peptide)]
    cyclic_spectrum = [0]
    for i in range(len(prefixMass)-1):
        for j in range(i+1, len(prefixMass)):
            cyclic_spectrum.append(prefixMass[j] - prefixMass[i])
            if i > 0 and j < (len(prefixMass)-1):
                cyclic_spectrum.append(peptideMass - (prefixMass[j] - prefixMass[i]))
    return sorted(cyclic_spectrum) 

In [4]:
from collections import Counter
def score_peptide(peptide, spectrum):
    """Cyclopeptide Scoring Problem: Compute the score of a cyclic peptide against a spectrum.
     Input: An amino acid string Peptide and a collection of integers Spectrum. 
     Output: The score of Peptide against Spectrum, Score(Peptide, Spectrum)."""
    spectrum_peptide = cyclicSpectrum(peptide)
    c1, c2 = Counter(spectrum_peptide), Counter(spectrum)
    return sum([min(n, c2[k]) for k,n in c1.items()])

In [14]:
def linearSpectrum(peptide):
    """Input: An amino acid string Peptide.
     Output: The linear spectrum of Peptide."""
    prefixMass = [0]*((len(peptide)+1))
    for i in range(len(peptide)):
        prefixMass[i+1] = prefixMass[i] + aminoacidMass[peptide[i]]
    #print 'prefixMass', prefixMass
    linear_spectrum = [0]
    for i in range(len(prefixMass)-1):
        for j in range(i+1, len(prefixMass)):
            linear_spectrum.append(prefixMass[j] - prefixMass[i])
    return sorted(linear_spectrum)    

In [17]:
def linear_score(peptide, spectrum):
    """Compute the score of a linear peptide with respect to a spectrum.
     Input: An amino acid string Peptide and a collection of integers Spectrum.
     Output: The linear score of Peptide with respect to Spectrum, LinearScore(Peptide, Spectrum)."""
    spectrum_peptide = linearSpectrum(peptide)
    c1, c2 = Counter(spectrum_peptide), Counter(spectrum)
    return sum([min(n, c2[k]) for k,n in c1.items()])

In [18]:
linear_score('NKEL', [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484])

8

In [20]:
#Read the data containing peptide and spectrum to compute linear score wrt spectrum.
with open('input/dataset_4913_1.txt') as f:
    peptide2, spectrum2 = [line.strip() if i==0 else map(int,line.strip().split()) for i, line in enumerate(f.readlines())]

In [21]:
#Call function
linear_score(peptide2, spectrum2)

KeyError: 'I'

In [6]:
#Read input data
with open('input/leaderboard.txt') as f:
    n, spectrum = [int(line.strip()) if i==0 else map(int,line.strip().split()) for i, line in enumerate(f.readlines())]

In [10]:
scores = dict()
# Build the intial peptides.
seq = filter(lambda L: L[0] != -1, [[score_peptide(peptide,spectrum), peptide] for peptide in expand(aminoacidMass.keys())]) 