Tabela masa aminokiselina, uključujući masu 0 "praznog" peptida:

In [40]:
# Integer mass of each amino acid, keyed by its one-letter code.
# The empty string stands for the "empty" peptide and has mass 0.
# Entries are listed by non-decreasing mass; I/L and K/Q share a mass.
# Insertion order matters: code that iterates this table visits the
# letters in exactly this order.
amino_acid_masses = {
    '': 0,
    'G': 57, 'A': 71, 'S': 87, 'P': 97, 'V': 99,
    'T': 101, 'C': 103, 'I': 113, 'L': 113, 'N': 114,
    'D': 115, 'K': 128, 'Q': 128, 'E': 129, 'M': 131,
    'H': 137, 'F': 147, 'R': 156, 'Y': 163, 'W': 186,
}

Funkcija **linear_spectrum** izračunava teorijski *linearni* spektar za dati peptid **peptide**.

In [41]:
def linear_spectrum(peptide):
    """Return the sorted theoretical linear spectrum of ``peptide``.

    The result contains 0 (the empty fragment) followed by the mass of
    every non-empty contiguous fragment ``peptide[start:end]``; fragments
    never wrap around the end of the peptide.

    Masses are looked up in ``amino_acid_masses``, so a letter missing
    from that table raises ``KeyError``.
    """
    # running[k] is the total mass of the first k amino acids.
    running = [0]
    for aa in peptide:
        running.append(running[-1] + amino_acid_masses[aa])

    # The fragment peptide[start:end] weighs running[end] - running[start].
    size = len(peptide)
    masses = [0] + [
        running[end] - running[start]
        for start in range(size)
        for end in range(start + 1, size + 1)
    ]
    return sorted(masses)

Funkcija **cyclic_spectrum** izracunava teorijski *ciklicni* spektar za dati ciklopeptid **peptide**.

In [53]:
def cyclic_spectrum(peptide):
    """Return the sorted theoretical cyclic spectrum of ``peptide``.

    In addition to 0 and the masses of all contiguous fragments of the
    linear string, the spectrum contains the masses of fragments that
    wrap around the end of the cyclic peptide.

    Masses are looked up in ``amino_acid_masses``, so a letter missing
    from that table raises ``KeyError``.
    """
    # running[k] is the total mass of the first k amino acids.
    running = [0]
    for aa in peptide:
        running.append(running[-1] + amino_acid_masses[aa])

    size = len(peptide)
    whole_mass = running[-1]
    masses = [0]

    for start in range(size):
        for end in range(start + 1, size + 1):
            piece = running[end] - running[start]
            masses.append(piece)

            # A fragment touching either end of the linear string has a
            # complement that is itself a linear fragment (or empty), so
            # it is already accounted for elsewhere.
            if start == 0 or end == size:
                continue

            # Interior fragment: its complement wraps around the cycle.
            masses.append(whole_mass - piece)

    return sorted(masses)

Funkcija **extend** na svaki peptid iz liste **peptides** nadovezuje po jednu aminokiselinu (za svaki peptid se dobija 20 proširenja, po jedno za svaku od 20 aminokiselina) i vraća listu svih tako dobijenih proširenja peptida.

In [54]:
def extend(peptides):
    """Return every one-amino-acid extension of each peptide in ``peptides``.

    Each peptide is extended on the right with every non-empty key of
    ``amino_acid_masses``. Extensions are grouped by source peptide, in
    input order, and within a group follow the table's key order.
    """
    letters = [aa for aa in amino_acid_masses if aa != ""]
    return [peptide + aa for peptide in peptides for aa in letters]

Funkcija **mass** izracunava masu peptida **peptide**.

In [55]:
def mass(peptide):
    """Return the total mass of ``peptide``.

    The mass is the sum of the ``amino_acid_masses`` entries for each
    letter; the empty peptide has mass 0. A letter missing from the table
    raises ``KeyError``.
    """
    return sum(amino_acid_masses[aa] for aa in peptide)

Funkcija **consistent** proverava da li je linearni spektar datog peptida **peptide** konzistentan sa datim (cikličnim) spektrom **target_spectrum**, tj. da li se svaka masa iz linearnog spektra peptida pojavljuje i u datom spektru.

In [56]:
def consistent(peptide, target_spectrum):
    """Tell whether the linear spectrum of ``peptide`` fits inside ``target_spectrum``.

    Returns ``True`` when every mass in ``linear_spectrum(peptide)`` can be
    matched to its own entry of ``target_spectrum`` (repeated masses need
    repeated entries), and ``False`` otherwise.

    The single forward scan relies on ``target_spectrum`` being sorted in
    ascending order, like the spectrum returned by ``linear_spectrum``.
    """
    # ======== STUDENT CODE ======== #
    target_size = len(target_spectrum)
    cursor = 0

    for fragment_mass in linear_spectrum(peptide):
        # Skip target masses that are too small to match this fragment.
        while cursor < target_size and target_spectrum[cursor] < fragment_mass:
            cursor += 1

        # Target exhausted: this fragment mass has no partner.
        if cursor == target_size:
            return False

        # The next target mass is already larger, so the fragment mass is
        # absent from the target spectrum.
        if target_spectrum[cursor] != fragment_mass:
            return False

        # Matched; each target entry can be used only once.
        cursor += 1

    return True
    # ============================== #

Funkcija **cyclopeptide_sequencing** pronalazi sve moguće peptide ciji je ciklicni spektar jednak datom ciklicnom spektru **target_spectrum**.

In [57]:
def cyclopeptide_sequencing(target_spectrum):
    """Return all peptides whose cyclic spectrum equals ``target_spectrum``.

    Candidates are grown one amino acid at a time. A candidate whose mass
    equals the largest target mass is compared against the target and is
    not extended further; any other candidate survives to the next round
    only if its linear spectrum is consistent with the target.

    ``target_spectrum`` is expected to be a non-empty list sorted in
    ascending order: its last element is taken as the mass of the whole
    peptide, and it is compared with ``==`` against the sorted list
    produced by ``cyclic_spectrum``. An empty list raises ``IndexError``.
    """
    parent_mass = target_spectrum[-1]
    matches = []
    frontier = ['']

    while frontier:
        survivors = []

        for candidate in extend(frontier):
            if mass(candidate) != parent_mass:
                # Still growing: keep it only while it can be a prefix of
                # a solution.
                if consistent(candidate, target_spectrum):
                    survivors.append(candidate)
            elif cyclic_spectrum(candidate) == target_spectrum:
                matches.append(candidate)

        frontier = survivors

    return matches

Funkcija **score** računa u kojoj meri su spektri **peptide_spectrum** i **target_spectrum** saglasni, tj. koliko poklapanja imaju.

In [58]:
def score(peptide_spectrum, target_spectrum):
    """Count the masses shared by two spectra.

    Both arguments are walked in parallel with one cursor each, which
    assumes they are sorted in ascending order. Each entry can take part
    in at most one match, so repeated masses are counted as many times as
    they appear in both spectra.
    """
    hits = 0
    p_idx, t_idx = 0, 0
    p_len, t_len = len(peptide_spectrum), len(target_spectrum)

    while p_idx < p_len and t_idx < t_len:
        ours = peptide_spectrum[p_idx]
        theirs = target_spectrum[t_idx]

        if ours > theirs:
            # Target mass is too small to match anything that remains.
            t_idx += 1
            continue

        if ours == theirs:
            hits += 1
            t_idx += 1

        # Reached on a match and when our mass is the smaller one.
        p_idx += 1

    return hits

Funkcije **linear_score** i **cyclic_score** racunaju poklapanje *linearnog* odnosno *ciklicnog* spektra datog peptida **peptide** sa spektrom **target_spectrum**.

In [59]:
def linear_score(peptide, target_spectrum):
    """Score the linear spectrum of ``peptide`` against ``target_spectrum``.

    Returns the number of matches reported by ``score``.
    """
    return score(linear_spectrum(peptide), target_spectrum)

In [60]:
def cyclic_score(peptide, target_spectrum):
    """Score the cyclic spectrum of ``peptide`` against ``target_spectrum``.

    Returns the number of matches reported by ``score``.
    """
    return score(cyclic_spectrum(peptide), target_spectrum)

Funkcija **trim** vrsi skracivanje liste peptida **peptides** tako sto ih prvo rangira na osnovu linearnog skora u odnosu na spektar **target_spectrum**, a zatim odbacuje sve one koji su imali manji skor od skora **N**-tog po redu.

In [61]:
def trim(peptides, target_spectrum, N):
    """Keep the ``N`` best-scoring peptides, plus all ties with the N-th.

    Peptides are ranked by ``linear_score`` against ``target_spectrum``,
    highest first (equal scores are ordered by peptide string,
    descending). Every peptide scoring at least as much as the one in
    position ``N`` is kept, so the result may hold more than ``N`` items.

    Args:
        peptides: list of candidate peptides.
        target_spectrum: spectrum the candidates are scored against.
        N: leaderboard size; a non-positive value keeps nothing.

    Returns:
        ``peptides`` itself (unranked) when it has at most ``N`` items,
        otherwise a new list of the kept peptides in ranked order.
    """
    if N <= 0:
        # No slots on the leaderboard. Without this guard,
        # leaderboard[N - 1] would index from the wrong end of the list.
        return []

    if len(peptides) <= N:
        return peptides

    leaderboard = sorted(
        ((linear_score(peptide, target_spectrum), peptide)
         for peptide in peptides),
        reverse=True,
    )

    cutoff_score = leaderboard[N - 1][0]

    # The board is sorted by descending score, so the entries at or above
    # the cutoff form a prefix. Filtering on the score keeps every tie,
    # including one in the very last position, which an index left over
    # from a loop that ran to completion would cut off.
    return [
        peptide
        for peptide_score, peptide in leaderboard
        if peptide_score >= cutoff_score
    ]

Funkcija **leaderboard_cyclopeptide_sequencing** pronalazi ciklopeptid ciji je ciklicni spektar najsaglasniji sa datim spektrom **target_spectrum**. Prilikom pretrage se u svakoj iteraciji lista peptida-kandidata krati sa granicom za odsecanje **N**.

In [62]:
def leaderboard_cyclopeptide_sequencing(target_spectrum, N):
    """Return the peptide whose cyclic spectrum best matches ``target_spectrum``.

    Candidates are grown one amino acid at a time. In each round:

    * a candidate whose mass equals the largest target mass is scored with
      ``cyclic_score`` and becomes the leader if it beats the best score
      so far (on a tie the earlier leader is kept);
    * a lighter candidate stays in play for the next round;
    * a heavier candidate is dropped.

    The candidates still in play are then cut down with ``trim`` using
    the leaderboard size ``N``.

    ``target_spectrum`` must be non-empty (its last element is taken as
    the mass of the whole peptide; an empty list raises ``IndexError``).
    Returns ``''`` when no candidate of that mass scores above 0.
    """
    # ======== STUDENT CODE ======== #
    parent_mass = target_spectrum[-1]

    best_peptide, best_score = '', 0
    board = ['']

    while board:
        still_growing = []

        for candidate in extend(board):
            candidate_mass = mass(candidate)

            if candidate_mass > parent_mass:
                # Too heavy to ever match; discard.
                continue

            if candidate_mass < parent_mass:
                still_growing.append(candidate)
                continue

            # Exactly the target mass: a complete candidate.
            candidate_score = cyclic_score(candidate, target_spectrum)
            if candidate_score > best_score:
                best_peptide, best_score = candidate, candidate_score

        board = trim(still_growing, target_spectrum, N)
    # ============================== #

    return best_peptide

In [63]:
# Example run: a sorted experimental spectrum whose largest mass (484) is
# used by the search as the mass of the whole peptide.
experimental_spectrum = [0, 99, 113, 114, 128, 227, 257, 299, 355, 356, 370, 371, 484]
# Leaderboard size passed to the search (used as the trim cutoff).
N = 10

# Bare expression: the result is not stored; in a notebook cell its value
# is shown as the cell output.
leaderboard_cyclopeptide_sequencing(experimental_spectrum, N)

'QNLE'