<a href="https://colab.research.google.com/github/byunsy/bioinformatics-algorithms-py/blob/main/BA_4E.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cyclopeptide Sequencing Problem

### Functions

In [10]:
def Expand(pep):
    """Extend every peptide in ``pep`` by one residue, in all 20 ways.

    ``pep`` is an iterable of peptide strings.  The result is a set that
    contains, for each input string, that string followed by each of the
    20 one-letter amino-acid codes.
    """
    # One character per amino-acid code; iterating the string yields them
    # one at a time.
    alphabet = "GASPVTCILNDKQEMHFRYW"
    return {prefix + letter for prefix in pep for letter in alphabet}

In [2]:
def Mass(pep, amino_acid_mass):
    """Return the total mass of ``pep``.

    Each element of ``pep`` is looked up in ``amino_acid_mass`` and the
    values are added together; an empty peptide has mass 0.
    """
    return sum(amino_acid_mass[residue] for residue in pep)

In [3]:
def LinearSpectrum(peptide, amino_acid_mass):
    """Return the sorted linear spectrum of ``peptide``.

    The spectrum holds 0 plus the mass of every contiguous fragment
    ``peptide[start:end]``, with residue masses taken from
    ``amino_acid_mass``.
    """
    # cumulative[k] is the mass of the first k residues.
    running = 0
    cumulative = [running]
    for residue in peptide:
        running += amino_acid_mass[residue]
        cumulative.append(running)

    size = len(peptide)
    # A fragment's mass is the difference of two prefix masses.
    masses = [
        cumulative[end] - cumulative[start]
        for start in range(size)
        for end in range(start + 1, size + 1)
    ]
    masses.append(0)
    masses.sort()
    return masses

In [4]:
def CyclicSpectrum(peptide, amino_acid_mass):
    """Return the sorted cyclic spectrum of ``peptide``.

    In addition to 0 and every contiguous fragment mass, the spectrum
    includes the fragments that wrap around the end of the peptide; each
    of those is the whole-peptide mass minus an interior fragment.
    """
    # cumulative[k] is the mass of the first k residues.
    cumulative = [0]
    for residue in peptide:
        cumulative.append(cumulative[-1] + amino_acid_mass[residue])

    size = len(peptide)
    whole = cumulative[size]

    masses = [0]
    for start in range(size):
        for end in range(start + 1, size + 1):
            fragment = cumulative[end] - cumulative[start]
            masses.append(fragment)
            # A fragment touching neither end of the linear layout has a
            # complementary fragment that wraps around the cycle.
            if 0 < start and end < size:
                masses.append(whole - fragment)

    masses.sort()
    return masses

In [5]:
from collections import Counter

def Consistent(lin_spectrum, cyc_spectrum):
    """Report whether ``lin_spectrum`` fits inside ``cyc_spectrum``.

    Returns True when every mass in ``lin_spectrum`` occurs in
    ``cyc_spectrum`` at least as many times as it occurs in
    ``lin_spectrum`` (a multiset-containment test), and False otherwise.
    An empty ``lin_spectrum`` is always consistent.
    """
    lin_counter = Counter(lin_spectrum)
    cyc_counter = Counter(cyc_spectrum)

    # A Counter yields 0 for a missing key, so one comparison covers both
    # "mass absent" and "mass present too few times".  Walking the distinct
    # masses avoids a linear list scan per element.
    return all(
        needed <= cyc_counter[mass]
        for mass, needed in lin_counter.items()
    )

In [18]:
def DisplayPepMass(pep):
    """Render ``pep`` as its residue masses joined by dashes.

    Every element of ``pep`` is converted to its mass via ``Mass`` and the
    module-level ``amino_acid_mass`` table, then written as text; e.g. a
    three-residue peptide becomes ``"m1-m2-m3"``.  An empty peptide gives
    an empty string.
    """
    return "-".join(str(Mass(amino, amino_acid_mass)) for amino in pep)

In [32]:
# Integer mass assigned to each one-letter amino-acid code.  Two pairs of
# codes share a mass: I/L (113) and K/Q (128).
amino_acid_mass = {
    'G': 57,
    'A': 71,
    'S': 87,
    'P': 97,
    'V': 99,
    'T': 101,
    'C': 103,
    'I': 113,
    'L': 113,
    'N': 114,
    'D': 115,
    'K': 128,
    'Q': 128,
    'E': 129,
    'M': 131,
    'H': 137,
    'F': 147,
    'R': 156,
    'Y': 163,
    'W': 186,
}

def CyclopeptideSequencing(spectrum):
    """Find every cyclic peptide whose cyclic spectrum equals ``spectrum``.

    Candidates are grown one residue at a time with ``Expand``.  A
    candidate whose mass reaches the parent mass is checked against the
    target spectrum and then retired; any other candidate is kept only
    while its linear spectrum is still consistent with the target.

    Args:
        spectrum: iterable of masses.  It does not have to be sorted or be
            a list; it is normalised to a sorted list before use.

    Returns:
        A sorted list of dash-separated mass strings (as produced by
        ``DisplayPepMass``), one per matching peptide.  An empty spectrum
        yields an empty list.
    """
    # Normalise once: CyclicSpectrum returns a sorted list, so the target
    # must be a sorted list too for the equality test below to succeed.
    target = sorted(spectrum)
    if not target:
        # No masses at all: nothing can match (and there is no parent mass).
        return []

    # The largest mass in the spectrum is the mass of the whole peptide.
    parent_mass = target[-1]
    peptides = {""}
    ret_peptides = set()

    while peptides:
        survivors = set()
        for pep in Expand(peptides):
            if Mass(pep, amino_acid_mass) == parent_mass:
                if CyclicSpectrum(pep, amino_acid_mass) == target:
                    ret_peptides.add(DisplayPepMass(pep))
                # Full-mass candidates are never extended further,
                # whether or not they matched.
            elif Consistent(LinearSpectrum(pep, amino_acid_mass), target):
                survivors.add(pep)
        peptides = survivors

    return sorted(ret_peptides)

### Test Cases

In [35]:
# Create a function for test suite
def TestSuite(function, cases):
    """Run ``function`` on each case's spectrum and print a pass/fail report.

    Each element of ``cases`` is unpacked as ``(spectrum, answer)``.  A case
    passes when ``sorted(function(spectrum))`` equals ``sorted(answer)``, so
    the order of the returned items does not matter.  Nothing is returned;
    the report is written with ``print``.
    """
    banner = "*" * 50
    print(banner)
    print("TEST SUITE\n")

    passed = 0
    for number, (spectrum, answer) in enumerate(cases, start=1):
        result = function(spectrum)
        if sorted(result) != sorted(answer):
            print("- Test Case {} Failed. Expected: {}, Actual: {}"
                  .format(number, answer, result))
            continue
        print("- Test Case {} Passed. Expected: {}, Actual: {}"
              .format(number, answer, result))
        passed += 1

    # Summary and closing marker share one line, hence end=" ".
    print("\n{} out of {} passed.".format(passed, len(cases)), end=" ")
    print("END OF TEST SUITE.")
    print(banner)

In [39]:
# Test cases to pass into the test suite.
# Each case is a (spectrum, expected) tuple: the spectrum is a sorted list of
# integer masses and the expected value lists the dash-separated mass strings
# that the function under test should return (order does not matter, since
# TestSuite sorts both sides before comparing).

# Small case: a three-residue peptide, so all six orderings are expected.
case1 = ([0, 113, 128, 186, 241, 299, 314, 427], 
         ['186-128-113', '186-113-128', '128-186-113', '128-113-186', '113-186-128', '113-128-186'])

# Larger case: a ten-residue peptide; the 20 expected strings are its
# rotations in both reading directions.
case2 = ([0, 71, 97, 99, 103, 113, 113, 114, 115, 131, 137, 196, 200, 202, 208, 214, 226, 227, 228, 240, 245, 299, 311, 311, 316, 327, 337, 339, 340, 341, 358, 408, 414, 424, 429, 436, 440, 442, 453, 455, 471, 507, 527, 537, 539, 542, 551, 554, 556, 566, 586, 622, 638, 640, 651, 653, 657, 664, 669, 679, 685, 735, 752, 753, 754, 756, 766, 777, 782, 782, 794, 848, 853, 865, 866, 867, 879, 885, 891, 893, 897, 956, 962, 978, 979, 980, 980, 990, 994, 996, 1022, 1093], 
         ['103-137-71-131-114-113-113-115-99-97', '103-97-99-115-113-113-114-131-71-137', '113-113-114-131-71-137-103-97-99-115', '113-113-115-99-97-103-137-71-131-114', '113-114-131-71-137-103-97-99-115-113', '113-115-99-97-103-137-71-131-114-113', '114-113-113-115-99-97-103-137-71-131', '114-131-71-137-103-97-99-115-113-113', '115-113-113-114-131-71-137-103-97-99', '115-99-97-103-137-71-131-114-113-113', '131-114-113-113-115-99-97-103-137-71', '131-71-137-103-97-99-115-113-113-114', '137-103-97-99-115-113-113-114-131-71', '137-71-131-114-113-113-115-99-97-103', '71-131-114-113-113-115-99-97-103-137', '71-137-103-97-99-115-113-113-114-131', '97-103-137-71-131-114-113-113-115-99', '97-99-115-113-113-114-131-71-137-103', '99-115-113-113-114-131-71-137-103-97', '99-97-103-137-71-131-114-113-113-115'])

cases = [case1, case2]

# Run CyclopeptideSequencing against every case and print the report.
TestSuite(CyclopeptideSequencing, cases)

**************************************************
TEST SUITE

- Test Case 1 Passed. Expected: ['186-128-113', '186-113-128', '128-186-113', '128-113-186', '113-186-128', '113-128-186'], Actual: ['113-128-186', '113-186-128', '128-113-186', '128-186-113', '186-113-128', '186-128-113']
- Test Case 2 Passed. Expected: ['103-137-71-131-114-113-113-115-99-97', '103-97-99-115-113-113-114-131-71-137', '113-113-114-131-71-137-103-97-99-115', '113-113-115-99-97-103-137-71-131-114', '113-114-131-71-137-103-97-99-115-113', '113-115-99-97-103-137-71-131-114-113', '114-113-113-115-99-97-103-137-71-131', '114-131-71-137-103-97-99-115-113-113', '115-113-113-114-131-71-137-103-97-99', '115-99-97-103-137-71-131-114-113-113', '131-114-113-113-115-99-97-103-137-71', '131-71-137-103-97-99-115-113-113-114', '137-103-97-99-115-113-113-114-131-71', '137-71-131-114-113-113-115-99-97-103', '71-131-114-113-113-115-99-97-103-137', '71-137-103-97-99-115-113-113-114-131', '97-103-137-71-131-114-113-113-115-99', '

### Stepik Coding Exercise