Agora temos o esboço de um novo algoritmo de sequenciamento de ciclopeptídeos. Dado um espectro experimental, primeiro calculamos a sua convolução. Em seguida, selecionamos os M elementos mais frequentes da convolução com valores entre 57 e 200 para formar um alfabeto estendido de massas de aminoácidos candidatos. Para sermos justos, devemos incluir os M elementos mais frequentes da convolução "com empates". Finalmente, executamos o algoritmo LeaderboardCyclopeptideSequencing, no qual as massas de aminoácidos são restritas a esse alfabeto. Chamamos esse algoritmo de ConvolutionCyclopeptideSequencing.

Desafio de código: implementar ConvolutionCyclopeptideSequencing.

* Entrada: Um inteiro M, um inteiro N e uma coleção de inteiros (possivelmente repetidos) Spectrum.
* Saída: Um peptídeo cíclico LeaderPeptide com aminoácidos retirados apenas dos M elementos mais frequentes (e empates) da convolução de Spectrum que ficam entre 57 e 200, e onde o tamanho do Leaderboard é restrito aos N melhores (e empates).

In [50]:
import numpy as np
from collections import Counter

def Expand(Peptides,AminoAcidMass):
    """Append every known residue to every peptide.

    Args:
        Peptides: iterable of peptide strings to extend.
        AminoAcidMass: mapping whose keys are the residue symbols.

    Returns:
        A list with one entry per (peptide, residue) pair, grouped by
        peptide in iteration order, then by residue in mapping order.
    """
    return [
        prefix + residue
        for prefix in Peptides
        for residue in AminoAcidMass
    ]

def ParentMass(Spectrum):
    """Return the largest mass found in Spectrum.

    Raises:
        ValueError: if Spectrum is empty (propagated from ``max``).
    """
    heaviest = max(Spectrum)
    return heaviest

def Mass(Peptide,AminoAcidMass):
    """Return the total mass of Peptide.

    Args:
        Peptide: iterable of residue symbols (a string in this file).
        AminoAcidMass: mapping from residue symbol to its mass.

    Raises:
        KeyError: if a residue has no entry in AminoAcidMass.
    """
    total = 0
    for residue in Peptide:
        total += AminoAcidMass[residue]
    return total

def CyclicSpectrum(Peptide, Alphabet, AminoAcidMass):
    """Return the sorted theoretical spectrum of a cyclic peptide.

    The spectrum holds 0, the mass of every contiguous sub-peptide and,
    for fragments that do not touch either end of the linear string, the
    mass of the complementary fragment that wraps around the cycle.

    Args:
        Peptide: string of residue symbols.
        Alphabet: kept for interface compatibility; masses are looked up
            directly in AminoAcidMass, so a residue missing from Alphabet
            no longer silently contributes a zeroed prefix mass.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        A sorted list of masses (``[0]`` for the empty peptide).

    Raises:
        KeyError: if a residue has no entry in AminoAcidMass.
    """
    # Running prefix sums: prefix_mass[k] is the mass of Peptide[:k].
    prefix_mass = [0]
    for residue in Peptide:
        prefix_mass.append(prefix_mass[-1] + AminoAcidMass[residue])

    length = len(Peptide)
    peptide_mass = prefix_mass[-1]
    spectrum = [0]

    for i in range(length):
        for j in range(i + 1, length + 1):
            fragment = prefix_mass[j] - prefix_mass[i]
            spectrum.append(fragment)
            # Interior fragments have a complement that crosses the
            # junction between the last and the first residue.
            if i > 0 and j < length:
                spectrum.append(peptide_mass - fragment)

    return sorted(spectrum)

def CyclicScore(Peptide, Spectrum, Alphabet, AminoAcidMass):
    """Count the masses shared by Peptide's cyclic spectrum and Spectrum.

    Masses are matched with multiplicity: a mass occurring a times in the
    theoretical spectrum and b times in Spectrum contributes min(a, b).

    Args:
        Peptide: string of residue symbols.
        Spectrum: experimental masses, in any order.
        Alphabet: forwarded to CyclicSpectrum.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        The number of matched masses.
    """
    theoretical = Counter(CyclicSpectrum(Peptide, Alphabet, AminoAcidMass))
    experimental = Counter(Spectrum)
    # Multiset intersection gives the same count as a sorted two-pointer
    # merge, but does not depend on Spectrum being sorted.
    return sum((theoretical & experimental).values())

def LinearSpectrum(Peptide, Alphabet, AminoAcidMass):
    """Return the sorted theoretical spectrum of a linear peptide.

    The spectrum holds 0 and the mass of every contiguous sub-peptide.

    Args:
        Peptide: string of residue symbols.
        Alphabet: kept for interface compatibility; masses are looked up
            directly in AminoAcidMass, so a residue missing from Alphabet
            no longer silently contributes a zeroed prefix mass.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        A sorted list of masses (``[0]`` for the empty peptide).

    Raises:
        KeyError: if a residue has no entry in AminoAcidMass.
    """
    # Running prefix sums: prefix_mass[k] is the mass of Peptide[:k].
    prefix_mass = [0]
    for residue in Peptide:
        prefix_mass.append(prefix_mass[-1] + AminoAcidMass[residue])

    length = len(Peptide)
    spectrum = [0]
    for i in range(length):
        for j in range(i + 1, length + 1):
            spectrum.append(prefix_mass[j] - prefix_mass[i])

    return sorted(spectrum)

def LinearScore(Peptide, Spectrum, Alphabet,AminoAcidMass):
    """Count the masses shared by Peptide's linear spectrum and Spectrum.

    Masses are matched with multiplicity: a mass occurring a times in the
    theoretical spectrum and b times in Spectrum contributes min(a, b).

    Args:
        Peptide: string of residue symbols.
        Spectrum: experimental masses, in any order.
        Alphabet: forwarded to LinearSpectrum.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        The number of matched masses.
    """
    theoretical = Counter(LinearSpectrum(Peptide, Alphabet, AminoAcidMass))
    experimental = Counter(Spectrum)
    # Multiset intersection gives the same count as a sorted two-pointer
    # merge, but does not depend on Spectrum being sorted.
    return sum((theoretical & experimental).values())

def Trim(Leaderboard, Spectrum, N, Alphabet, AminoAcidMass):
    """Keep the N best-scoring peptides of Leaderboard, including ties.

    Peptides are ranked by LinearScore in descending order (stable, so
    equal scores keep their incoming order). Everything scoring strictly
    below the N-th ranked peptide is dropped.

    Args:
        Leaderboard: sequence of candidate peptides.
        Spectrum: experimental masses passed to LinearScore.
        N: number of places to keep before ties are considered.
        Alphabet: forwarded to LinearScore.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        The ranked list, truncated after the last peptide tied with
        position N; the whole ranked list if it has at most N entries.
    """
    ranked = [
        (LinearScore(candidate, Spectrum, Alphabet, AminoAcidMass), candidate)
        for candidate in Leaderboard
    ]
    # Sort on the score only so that ties stay in their original order.
    ranked.sort(key=lambda pair: -pair[0])
    peptides = [candidate for _, candidate in ranked]

    if len(ranked) <= N:
        return peptides

    cutoff = ranked[N - 1][0]
    for position in range(N, len(ranked)):
        if ranked[position][0] < cutoff:
            return peptides[:position]

    return peptides

def LeaderboardCyclopeptideSequencing(Spectrum, N, Alphabet, AminoAcidMass):
    """Search for the cyclic peptide that best explains Spectrum.

    Candidates are grown one residue at a time. After each round,
    peptides heavier than the parent mass are discarded and the remainder
    is cut down with Trim. A peptide whose mass equals the parent mass
    replaces the current leader when its CyclicScore is strictly higher.

    Args:
        Spectrum: experimental masses.
        N: leaderboard size handed to Trim.
        Alphabet: residue symbols, forwarded to the scoring functions.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        The best-scoring peptide string, or "" if none beats the empty
        peptide (including when Spectrum is empty).
    """
    LeaderPeptide = ""
    if len(Spectrum) == 0:
        # No parent mass can be derived from an empty spectrum.
        return LeaderPeptide

    # Both values are loop-invariant / change only when the leader does,
    # so compute them once instead of once per candidate.
    parent_mass = ParentMass(Spectrum)
    leader_score = CyclicScore(LeaderPeptide, Spectrum, Alphabet, AminoAcidMass)

    Leaderboard = [""]
    while Leaderboard:
        survivors = []
        for Peptide in Expand(Leaderboard, AminoAcidMass):
            peptide_mass = Mass(Peptide, AminoAcidMass)
            if peptide_mass > parent_mass:
                # Too heavy: can never match the parent mass again.
                continue
            if peptide_mass == parent_mass:
                score = CyclicScore(Peptide, Spectrum, Alphabet, AminoAcidMass)
                if score > leader_score:
                    LeaderPeptide, leader_score = Peptide, score
            survivors.append(Peptide)
        Leaderboard = Trim(survivors, Spectrum, N, Alphabet, AminoAcidMass)

    return LeaderPeptide

def format_peptide(peptide,AminoAcidMass):
    """Render a peptide as its residue masses joined by dashes.

    Args:
        peptide: iterable of residue symbols.
        AminoAcidMass: mapping from residue symbol to its mass.

    Returns:
        A string such as "71-99"; the empty string for an empty peptide.
    """
    return "-".join(str(AminoAcidMass[residue]) for residue in peptide)

def Spectral_Convolution(Spectrum):
    """Return the matrix of positive pairwise differences of Spectrum.

    The masses are sorted first, so every pair with a positive difference
    is captured regardless of the order of the input. Entry [i][j] is
    sorted[i] - sorted[j] when that is positive and 0 otherwise.

    Args:
        Spectrum: sequence of masses, in any order; may be empty.

    Returns:
        A float array of shape (n, n - 1) for n >= 1, or an empty (0, 0)
        array for an empty spectrum.
    """
    masses = np.sort(np.asarray(Spectrum))
    # Rows range over every mass, columns over all but the largest one:
    # the largest mass can never be the smaller member of a pair.
    differences = masses[:, None] - masses[None, :-1]
    return np.where(differences > 0, differences, 0).astype(float)

def format_ans(Spectrum):
    """Return the non-zero convolution entries of Spectrum as sorted ints.

    Args:
        Spectrum: sequence of masses passed to Spectral_Convolution.

    Returns:
        A sorted list with every non-zero matrix entry converted to int.
    """
    convolution = Spectral_Convolution(Spectrum)
    differences = [
        int(value)
        for row in convolution
        for value in row
        if value != 0
    ]
    differences.sort()
    return differences

def get_M(M,dic_result_sorted):
    """Select the first M entries of a count-ordered mapping, with ties.

    Args:
        M: number of entries to keep before ties are considered.
        dic_result_sorted: mapping from mass to count, expected to be
            ordered by descending count (as built by the caller).

    Returns:
        The input mapping itself when it has at most M entries. Otherwise
        a new dict with the first M entries plus every following entry
        whose count equals that of the M-th one. Empty when M <= 0.
    """
    if len(dic_result_sorted) <= M:
        return dic_result_sorted

    # Materialise once; rebuilding the list on every comparison made the
    # tie scan quadratic.
    entries = list(dic_result_sorted.items())

    if M <= 0:
        # Nothing requested: there is no M-th entry to be tied with.
        dic_final = {}
    else:
        dic_final = dict(entries[:M])
        threshold = entries[M - 1][1]
        for mass, count in entries[M:]:
            if count != threshold:
                break
            dic_final[mass] = count

    print(f'dic_final: {dic_final}')
    return dic_final

def aminoacids_new(M,dic_result_sorted):
    """Build a residue table from the masses selected by get_M.

    Each selected mass becomes one entry keyed by ``chr(mass)``, so every
    mass gets its own single-character symbol.

    Args:
        M: number of masses to request from get_M.
        dic_result_sorted: mapping from mass to count, passed to get_M.

    Returns:
        A dict mapping the generated symbol to the integer mass.
    """
    selected = get_M(M, dic_result_sorted)
    return {chr(mass): int(mass) for mass in selected}

def ConvolutionCyclopeptideSequencing(M, N, Spectrum):
    """Sequence a cyclic peptide using an alphabet derived from Spectrum.

    The convolution of Spectrum is restricted to masses in [57, 200],
    the most frequent masses are selected through aminoacids_new, and
    LeaderboardCyclopeptideSequencing is run over that alphabet.

    Args:
        M: number of convolution masses to keep (ties are handled by the
            selection helper).
        N: leaderboard size.
        Spectrum: experimental masses, in any order.

    Returns:
        A tuple ``(formatted_result, AminoAcidMass)``: the leader peptide
        rendered as dash-separated masses, and the symbol-to-mass table
        that was used.
    """
    # Work on a sorted copy so every downstream step sees the masses in
    # ascending order, whatever order the input arrived in.
    Spectrum = sorted(Spectrum)

    candidate_masses = [
        massa for massa in format_ans(Spectrum) if 57 <= massa <= 200
    ]
    dic_result = dict(Counter(candidate_masses))
    print(f'dic_result: {dic_result}')

    # Most frequent first; the sort is stable, so ties keep mass order.
    dic_result_sorted = dict(
        sorted(dic_result.items(), key=lambda item: item[1], reverse=True)
    )
    print(f'dic_result_sorted: {dic_result_sorted}')

    AminoAcidMass = aminoacids_new(M,dic_result_sorted)
    print(f'AminoAcidMass: {AminoAcidMass}')
    Alphabet = list(AminoAcidMass)
    print(f'Alphabet: {Alphabet}')

    leader_peptide = LeaderboardCyclopeptideSequencing(
        Spectrum, N, Alphabet, AminoAcidMass
    )
    # The leader is one peptide string; format it in a single call rather
    # than residue by residue.
    formatted_result = format_peptide(leader_peptide, AminoAcidMass)
    return formatted_result, AminoAcidMass


# Input file layout: line 1 is M, line 2 is N, line 3 holds the
# whitespace-separated spectrum masses.
with open("dataset_30246_7.txt", "r") as file:
    lines = file.readlines()
    M, N = int(lines[0]), int(lines[1])
    Spectrum = [int(token) for token in lines[2].split()]

formatted_result, AminoAcidMass = ConvolutionCyclopeptideSequencing(M,N,Spectrum)
# Each label is printed on its own line, followed by its value.
print("formatted_result", formatted_result, sep="\n")
print("AminoAcidMass", AminoAcidMass, sep="\n")

dic_result: {57: 26, 58: 30, 59: 30, 60: 14, 62: 26, 63: 6, 64: 6, 65: 12, 66: 2, 67: 4, 69: 42, 71: 16, 72: 16, 73: 30, 74: 30, 76: 18, 78: 8, 80: 10, 82: 10, 83: 36, 84: 8, 85: 24, 87: 68, 88: 10, 89: 24, 90: 6, 91: 20, 92: 8, 93: 6, 94: 24, 95: 8, 96: 12, 97: 44, 98: 8, 99: 90, 101: 34, 102: 6, 103: 64, 104: 4, 105: 4, 106: 20, 108: 4, 109: 4, 110: 6, 111: 1, 112: 16, 113: 12, 114: 16, 115: 5, 116: 6, 117: 30, 118: 2, 120: 2, 121: 4, 122: 12, 123: 2, 124: 30, 126: 22, 127: 16, 128: 86, 129: 18, 130: 10, 131: 70, 132: 2, 133: 18, 134: 2, 135: 12, 137: 8, 138: 8, 140: 12, 141: 1, 142: 32, 143: 12, 145: 8, 146: 8, 147: 10, 149: 4, 151: 8, 152: 10, 153: 12, 154: 20, 156: 68, 157: 14, 158: 28, 159: 8, 160: 16, 161: 28, 162: 10, 163: 8, 165: 12, 166: 6, 167: 6, 168: 26, 170: 14, 171: 10, 172: 26, 173: 8, 174: 10, 175: 16, 176: 12, 177: 4, 179: 10, 181: 20, 182: 16, 183: 12, 184: 12, 185: 12, 186: 110, 187: 4, 188: 50, 189: 8, 190: 46, 191: 8, 192: 2, 193: 14, 195: 4, 196: 8, 197: 14, 198: