# Week 3 - Sequencing Antibiotics

First need to understand how proteins are produced.
1. DNA is transcribed into RNA (Replace T (thymine) with U(uracil)). 
This is called transcription, which is carried out by a molecule called RNA Polymerase.
2. RNA is translated into peptides (Assignment of codons (3-mer nucleotides) to amino acids to make proteins). 
This is called translation, which is carried out by an enzyme called Ribosome.

NOTE:
- Above known as the "Central Dogma of Molecular Biology", named by Francis Crick.
- Edward Tatum in 1963 inhibits ribosome Bacillus Brevis to stop RNA-Protein translation. 
- Production of some peptides, including tyrocidines, continued
- Fritz Lipmann showed in 1969 typcidines are non-ribosomal peptides (NRPs).

Sequencing with Branch and Bound
- Find all amino acids whose masses occur in Spectrum, and add to List
- Extend each peptide in List by each of 18 different amino acid masses
- Trim inconsistent peptides from List
- Return any peptides in List whose theoretical spectra match Spectrum
- Iterate steps 2-4 until List is empty

In [1]:
'''
Function : Solve Protein Translation Problem by translating an RNA string into an amino acid string
Input: String (rna_string), Array (codon_table)
Output: String (amino_acid_string, i.e. peptide) 
'''
def translate_rna_to_amino_acid(rna_string, codon_table):
    amino_acid_string = ""
    length = len(rna_string)

    # Iterate over the RNA string in steps of 3
    for i in range(0, length, 3):
        codon = rna_string[i:i + 3]
        amino_acid = codon_table.get(codon, "?")  # Use "?" for unknown codons
        amino_acid_string += amino_acid

    return amino_acid_string

# Unit Test - Protein Translation Problem
# Expected Output (MAMAPRTEINSTRING)

codon_table = {
        "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
        "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
        "UAU": "Y", "UAC": "Y", "UAA": "*", "UAG": "*",
        "UGU": "C", "UGC": "C", "UGA": "*", "UGG": "W",
        "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
        "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
        "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
        "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G"
    }

rna_sequence = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"
amino_acid_sequence = translate_rna_to_amino_acid(rna_sequence, codon_table)
print(amino_acid_sequence)


MAMAPRTEINSTRING*


In [2]:
'''
Function : Determine how many DNA strings of length 30 transcribe and translate into Tyrocidine B1.
The amino acid sequence of Tyrocidine B1 is Val-Lys-Leu-Phe-Pro-Trp-Phe-Asn-Gln-Tyr (VKLFPWFNQY).
Input: String (amino_acid_string), Integer (sequence_length, defaults to None)
Output: List (matching_dna_sequences) 
'''
from itertools import product

def reverse_translate_to_rna(amino_acid_string):
    # RNA codon table (same as provided in previous code segment)
    codon_table = {
        "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
        "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
        "UAU": "Y", "UAC": "Y", "UAA": "*", "UAG": "*",
        "UGU": "C", "UGC": "C", "UGA": "*", "UGG": "W",
        "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
        "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
        "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
        "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G"
    }

    # Create a dictionary to store the possible RNA sequences for each amino acid
    amino_acid_rna_dict = {}

    # Iterate over the RNA codon table to populate the dictionary
    for rna_sequence, amino_acid in codon_table.items():
        amino_acid_rna_dict.setdefault(amino_acid, []).append(rna_sequence)

    # Generate all possible RNA sequences based on the amino acid sequence
    all_rna_sequences = [amino_acid_rna_dict[aa] for aa in amino_acid_string]

    # Use Cartesian product to combine all possible RNA sequences for each amino acid
    possible_rna_sequences = ["".join(seq) for seq in product(*all_rna_sequences)]

    return possible_rna_sequences

def reverse_transcribe_rna_to_dna(rna_sequence):
    # Reverse transcribes an RNA sequence to a DNA sequence
    return rna_sequence.replace("U", "T")

def find_dna_sequences_for_amino_acid(amino_acid_string, sequence_length=None):
    # Reverse translate the amino acid string to all possible RNA sequences
    possible_rna_sequences = reverse_translate_to_rna(amino_acid_string)

    # Transcribe RNA sequences to DNA sequences
    possible_dna_sequences = [reverse_transcribe_rna_to_dna(rna_sequence) for rna_sequence in possible_rna_sequences]

    # Filter out sequences longer than the specified length if applicable
    if sequence_length is not None:
        possible_dna_sequences = [seq for seq in possible_dna_sequences if len(seq) == sequence_length]

    return possible_dna_sequences

# Example Test:
amino_acid_to_find = "VKLFPWFNQY"  # Tyrocidine B1 amino acid sequence
sequence_length_to_check = 30

matching_dna_sequences = find_dna_sequences_for_amino_acid(amino_acid_to_find, sequence_length_to_check)
print("Number of DNA sequences:", len(matching_dna_sequences))
print("DNA sequences:", matching_dna_sequences)

Number of DNA sequences: 6144
DNA sequences: ['GTTAAATTATTTCCTTGGTTTAATCAATAT', 'GTTAAATTATTTCCTTGGTTTAATCAATAC', 'GTTAAATTATTTCCTTGGTTTAATCAGTAT', 'GTTAAATTATTTCCTTGGTTTAATCAGTAC', 'GTTAAATTATTTCCTTGGTTTAACCAATAT', 'GTTAAATTATTTCCTTGGTTTAACCAATAC', 'GTTAAATTATTTCCTTGGTTTAACCAGTAT', 'GTTAAATTATTTCCTTGGTTTAACCAGTAC', 'GTTAAATTATTTCCTTGGTTCAATCAATAT', 'GTTAAATTATTTCCTTGGTTCAATCAATAC', 'GTTAAATTATTTCCTTGGTTCAATCAGTAT', 'GTTAAATTATTTCCTTGGTTCAATCAGTAC', 'GTTAAATTATTTCCTTGGTTCAACCAATAT', 'GTTAAATTATTTCCTTGGTTCAACCAATAC', 'GTTAAATTATTTCCTTGGTTCAACCAGTAT', 'GTTAAATTATTTCCTTGGTTCAACCAGTAC', 'GTTAAATTATTTCCCTGGTTTAATCAATAT', 'GTTAAATTATTTCCCTGGTTTAATCAATAC', 'GTTAAATTATTTCCCTGGTTTAATCAGTAT', 'GTTAAATTATTTCCCTGGTTTAATCAGTAC', 'GTTAAATTATTTCCCTGGTTTAACCAATAT', 'GTTAAATTATTTCCCTGGTTTAACCAATAC', 'GTTAAATTATTTCCCTGGTTTAACCAGTAT', 'GTTAAATTATTTCCCTGGTTTAACCAGTAC', 'GTTAAATTATTTCCCTGGTTCAATCAATAT', 'GTTAAATTATTTCCCTGGTTCAATCAATAC', 'GTTAAATTATTTCCCTGGTTCAATCAGTAT', 'GTTAAATTATTTCCCTGGTTCAATCAGTAC', 'G

In [3]:
'''
Function : Find substrings of a genome encoding a given amino acid sequence.
Input: String (dna_text), String (peptide), Array (genetic_code)
Output: List (substrings) 
'''
def transcribe_dna_to_rna(dna_sequence):
    # Transcribes a DNA sequence to an RNA sequence
    return dna_sequence.replace("T", "U")

def reverse_complement(dna_sequence):
    # Returns the reverse complement of a DNA sequence
    complement = {"A": "T", "T": "A", "C": "G", "G": "C"}
    return "".join(complement[base] for base in reversed(dna_sequence))

def find_substrings_encoding_peptide(dna_text, peptide, genetic_code):
    # Find all substrings of Text encoding the given Peptide, including reverse complement substrings
    k = len(peptide) * 3  # Length of the DNA substring corresponding to Peptide
    n = len(dna_text)
    substrings = []

    # Iterate over the DNA string to check each possible substring of length k
    for i in range(n - k + 1):
        dna_substring = dna_text[i:i + k]
        rna_sequence = transcribe_dna_to_rna(dna_substring)
        translated_peptide = translate_rna_to_amino_acid(rna_sequence, genetic_code)

        if translated_peptide == peptide:
            #print(dna_substring)
            substrings.append(dna_substring)

        # Check reverse complement of the substring as well
        reverse_dna_substring = reverse_complement(dna_substring)
        rna_sequence_reverse = transcribe_dna_to_rna(reverse_dna_substring)
        translated_peptide_reverse = translate_rna_to_amino_acid(rna_sequence_reverse, genetic_code)

        if translated_peptide_reverse == peptide:
            #print(dna_substring, reverse_dna_substring)
            substrings.append(dna_substring)

    return substrings

# Unit Test - Peptide Encoding Problem
# Expected Output (ATGGCC GGCCAT ATGGCC)
dna_text = "ATGGCCATGGCCCCCAGAACTGAGATCAATAGTACCCGTATTAACGGGTGA"
peptide = "MA"

test_result = find_substrings_encoding_peptide(dna_text, peptide, codon_table)
print('Substrings =', ' '.join(str(e) for e in test_result))


Substrings = ATGGCC GGCCAT ATGGCC


In [4]:
# Stepik Exercise - Peptide Encoding Problem with Bacillus brevis
with open('Bacillus_brevis.txt', 'r') as genome_file:
    bacillus_brevis_genome = [line.strip() for line in genome_file.readlines()]

bacillus_brevis_genome = ''.join(bacillus_brevis_genome)
tyrocidine_b1 = 'VKLFPWFNQY'

test_result = find_substrings_encoding_peptide(bacillus_brevis_genome, tyrocidine_b1, codon_table)
print('Substrings =', ' '.join(str(e) for e in test_result))

Substrings = 


In [5]:
'''
Function : Number of subpeptides of a specific length
Input: Integer (cyclopeptide_length), Integer (k)
Output: Integer (num_subpeptides) 
'''
from math import factorial

def get_subpeptide_qty(cyclopeptide_length, k=2):
    n = cyclopeptide_length
    binomial_coeff = factorial(n) / (factorial(k)*factorial(n - k))
    num_subpeptides = int(k * binomial_coeff)

    return num_subpeptides

# Unit Test - How many subpeptides does a cyclic peptide of length n have?
# Expected Output (980597910)
cyclopeptide_length = 31315
peptide = "MA"

test_result = get_subpeptide_qty(cyclopeptide_length, 2)
print('Number of Subpeptides =', test_result)

Number of Subpeptides = 980597910


In [6]:
'''
Function : Generate the theoretical spectrum of a cyclic peptide.
Input: String (peptide)
Output: List (cyclospectrum, sorted)
'''
def get_amino_acid_mass():
    # Returns a dictionary containing the mass of each amino acid
    amino_acid_mass = {}
    with open("integer_mass_table.txt") as file:
        for key, mass in [x.strip().split(" ") for x in file.readlines()]:
            amino_acid_mass[key] = int(mass)
    
    return amino_acid_mass

def generate_theoretical_spectrum(peptide):
    amino_acid_masses = get_amino_acid_mass()
    n = len(peptide)
    prefix_masses = [0]

    # Calculate prefix masses of the cyclic peptide
    for i in range(n):
        prefix_masses.append(prefix_masses[-1] + amino_acid_masses[peptide[i]])
    #print(prefix_masses)

    peptide_mass = prefix_masses[-1]
    cyclospectrum = [0]

    # Calculate the masses of all subpeptides
    for i in range(n):
        for j in range(i + 1, n + 1):
            cyclospectrum.append(prefix_masses[j] - prefix_masses[i])
            #print(prefix_masses[j] - prefix_masses[i])

            # For cyclic peptides, add the masses of the peptides wrapping around the end
            if i > 0 and j < n:
                cyclospectrum.append(peptide_mass - (prefix_masses[j] - prefix_masses[i]))
                #print(peptide_mass - (prefix_masses[j] - prefix_masses[i]))

    cyclospectrum.sort()
    return cyclospectrum

# Unit Test - Generating Theoretical Spectrum Problem
# Expected Output (0 113 114 128 129 227 242 242 257 355 356 370 371 484)
peptide = 'LEQN'

test_result = generate_theoretical_spectrum(peptide)
print('Cyclospectrum =', test_result)

Cyclospectrum = [0, 113, 114, 128, 129, 227, 242, 242, 257, 355, 356, 370, 371, 484]


In [7]:
# Randomized Test - Generating Theoretical Spectrum Problem
peptide = 'PQIIAGFAIATQ'

test_result = generate_theoretical_spectrum(peptide)
print('Cyclospectrum =', ' '.join(str(e) for e in test_result))

Cyclospectrum = 0 57 71 71 71 97 101 113 113 113 128 128 128 147 172 184 184 184 204 218 225 225 226 229 241 241 255 275 275 285 297 300 326 331 338 346 353 354 354 356 388 388 397 402 413 425 451 454 459 459 459 466 482 484 501 503 510 522 525 530 560 567 572 572 579 579 581 629 631 631 638 638 643 650 680 685 688 700 707 709 726 728 744 751 751 751 756 759 785 797 808 813 822 822 854 856 856 857 864 872 879 884 910 913 925 935 935 955 969 969 981 984 985 985 992 1006 1026 1026 1026 1038 1063 1082 1082 1082 1097 1097 1097 1109 1113 1139 1139 1139 1153 1210


In [8]:
'''
Function : Count Peptides with Given Mass, i.e. compute the number of peptides of given mass.
Input: Integer (target_mass)
Output: Integer (peptide_count)
'''
amino_acid_masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]
masses = {}

def count_peptides_from_mass(target_mass):
    if target_mass < 0:
        return 0
    elif target_mass == 0:
        return 1
    else:
        peptide_count = 0
        for amino_acid in amino_acid_masses:
            t = target_mass - amino_acid
            if t in masses:
                peptide_count += masses[t]
            else:
                temp = count_peptides_from_mass(t)
                masses[t] = temp
                peptide_count += temp

    return peptide_count

# Input: Target mass (m)
m = 1024

# Output: Number of peptides having the given mass
result = count_peptides_from_mass(m)
print("Number of peptides with mass", m, "=", result)


Number of peptides with mass 1024 = 14712706211


In [9]:
'''
Exercise Break: This figure suggests that for large m, the number of peptides with 
given integer mass m can be approximated as k · Cm, where k and C are constants. If 
you solved the exercise break on the previous step, use your solution to find C. 
(Give your answer as a decimal; the allowable error is 0.002).
'''
import math

a1 = count_peptides_from_mass(5000)
a2 = count_peptides_from_mass(5001)

# Exponentiate to get the constant C
C = 2 ** (math.log2(a2) - math.log2(a1))

print("Constant C =", C)

Constant C = 1.0277263869347322


In [10]:
'''
Exercise Break: How many subpeptides does a linear peptide of given length n have? 
(Include the empty peptide and the entire peptide.)
'''
def count_subpeptides_of_length(n):
    count = ((n * (n + 1)) // 2) + 1
    return count

# Sample Input
n = 4

# Sample Output
result = count_subpeptides_of_length(n)
print("Number of subpeptides for length", n, ":", result)

Number of subpeptides for length 4 : 11


In [11]:
amino_acid_masses = get_amino_acid_mass()
masses = set(amino_acid_masses.values())
print(masses)

{128, 97, 129, 99, 131, 101, 163, 71, 103, 137, 113, 114, 115, 147, 87, 57, 186, 156}


In [59]:
from collections import Counter
from collections import deque

def expand_peptides(peptides):
    masses = set(get_amino_acid_mass().values())
    new_peptides = []
    for peptide in peptides:
        for mass in masses:
            new_peptides.append(peptide + (mass,))
    return new_peptides

def is_consistent(peptide, spectrum):
    peptide_spectrum = cyclospectrum(peptide)
    for mass in peptide_spectrum:
        if mass not in spectrum:
            return False
    return True

def cyclospectrum(peptide):
    prefix_mass = [0]
    for i in range(len(peptide)):
        prefix_mass.append(prefix_mass[-1] + peptide[i])

    peptide_mass = prefix_mass[-1]
    cyclospectrum = [0]
    for i in range(len(peptide)):
        for j in range(i + 1, len(peptide) + 1):
            cyclospectrum.append(prefix_mass[j] - prefix_mass[i])
            if i > 0 and j < len(peptide):
                cyclospectrum.append(peptide_mass - (prefix_mass[j] - prefix_mass[i]))

    cyclospectrum.sort()
    return cyclospectrum

In [90]:
'''
Function : Branch-and-Bound Algorithm for Cyclopeptide Sequencing
Given the current collection of linear peptides Peptides, define Expand(Peptides) as a new collection 
containing all possible extensions of peptides in Peptides by a single amino acid mass.
Input: List (experimental_spectrum)
Output: List (final_peptides)
'''
def perform_cyclopeptide_sequencing(experimental_spectrum):
    masses = set(get_amino_acid_mass().values())
    masses.intersection_update(set(map(int, experimental_spectrum)))
    parent_mass = max(map(int, experimental_spectrum))
    candidate_peptides = [(mass,) for mass in masses if mass <= parent_mass]
    final_peptides = []
    print(masses, parent_mass, candidate_peptides)

    while candidate_peptides:
        current_expansion = expand_peptides(candidate_peptides)
        candidate_peptides = []
        #print(current_expansion)

        for peptide in current_expansion:
            if sum(peptide) == parent_mass:
                if (
                    cyclospectrum(peptide) == experimental_spectrum
                    and peptide not in final_peptides
                ):
                    final_peptides.append(peptide)
            elif is_consistent(peptide, experimental_spectrum):
                candidate_peptides.append(peptide)

    return final_peptides

# Unit Test - Cyclopeptide Sequencing
# Expected Output (186-128-113 186-113-128 128-186-113 128-113-186 113-186-128 113-128-186)
spectrum = '0 113 128 186 241 299 314 427'
experimental_spectrum = [int(x) for x in spectrum.split()] # Convert to List of Integers

test_result = perform_cyclopeptide_sequencing(experimental_spectrum)
test_result = ' '.join(str(e) for e in test_result)
test_result = test_result.replace(', ', '-')
test_result = test_result.replace(') (', ' ')
print('Cyclopeptide Sequenced Peptides =', test_result)

{128, 113, 186} 427 [(128,), (113,), (186,)]
Cyclopeptide Sequenced Peptides = (128-113-186 128-186-113 113-128-186 113-186-128 186-128-113 186-113-128)


In [95]:
# TODO: Not working for larger dataset
# Randomized Test - Cyclopeptide Sequencing
spectrum = '0 71 97 99 103 113 113 114 115 131 137 196 200 202 208 214 226 227 228 240 245 299 311 311 316 327 337 339 340 341 358 408 414 424 429 436 440 442 453 455 471 507 527 537 539 542 551 554 556 566 586 622 638 640 651 653 657 664 669 679 685 735 752 753 754 756 766 777 782 782 794 848 853 865 866 867 879 885 891 893 897 956 962 978 979 980 980 990 994 996 1022 1093'
experimental_spectrum = [int(x) for x in spectrum.split()] # Convert to List of Integers

test_result = perform_cyclopeptide_sequencing(experimental_spectrum)
test_result = ' '.join(str(e) for e in test_result)
test_result = test_result.replace(', ', '-')
test_result = test_result.replace(') (', ' ')
print('Cyclopeptide Sequenced Peptides =', test_result)

{97, 99, 131, 103, 71, 137, 113, 114, 115} 1093 [(97,), (99,), (131,), (103,), (71,), (137,), (113,), (114,), (115,)]
Cyclopeptide Sequenced Peptides = 


In [104]:
'''
Exercise Break: Implement LinearSpectrum.
'''
def get_linear_spectrum(peptide: str):
    n = len(peptide)
    prefix_mass = [0] * (n+1)
    INTEGER_MASS = get_amino_acid_mass()
    
    for i in range(1, n+1):
        acid = peptide[i-1]
        prefix_mass[i] = prefix_mass[i-1] + INTEGER_MASS[acid]
    
    linear_spec = [0] * int((n * (n+1)/2) + 1)
    ind = 0
    for i in range(0, n):
        for j in range(i+1, n+1):
            linear_spec[ind] = prefix_mass[j] - prefix_mass[i]
            ind += 1
    
    return sorted(linear_spec)

# Unit Test - Implement Linear Spectrum
# Expected Output (0 113 114 128 129 242 242 257 370 371 484)
peptide = 'NQEL'

test_result = get_linear_spectrum(peptide)
print('Linear Spectrum for', peptide, '=', ' '.join(str(e) for e in test_result))

Linear Spectrum for NQEL = 0 113 114 128 129 242 242 257 370 371 484


In [15]:
# Quiz - Question 2
# Which of the following RNA strings could translate into the amino acid string PRTEIN? (Select all that apply.)

rna_seq1 = "CCCAGUACCGAAAUUAAC"
rna_seq2 = "CCCCGUACGGAGAUGAAA"
rna_seq3 = "CCGAGGACCGAAAUCAAC"
rna_seq4 = "CCCAGGACUGAGAUCAAU"

amino_acid_seq1 = translate_rna_to_amino_acid(rna_seq1, codon_table)
amino_acid_seq2 = translate_rna_to_amino_acid(rna_seq2, codon_table)
amino_acid_seq3 = translate_rna_to_amino_acid(rna_seq3, codon_table)
amino_acid_seq4 = translate_rna_to_amino_acid(rna_seq4, codon_table)

print('1st seq = ', amino_acid_seq1)
print('2nd seq = ', amino_acid_seq2)
print('3rd seq = ', amino_acid_seq3)
print('4th seq = ', amino_acid_seq4)

1st seq =  PSTEIN
2nd seq =  PRTEMK
3rd seq =  PRTEIN
4th seq =  PRTEIN


In [16]:
# Quiz - Question 3
# How many DNA strings transcribe and translate into the amino acid string LEADER?
amino_acid_to_find = "LEADER"  # Tyrocidine B1 amino acid sequence

matching_dna_sequences = find_dna_sequences_for_amino_acid(amino_acid_to_find)
print("Number of DNA sequences:", len(matching_dna_sequences))
print("DNA sequences:", matching_dna_sequences)

Number of DNA sequences: 1152
DNA sequences: ['TTAGAAGCTGATGAACGT', 'TTAGAAGCTGATGAACGC', 'TTAGAAGCTGATGAACGA', 'TTAGAAGCTGATGAACGG', 'TTAGAAGCTGATGAAAGA', 'TTAGAAGCTGATGAAAGG', 'TTAGAAGCTGATGAGCGT', 'TTAGAAGCTGATGAGCGC', 'TTAGAAGCTGATGAGCGA', 'TTAGAAGCTGATGAGCGG', 'TTAGAAGCTGATGAGAGA', 'TTAGAAGCTGATGAGAGG', 'TTAGAAGCTGACGAACGT', 'TTAGAAGCTGACGAACGC', 'TTAGAAGCTGACGAACGA', 'TTAGAAGCTGACGAACGG', 'TTAGAAGCTGACGAAAGA', 'TTAGAAGCTGACGAAAGG', 'TTAGAAGCTGACGAGCGT', 'TTAGAAGCTGACGAGCGC', 'TTAGAAGCTGACGAGCGA', 'TTAGAAGCTGACGAGCGG', 'TTAGAAGCTGACGAGAGA', 'TTAGAAGCTGACGAGAGG', 'TTAGAAGCCGATGAACGT', 'TTAGAAGCCGATGAACGC', 'TTAGAAGCCGATGAACGA', 'TTAGAAGCCGATGAACGG', 'TTAGAAGCCGATGAAAGA', 'TTAGAAGCCGATGAAAGG', 'TTAGAAGCCGATGAGCGT', 'TTAGAAGCCGATGAGCGC', 'TTAGAAGCCGATGAGCGA', 'TTAGAAGCCGATGAGCGG', 'TTAGAAGCCGATGAGAGA', 'TTAGAAGCCGATGAGAGG', 'TTAGAAGCCGACGAACGT', 'TTAGAAGCCGACGAACGC', 'TTAGAAGCCGACGAACGA', 'TTAGAAGCCGACGAACGG', 'TTAGAAGCCGACGAAAGA', 'TTAGAAGCCGACGAAAGG', 'TTAGAAGCCGACGAGCGT', 'TTAGAAG

In [114]:
# Quiz - Question 4
# What is the integer mass of tryptophan?

def calculate_integer_mass(peptide):
    amino_acid_masses = get_amino_acid_mass()
    mass = 0
    for amino_acid in peptide:
        mass += amino_acid_masses[amino_acid]
    return mass

# Test Example for Tryptophan
tryptophan = "W"
tryptophan_mass = calculate_integer_mass(tryptophan)
print("Integer mass of Tryptophan:", tryptophan_mass)


Integer mass of Tryptophan: 186


In [113]:
# Quiz - Question 5
# Which of the following cyclic peptides could have generated the theoretical spectrum 
# 0 71 101 113 131 184 202 214 232 285 303 315 345 416? (Select all that apply.)

peptide = ['MAIT', 'IAMT', 'MLAT', 'TAIM', 'TMLA', 'TMIA']
for x in range(len(peptide)):
    test_result = generate_theoretical_spectrum(peptide[x])
    print('Linear Spectrum for', peptide[x], '=', ' '.join(str(e) for e in test_result))


Linear Spectrum for MAIT = 0 71 101 113 131 184 202 214 232 285 303 315 345 416
Linear Spectrum for IAMT = 0 71 101 113 131 184 202 214 232 285 303 315 345 416
Linear Spectrum for MLAT = 0 71 101 113 131 172 184 232 244 285 303 315 345 416
Linear Spectrum for TAIM = 0 71 101 113 131 172 184 232 244 285 303 315 345 416
Linear Spectrum for TMLA = 0 71 101 113 131 172 184 232 244 285 303 315 345 416
Linear Spectrum for TMIA = 0 71 101 113 131 172 184 232 244 285 303 315 345 416


In [112]:
# Quiz - Question 6
# Which of the following linear peptides is consistent with 
# Spectrum = {0 71 99 101 103 128 129 199 200 204 227 230 231 298 303 328 330 332 333}? 
# (Select all that apply.)

peptide = ['CTV', 'TCE', 'QCV', 'CTQ', 'ETC', 'AQV']
for x in range(len(peptide)):
    test_result = get_linear_spectrum(peptide[x])
    print('Linear Spectrum for', peptide[x], '=', ' '.join(str(e) for e in test_result))

Linear Spectrum for CTV = 0 99 101 103 200 204 303
Linear Spectrum for TCE = 0 101 103 129 204 232 333
Linear Spectrum for QCV = 0 99 103 128 202 231 330
Linear Spectrum for CTQ = 0 101 103 128 204 229 332
Linear Spectrum for ETC = 0 101 103 129 204 230 333
Linear Spectrum for AQV = 0 71 99 128 199 227 298
