# Generation of Random Peptides

In [4]:
import random
from itertools import chain

#Define a function to generate all of the possible 8-mers, 9-mers, 10-mers, 11-mers, and 12-mers from a protein sequence
def digestion_products(peptide_sequence, lengths=(8, 9, 10, 11, 12)):
    """Return every contiguous sub-peptide of each requested length.

    A window of each size in ``lengths`` is slid along ``peptide_sequence``
    one position at a time, so overlapping fragments are all included and
    repeated fragments are kept (no de-duplication happens here).

    Args:
        peptide_sequence: The sequence to slice; anything supporting
            ``len()`` and slicing (a ``str`` in this script).
        lengths: Window sizes to enumerate. Defaults to 8 through 12, so a
            call without this argument yields the 8-, 9-, 10-, 11- and
            12-mers, in that order.

    Returns:
        A tuple holding one list per entry of ``lengths``, in the same
        order. A list is empty when the sequence is shorter than its
        window size.

    Raises:
        ValueError: If any requested length is smaller than 1.
    """
    sequence_length = len(peptide_sequence)
    products = []
    for length in lengths:
        if length < 1:
            raise ValueError(f"peptide length must be at least 1, got {length}")
        # range() is empty when the window does not fit, giving an empty list.
        products.append([
            peptide_sequence[start:start + length]
            for start in range(sequence_length - length + 1)
        ])
    return tuple(products)

protein_sequences = []

#Parse the UniProt database to extract all protein sequences into a list
with open("Random Peptides/uniprot_sprot.fasta") as fasta_file:
    # Residue lines of the record currently being read; a FASTA record's
    # sequence may span several lines, so they are joined once per record.
    sequence_lines = []
    for line in fasta_file:
        if line.startswith(">"):
            # A header line closes the previous record. The very first
            # header has nothing before it, so nothing is stored for it.
            if sequence_lines:
                protein_sequences.append("".join(sequence_lines))
                sequence_lines = []
        else:
            sequence_lines.append(line.rstrip())
    # The last record has no following header to close it, so it must be
    # stored here or it would be lost.
    if sequence_lines:
        protein_sequences.append("".join(sequence_lines))

#Draw 5000 proteins at random (without replacement) and pool their digestion products by peptide length
random_sequences = random.sample(protein_sequences, 5000)
eightmer, ninemer, tenmer, elevenmer, twelvemer = [], [], [], [], []

# Pools listed in the same order in which digestion_products returns its lists
pools = (eightmer, ninemer, tenmer, elevenmer, twelvemer)

for sequence in random_sequences:
    for pool, products in zip(pools, digestion_products(sequence)):
        pool.extend(products)

#Remove duplicate sequences
# Each pool is rebound to its own name: the sampling step below reads these
# exact names, so a misspelled target would leave that pool with duplicates.
eightmer = list(set(eightmer))
ninemer = list(set(ninemer))
tenmer = list(set(tenmer))
elevenmer = list(set(elevenmer))
twelvemer = list(set(twelvemer))

#Draw 25,000 peptides of each length (8-mers through 12-mers) at random and write them to a file, one per line
number_of_peptides = 25000

# Pools are sampled in order of increasing peptide length, and the output keeps that order
selected_peptides = [
    peptide
    for pool in (eightmer, ninemer, tenmer, elevenmer, twelvemer)
    for peptide in random.sample(pool, number_of_peptides)
]

with open("random_peptides.peptide", "w") as output_file:
    output_file.writelines(peptide + "\n" for peptide in selected_peptides)