In [1]:
from Bio import Entrez, SeqIO

def _fetch_labeled_sequences(search_term, count, label, batch_size=200):
    """Search the NCBI protein database and return labelled FASTA entries.

    Args:
        search_term: Entrez query string passed to ``Entrez.esearch``.
        count: Maximum number of IDs to request (``retmax``).
        label: Tag appended to each FASTA header after the record ID.
        batch_size: Number of IDs sent per ``Entrez.efetch`` request.

    Returns:
        A list of strings, each of the form ``">{id} {label}\\n{sequence}"``.
    """
    search_handle = Entrez.esearch(db="protein", term=search_term, retmax=count)
    try:
        search_result = Entrez.read(search_handle)
    finally:
        # Release the connection even if the response cannot be parsed.
        search_handle.close()

    ids = search_result['IdList']

    entries = []
    for start in range(0, len(ids), batch_size):
        id_string = ','.join(ids[start:start + batch_size])
        fetch_handle = Entrez.efetch(db="protein", id=id_string, rettype="fasta", retmode="text")
        try:
            entries.extend(
                f">{record.id} {label}\n{record.seq}"
                for record in SeqIO.parse(fetch_handle, "fasta")
            )
        finally:
            fetch_handle.close()
    return entries


def fetch_sequences(amp_count=3000, namp_count=3000, output_file='peptides.fasta', email=None):
    """Download AMP and non-AMP protein sequences from NCBI into one FASTA file.

    Antimicrobial-peptide (AMP) records are written first, followed by
    non-antimicrobial (nAMP) records. Each header is the record ID followed
    by its ``AMP`` or ``nAMP`` label.

    Args:
        amp_count: Maximum number of AMP records to request.
        namp_count: Maximum number of nAMP records to request.
        output_file: Path of the FASTA file to write (overwritten if present).
        email: Optional contact address assigned to ``Entrez.email``. NCBI
            asks for one with every E-utilities request; when omitted, any
            value already set on ``Entrez.email`` is left untouched.
    """
    if email is not None:
        Entrez.email = email

    # Fetch antimicrobial peptides (AMP)
    search_term_amp = "antimicrobial peptide[Title] OR antimicrobial peptide[All Fields]"
    amp_entries = _fetch_labeled_sequences(search_term_amp, amp_count, "AMP")

    # Fetch non-antimicrobial peptides (nAMP)
    search_term_namp = "peptide[Title] NOT (antimicrobial peptide[Title] OR antimicrobial peptide[All Fields])"
    namp_entries = _fetch_labeled_sequences(search_term_namp, namp_count, "nAMP")

    sequences = amp_entries + namp_entries

    # Save all sequences to a FASTA file, one newline-terminated entry each
    # so the file ends with a newline like a regular text file.
    with open(output_file, 'w') as fasta_file:
        fasta_file.writelines(f"{entry}\n" for entry in sequences)

    # Report what was actually retrieved; the search may return fewer
    # records than were requested.
    print(
        f"Fetched {len(sequences)} sequences "
        f"(AMP: {len(amp_entries)}, nAMP: {len(namp_entries)}) and saved to {output_file}."
    )

# Run the download only when executed directly (script or notebook cell), so
# importing this file does not trigger thousands of network requests.
# NOTE: NCBI asks for a contact address with every E-utilities request; set
# Entrez.email to your own address before running to avoid the warning.
if __name__ == "__main__":
    fetch_sequences()

            Email address is not specified.

            To make use of NCBI's E-utilities, NCBI requires you to specify your
            email address with each request.  As an example, if your email address
            is A.N.Other@example.com, you can specify it as follows:
               from Bio import Entrez
               Entrez.email = 'A.N.Other@example.com'
            In case of excessive usage of the E-utilities, NCBI will attempt to contact
            a user at the email address provided before blocking access to the
            E-utilities.


Fetched 6000 sequences (AMP: 3000, nAMP: 3000) and saved to peptides.fasta.
