In [5]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

def extract_subsequences(fasta_file, positions, output_file):
    """
    Extract subsequences from a FASTA file and save them to a new FASTA file.

    Every (start, end) range is cut from every record found in ``fasta_file``.
    Each piece is written as its own record whose ID is
    ``region_{start}:{end}`` and whose description is empty.

    Args:
        fasta_file (str): Path to the input FASTA file.
        positions (iterable of tuple): (start, end) pairs giving the 1-based,
            inclusive coordinates of each subsequence to extract.
        output_file (str): Path of the FASTA file to write.

    Raises:
        ValueError: If any range has ``start < 1`` or ``end < start``.

    Note:
        A range that extends past the end of a record is truncated by the
        slice rather than reported as an error.
    """
    # Materialize once: the ranges are validated up front and then reused for
    # every record, which a one-shot iterable would not survive.
    positions = list(positions)

    # Validate before touching any file. A start below 1 would turn into a
    # zero/negative slice index and silently extract from the wrong place.
    for start, end in positions:
        if start < 1:
            raise ValueError(
                f"Invalid range ({start}, {end}): start must be >= 1 (1-based coordinates)."
            )
        if end < start:
            raise ValueError(
                f"Invalid range ({start}, {end}): end must be >= start."
            )

    subsequences = []
    for record in SeqIO.parse(fasta_file, "fasta"):
        for start, end in positions:
            # 1-based inclusive [start, end] -> 0-based half-open [start-1, end).
            subseq = record.seq[start - 1:end]
            subseq_record = SeqRecord(
                subseq, id=f"region_{start}:{end}", description=""
            )
            subsequences.append(subseq_record)

    SeqIO.write(subsequences, output_file, "fasta")

# Example: extract the subsequence at a specific position from fasta_file and
# save it to output_file.
# `prefix` is the sample name; it is used both as the sample directory name
# and as the stem of the input FASTA file name.
prefix = "UST-1-35"
fasta_file = f"E:\\desktop\\大肠杆菌7个样本\\{prefix}\\{prefix}_1.fna"
positions = [(4867865, 4885648)]  # example position (1-based)
output_file = f"E:\\desktop\\大肠杆菌7个样本\\{prefix}\\prophage_tail_expand_R2.fna"

# Write the extracted region(s) to output_file, then report where they went.
extract_subsequences(fasta_file, positions, output_file)
print(f"Extracted subsequences have been saved to {output_file}")




Extracted subsequences have been saved to E:\desktop\大肠杆菌7个样本\UST-1-35\prophage_tail_expand_R2.fna
