# README.md
## 测试机器
Mac M2
- vcpu 8
- memory 16G
## 安装程序
- python 3.12
- Bio 1.7.1
- cutadapt 4.9
- vsearch v2.30.0
- FastQC v0.12.1
- trimmomatic 0.39
## 运行
依次执行

In [1]:
import subprocess
import csv
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
import tqdm
import os
import json
from collections import Counter, defaultdict

In [2]:
def strict_QC(input_r1, input_r2, output_dir, primer_f="", primer_r=""):
    """
    执行严格的NGS数据质控流程
    :param input_r1: Read1输入文件路径
    :param input_r2: Read2输入文件路径
    :param output_dir: 输出目录
    :param primer_f: 正向引物序列（可选）
    :param primer_r: 反向引物序列（可选）
    """
    # 定义接头序列
    adapter_r1 = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"  # P7 adapter for read1
    adapter_r2 = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"  # P5 adapter for read2

    subprocess.run(["mkdir", "-p", f"{output_dir}/fastqc_raw"])
    subprocess.run(["mkdir", "-p", f"{output_dir}/fastqc_trimmed"])
    
    # FastQC原始数据质控
    subprocess.run(["fastqc", "-t", "14", input_r1, input_r2, "-o", f"{output_dir}/fastqc_raw"])
    
    # cutadapt切除接头与引物
    cutadapt_cmd = [
            "cutadapt",
            "-a", adapter_r2,  # R2的3'端接头（P5）
            "-A", adapter_r1,  # R1的3'端接头（P7）
            "-o", f"{output_dir}/F.fq.gz",
            "-p", f"{output_dir}/R.fq.gz",
            "--minimum-length", "50",
            "--max-n", "0",
            "--error-rate", "0.1",
            f"--json={output_dir}/cutadapt.json",
            "--cores=14"
        ]
        
    # 添加引物切除参数
    if primer_f and primer_r:
        cutadapt_cmd.extend(["-g", f"^{primer_f}", "-G", f"^{primer_r}"])
    
    cutadapt_cmd.extend([input_r1, input_r2])
    
    subprocess.run(cutadapt_cmd)
    
    # 切除后质控验证
    try:
        with open(f"{output_dir}/cutadapt.json") as f:
            log_data = json.load(f)
            
        total_pairs = log_data["read_counts"]["input"]
        kept_pairs = log_data["read_counts"]["output"]
        kept_ratio = kept_pairs / total_pairs * 100
        
        print(f"原始序列对: {total_pairs}")
        print(f"保留序列对: {kept_pairs} ({kept_ratio:.2f}%)")
        
        # 验证标准
        if kept_ratio < 90:
            print("\n⚠️ 警告: 保留率低于90%，建议检查接头/引物设计")
        else:
            print("\n✅ 保留率符合质控标准(>90%)")
                
    except json.JSONDecodeError as e:
        print(f"JSON解析失败: {e}")
    except FileNotFoundError:
        print("❌ cutadapt.json文件未生成，请检查命令执行")
    except KeyError as e:
        print(f"❌ JSON结构异常，缺失关键字段: {e}")
    
    # 额外质控：切除后FastQC验证
    subprocess.run([
        "fastqc", 
        "-t", "14",
        f"{output_dir}/F.fq.gz", 
        f"{output_dir}/R.fq.gz",
        "-o", f"{output_dir}/fastqc_trimmed"
    ])

In [3]:
def merget(input_forward, input_reverse, output_merged):
    """合并双端测序数据，若输出文件已存在则跳过"""
    # 检查输出文件是否已存在
    if os.path.exists(output_merged):
        print(f"文件 {output_merged} 已存在，跳过合并操作")
        return
    
    # 检查输入文件是否存在
    if not os.path.exists(input_forward):
        raise FileNotFoundError(f"正向文件不存在: {input_forward}")
    if not os.path.exists(input_reverse):
        raise FileNotFoundError(f"反向文件不存在: {input_reverse}")
    
    # 构建并执行VSEARCH命令
    vsearch_command = (
        f"vsearch --fastq_mergepairs {input_forward} "
        f"--reverse {input_reverse} "
        f"--fastqout {output_merged} "
        "--fastq_allowmergestagger"
    )
    subprocess.run(vsearch_command, shell=True, check=True)
    print(f"双端合并完成 → {output_merged}")

In [4]:
def QC_merger(input_fastq, output_dir, output_fastq):
    # 检查输出文件是否已存在（若存在则跳过）
    if os.path.exists(output_fastq):
        print(f"文件 {output_fastq} 已存在，跳过质量控制和修剪步骤")
        return
    
    # 创建输出目录（若不存在）
    subprocess.run(["mkdir", "-p", output_dir])
    
    # 执行FastQC质量控制
    subprocess.run([
        "fastqc", 
        "-t", "14", 
        input_fastq, 
        "-o", output_dir
    ])
    
    # 执行Trimmomatic修剪
    subprocess.run([
        "trimmomatic", "SE", 
        "-threads", "14", 
        "-phred33", 
        input_fastq, 
        output_fastq, 
        "LEADING:3", 
        "TRAILING:3", 
        "SLIDINGWINDOW:4:15"
    ])

In [5]:
class BarcodeClassifier:
    def __init__(self, input_fastq, csv_file, output_directory):
        self.input_fastq = input_fastq
        self.csv_file = csv_file
        self.output_directory = output_directory
        self.barcode_pairs = self.read_barcodes_from_csv()
        self.barcode_handles = {}
        # 创建输出目录（如果不存在）
        os.makedirs(output_directory, exist_ok=True)

    def read_barcodes_from_csv(self):
        """从CSV文件中读取条形码组合"""
        barcode_pairs = []
        with open(self.csv_file, 'r') as csvfile:
            csvreader = csv.reader(csvfile)
            next(csvreader)  # 跳过标题行
            for row in csvreader:
                barcode1, barcode2 = row[0].strip(), row[1].strip()
                barcode_pairs.append((barcode1, barcode2))
        return barcode_pairs

    def correct_sequence(self, sequence):
        """校正反向互补序列"""
        base_F = "ATCG"
        base_R = "TAGC"
        complement = {f: r for f, r in zip(base_F, base_R)}
        return ''.join(complement.get(base, base) for base in reversed(sequence))

    def output_files_exist(self):
        """检查所有输出文件是否已存在"""
        # 检查未匹配序列文件
        unmatched_file = os.path.join(self.output_directory, "unmatched_output.fastq")
        if not os.path.exists(unmatched_file):
            return False
        
        # 检查每个条形码的输出文件
        for barcode1, barcode2 in self.barcode_pairs:
            barcode_file = os.path.join(
                self.output_directory, 
                f"{barcode1}_{barcode2}_output.fastq"
            )
            if not os.path.exists(barcode_file):
                return False
                
        return True

    def classify_by_barcodes(self):
        """执行条形码分类（如果输出文件不存在）"""
        # 检查所有输出文件是否已存在
        if self.output_files_exist():
            print("所有输出文件已存在，跳过分类操作。")
            return
            
        # 打开未匹配序列文件
        unmatched_handle = open(os.path.join(self.output_directory, "unmatched_output.fastq"), "w")
        
        with open(self.input_fastq, "r") as handle:
            for record in tqdm.tqdm(SeqIO.parse(handle, "fastq"), desc="Processing sequences"):
                found_match = False
                
                # 正向匹配
                for barcode1, barcode2 in self.barcode_pairs:
                    if str(record.seq).startswith(barcode1) and str(record.seq).endswith(barcode2):
                        self._write_record(record, barcode1, barcode2)
                        found_match = True
                        break
                
                # 反向互补匹配
                if not found_match:
                    corrected_seq = self.correct_sequence(str(record.seq))
                    for barcode1, barcode2 in self.barcode_pairs:
                        if corrected_seq.startswith(barcode1) and corrected_seq.endswith(barcode2):
                            self._write_corrected_record(record, corrected_seq, barcode1, barcode2)
                            found_match = True
                            break
                
                # 未匹配序列
                if not found_match:
                    SeqIO.write(record, unmatched_handle, "fastq")

        # 关闭所有文件句柄
        for handle in self.barcode_handles.values():
            handle.close()
        unmatched_handle.close()
    
    def _write_record(self, record, barcode1, barcode2):
        """写入匹配的序列记录"""
        barcode_pair_name = f"{barcode1}_{barcode2}"
        if barcode_pair_name not in self.barcode_handles:
            file_path = os.path.join(self.output_directory, f"{barcode_pair_name}_output.fastq")
            self.barcode_handles[barcode_pair_name] = open(file_path, "w")
        SeqIO.write(record, self.barcode_handles[barcode_pair_name], "fastq")
    
    def _write_corrected_record(self, record, corrected_seq, barcode1, barcode2):
        """写入校正后的序列记录"""
        barcode_pair_name = f"{barcode1}_{barcode2}"
        if barcode_pair_name not in self.barcode_handles:
            file_path = os.path.join(self.output_directory, f"{barcode_pair_name}_output.fastq")
            self.barcode_handles[barcode_pair_name] = open(file_path, "w")
        
        # 创建校正后的记录（保留质量值）
        corrected_record = SeqRecord(
            Seq(corrected_seq),
            id=record.id,
            description=record.description,
            letter_annotations={"phred_quality": record.letter_annotations['phred_quality']}
        )
        SeqIO.write(corrected_record, self.barcode_handles[barcode_pair_name], "fastq")

In [6]:
def process_sequences(config_file):
    with open(config_file, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # 跳过标题
        for row in tqdm.tqdm(csvreader):
            barcode1 = row[0].strip()
            barcode2 = row[1].strip()
            template = row[2].strip().upper()
            spacer = row[3].strip().upper()
            start_base = int(row[4].strip())
            end_base = int(row[5].strip())
            ind = template.index(spacer)
            sequences = [] 
            file_path = os.path.join(file, f"{barcode1}_{barcode2}_output.fastq")
            with open(file_path, "r") as seq_file:
                line_number = 0
                for line in seq_file:
                    line_number += 1
                    if line_number % 4 == 2:
                        sequence = line.strip()
                        extracted_sequence = sequence[ind+start_base:ind+end_base]
                        sequences.append(extracted_sequence)
    
            sequence_counts = Counter(sequences)

            os.makedirs(os.path.join(file, "ExtractSeq"), exist_ok=True)
            output_file_path = os.path.join(file, "ExtractSeq", os.path.basename(file_path).replace('.fastq', '_counts.csv'))
            with open(output_file_path, 'w', newline='') as output_csvfile:
                writer = csv.writer(output_csvfile)
                writer.writerow(['Extracted Sequence', 'Count'])  
                for seq, count in sequence_counts.items():
                    writer.writerow([seq, count])


In [7]:
def process_base_counts(config_file, output_file):
    results = []
    with open(config_file, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # 跳过标题
        for row in tqdm.tqdm(csvreader):
            barcode1 = row[0].strip()
            barcode2 = row[1].strip()
            template = row[2].strip().upper()
            spacer = row[3].strip().upper()
            base_windows = int(row[6].strip())
            file_path = os.path.join(file, f"{barcode1}_{barcode2}_output.fastq")
            sequences = [] 
            with open(file_path, 'r') as countsfile:
                line_number = 0

    
                for line in countsfile:
                    line_number += 1
                    if line_number % 4 == 2:
                        sequences.append(line.strip())

            ind = template.index(spacer)
            base_windows_ind = ind + base_windows - 1

            num_A = 0
            num_T = 0
            num_C = 0
            num_G = 0

            for seq in sequences:
                if len(seq) > base_windows_ind:
                    if seq[base_windows_ind] == 'A':
                        num_A += 1
                    elif seq[base_windows_ind] == 'T':
                        num_T += 1
                    elif seq[base_windows_ind] == 'C':
                        num_C += 1
                    elif seq[base_windows_ind] == 'G':
                        num_G += 1

            results.append([file_path, spacer, base_windows, num_A, num_T, num_C, num_G])

    # 写入结果到CSV文件
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([ 'Spacer', 'Base Windows', 'A', 'T', 'C', 'G'])
        writer.writerows(results)


In [8]:
def process_all_sequences(config_file):
    """处理序列数据，保存spacer定位后的完整序列"""
    with open(config_file, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # 跳过标题行
        for row in tqdm.tqdm(csvreader):
            # 解析配置参数
            barcode1 = row[0].strip()
            barcode2 = row[1].strip()
            template = row[2].strip().upper()
            spacer = row[3].strip().upper()
            
            # 定位spacer位置（用于验证序列有效性）
            try:
                ind = template.index(spacer)
            except ValueError:
                continue

            # 构建输入文件路径
            file_path = os.path.join(file,f"{barcode1}_{barcode2}_output.fastq")
            
            # 读取并保存完整序列
            sequences = []
            try:
                with open(file_path, "r") as seq_file:
                    for line_num, line in enumerate(seq_file, 1):
                        if line_num % 4 == 2:  # 序列行
                            sequences.append(line.strip())
            except FileNotFoundError:
                continue
                
            # 统计序列频率
            sequence_counts = Counter(sequences)
            
            # 输出结果到CSV
            os.makedirs(os.path.join(file, "AllSeq"), exist_ok=True)
            output_file_path = os.path.join(file, "AllSeq", os.path.basename(file_path).replace('.fastq', '_counts.csv'))
            with open(output_file_path, 'w', newline='') as output_csvfile:
                writer = csv.writer(output_csvfile)
                writer.writerow(['Full Sequence', 'Count'])
                for seq, count in sequence_counts.items():
                    writer.writerow([seq, count])

In [9]:
def process_surrounding_sequences(config_file, spacer_front=20, spacer_after=20):
    """处理序列数据，保存spacer定位区域前后特定bp的序列"""
    with open(config_file, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # 跳过标题行
        for row in tqdm.tqdm(csvreader, desc="Processing spacer surrounding sequences"):
            # 解析配置参数
            barcode1 = row[0].strip()
            barcode2 = row[1].strip()
            template = row[2].strip().upper()
            spacer = row[3].strip().upper()
            
            # 定位spacer位置
            try:
                ind = template.index(spacer)
            except ValueError:
                continue

            # 构建输入文件路径
            file_path = os.path.join(file, f"{barcode1}_{barcode2}_output.fastq")
            
            # 读取并处理序列
            sequences = []
            try:
                with open(file_path, "r") as seq_file:
                    for line_num, line in enumerate(seq_file, 1):
                        if line_num % 4 == 2:  # 序列行
                            sequence = line.strip()
                            # 计算截取范围（含边界保护）
                            start_index = max(0, ind - spacer_front)
                            end_index = min(len(sequence), ind + len(spacer) + spacer_after)
                            extracted_sequence = sequence[start_index:end_index]
                            sequences.append(extracted_sequence)
            except FileNotFoundError:
                continue
                
            # 统计序列频率
            sequence_counts = Counter(sequences)
            
            # 输出结果到CSV
            os.makedirs(os.path.join(file, "SurroundingSeq"), exist_ok=True)
            output_file_path = os.path.join(
                file, "SurroundingSeq", 
                os.path.basename(file_path).replace('.fastq', '_counts.csv')
            )
            with open(output_file_path, 'w', newline='') as output_csvfile:
                writer = csv.writer(output_csvfile)
                writer.writerow(['Surrounding Sequence', 'Count'])
                for seq, count in sequence_counts.items():
                    writer.writerow([seq, count])

In [21]:
def NGS(file,purpose,spacer_front=20, spacer_after=20):
    input_forward  = file + "/F.fq.gz"
    input_reverse = file + "/R.fq.gz"
    output_merged = file + "/merged.fasta"
    merget(input_forward,input_reverse,output_merged)
    input_fastq = output_merged
    output_dir = file + "/fastqc_output"
    output_fastq = file + "/output_trimmed.fastq"
    QC_merger(input_fastq,output_dir,output_fastq)
    input_fastq = output_fastq
    tempalte_CSV = "tempalte_CSV.csv"
    output_directory = file
    classifier = BarcodeClassifier(input_fastq, tempalte_CSV, output_directory)
    classifier.classify_by_barcodes()
    if purpose == 1:
        config_file = tempalte_CSV
        process_sequences(config_file)
    elif purpose == 2:
        config_file = tempalte_CSV
        output_file = file+ "/results.csv"
        process_base_counts(config_file, output_file)
    elif purpose == 3:
        config_file = tempalte_CSV
        process_all_sequences(config_file)
    elif purpose == 4:
        config_file = tempalte_CSV
        process_surrounding_sequences(config_file, spacer_front=20, spacer_after=20)

上述程序直接点击运行即可，不要进行任何修改，如果想修改的话请复制后进行修改
二代测序数据下载后请自行解压，并且把文件名修改为F.fq和R.fq
上传后确定文件夹后，在file中输入上传的路径，注意所有的后续文件都会有在这个文件夹中生成，建议每次都新建一个文件夹
purpose只有三个选项有意义，1,2或者3，1是针对spacer定位的区域进行扫描并保存截取部分，2是针对碱基编辑器，3是针对spacer定位的区域进行扫描并将全部序列保存，4是针对spacer定位的区域前后特定bp的序列进行保存

In [11]:
file = "./WangHongLe"

In [12]:
strict_QC('../F.fq', '../R.fq', file, primer_f="", primer_r="")

null
null


Started analysis of F.fq
Started analysis of R.fq
Approx 5% complete for R.fq
Approx 5% complete for F.fq
Approx 10% complete for R.fq
Approx 10% complete for F.fq
Approx 15% complete for R.fq
Approx 15% complete for F.fq
Approx 20% complete for R.fq
Approx 20% complete for F.fq
Approx 25% complete for R.fq
Approx 25% complete for F.fq
Approx 30% complete for R.fq
Approx 30% complete for F.fq
Approx 35% complete for R.fq
Approx 35% complete for F.fq
Approx 40% complete for R.fq
Approx 40% complete for F.fq
Approx 45% complete for R.fq
Approx 45% complete for F.fq
Approx 50% complete for R.fq
Approx 50% complete for F.fq
Approx 55% complete for R.fq
Approx 55% complete for F.fq
Approx 60% complete for R.fq
Approx 60% complete for F.fq
Approx 65% complete for R.fq
Approx 65% complete for F.fq
Approx 70% complete for R.fq
Approx 70% complete for F.fq
Approx 75% complete for R.fq
Approx 75% complete for F.fq
Approx 80% complete for R.fq
Approx 80% complete for F.fq
Approx 85% complete for 

Analysis complete for R.fq
Analysis complete for F.fq
This is cutadapt 5.1 with Python 3.12.11
Command line parameters: -a AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -A AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC -o ./WangHongLe/F.fq.gz -p ./WangHongLe/R.fq.gz --minimum-length 50 --max-n 0 --error-rate 0.1 --json=./WangHongLe/cutadapt.json --cores=14 ../F.fq ../R.fq
Processing paired-end reads on 14 cores ...

=== Summary ===

Total read pairs processed:         23,878,353
  Read 1 with adapter:                 622,758 (2.6%)
  Read 2 with adapter:                 666,149 (2.8%)

== Read fate breakdown ==
Pairs that were too short:                   0 (0.0%)
Pairs with too many N:                  42,721 (0.2%)
Pairs written (passing filters):    23,835,632 (99.8%)

Total basepairs processed: 7,163,505,900 bp
  Read 1: 3,581,752,950 bp
  Read 2: 3,581,752,950 bp
Total written (filtered):  7,146,596,545 bp (99.8%)
  Read 1: 3,573,367,445 bp
  Read 2: 3,573,229,100 bp

=== First read: Adapter 1 ===

Seque

Started analysis of F.fq.gz
Started analysis of R.fq.gz
Approx 5% complete for F.fq.gz
Approx 5% complete for R.fq.gz
Approx 10% complete for F.fq.gz
Approx 10% complete for R.fq.gz
Approx 15% complete for F.fq.gz
Approx 15% complete for R.fq.gz
Approx 20% complete for F.fq.gz
Approx 20% complete for R.fq.gz
Approx 25% complete for F.fq.gz
Approx 25% complete for R.fq.gz
Approx 30% complete for F.fq.gz
Approx 30% complete for R.fq.gz
Approx 35% complete for F.fq.gz
Approx 35% complete for R.fq.gz
Approx 40% complete for F.fq.gz
Approx 40% complete for R.fq.gz
Approx 45% complete for F.fq.gz
Approx 45% complete for R.fq.gz
Approx 50% complete for F.fq.gz
Approx 50% complete for R.fq.gz
Approx 55% complete for F.fq.gz
Approx 55% complete for R.fq.gz
Approx 60% complete for F.fq.gz
Approx 60% complete for R.fq.gz
Approx 65% complete for F.fq.gz
Approx 65% complete for R.fq.gz
Approx 70% complete for F.fq.gz
Approx 70% complete for R.fq.gz
Approx 75% complete for F.fq.gz
Approx 75% complet

Analysis complete for F.fq.gz
Analysis complete for R.fq.gz


In [13]:
purpose = 1

In [14]:
NGS(file,purpose)

vsearch v2.30.0_linux_x86_64, 503.5GB RAM, 104 cores
https://github.com/torognes/vsearch

Merging reads 100%
  23835632  Pairs
  23426884  Merged (98.3%)
    408748  Not merged (1.7%)

Pairs that failed merging due to various reasons:
    138834  too few kmers found on same diagonal
       561  multiple potential alignments
     93949  too many differences
    175314  alignment score too low, or score drop too high
        90  overlap too short

Statistics of all reads:
    149.91  Mean read length

Statistics of merged reads:
    212.74  Mean fragment length
     11.68  Standard deviation of fragment length
      0.21  Mean expected error in forward sequences
      0.21  Mean expected error in reverse sequences
      0.17  Mean expected error in merged sequences
      0.12  Mean observed errors in merged region of forward sequences
      0.09  Mean observed errors in merged region of reverse sequences
      0.21  Mean observed errors in merged region


双端合并完成 → ./WangHongLe/merged.fasta
null


Started analysis of merged.fasta
Approx 5% complete for merged.fasta
Approx 10% complete for merged.fasta
Approx 15% complete for merged.fasta
Approx 20% complete for merged.fasta
Approx 25% complete for merged.fasta
Approx 30% complete for merged.fasta
Approx 35% complete for merged.fasta
Approx 40% complete for merged.fasta
Approx 45% complete for merged.fasta
Approx 50% complete for merged.fasta
Approx 55% complete for merged.fasta
Approx 60% complete for merged.fasta
Approx 65% complete for merged.fasta
Approx 70% complete for merged.fasta
Approx 75% complete for merged.fasta
Approx 80% complete for merged.fasta
Approx 85% complete for merged.fasta
Approx 90% complete for merged.fasta
Approx 95% complete for merged.fasta


Analysis complete for merged.fasta


TrimmomaticSE: Started with arguments:
 -threads 14 -phred33 ./WangHongLe/merged.fasta ./WangHongLe/output_trimmed.fastq LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15
Input Reads: 23426884 Surviving: 23423487 (99.99%) Dropped: 3397 (0.01%)
TrimmomaticSE: Completed successfully
Processing sequences: 23423487it [21:40, 18004.59it/s]
31it [00:17,  1.78it/s]


In [15]:
purpose = 2

In [16]:
NGS(file,purpose)

文件 ./WangHongLe/merged.fasta 已存在，跳过合并操作
文件 ./WangHongLe/output_trimmed.fastq 已存在，跳过质量控制和修剪步骤
所有输出文件已存在，跳过分类操作。


31it [00:16,  1.90it/s]


In [17]:
purpose = 3

In [18]:
NGS(file,purpose)

文件 ./WangHongLe/merged.fasta 已存在，跳过合并操作
文件 ./WangHongLe/output_trimmed.fastq 已存在，跳过质量控制和修剪步骤
所有输出文件已存在，跳过分类操作。


31it [00:17,  1.76it/s]


In [22]:
purpose = 4
spacer_front=20
spacer_after=20

In [23]:
NGS(file,purpose,spacer_front,spacer_after)

文件 ./WangHongLe/merged.fasta 已存在，跳过合并操作
文件 ./WangHongLe/output_trimmed.fastq 已存在，跳过质量控制和修剪步骤
所有输出文件已存在，跳过分类操作。


Processing spacer surrounding sequences: 31it [00:21,  1.43it/s]
