In [309]:
#функция для вытаскивания айдишников из файлов

def get_ids(filename):
    ids = []
    with open(filename, "r") as f:
        for line in f:
            line = line.strip()
            if line:
                id_part = line.split(":")[0]
                ids.append(id_part)
    return ids


In [140]:
#по айдишникам вытаскивать описание, продукт и транслят 
from Bio import SeqIO
from textwrap import wrap

def extract_to_fasta(filename, id_list, output_fasta):
    c=0
    with open(output_fasta, "w") as out:
        for record in SeqIO.parse(filename, "genbank"):
            acc = record.name
            if acc not in id_list:
                continue
            c=0
            definition = record.description
            for feature in record.features:
                if feature.type == "CDS":
                    c+=1
                    product = feature.qualifiers.get("product", ["unknown_product"])[0]
                    translation = feature.qualifiers.get("translation", [""])[0]
                    header = f">{acc}.{c}| product: {product} | {definition}"
                    out.write(header + "\n")
                    seq_lines = wrap(translation, 60)
                    out.write("\n".join(seq_lines) + "\n")

In [74]:
#по айдишникам вытаскивать описание и нуклеотидную последовательность (+потом транслят по трем рамкам считывание)

from Bio import SeqIO
from textwrap import wrap
from Bio.Seq import Seq

def extract_to_fasta_noCDS(filename, id_list, output_fasta):
    with open(output_fasta, "w") as out:
        for record in SeqIO.parse(filename, "genbank"):
            acc = record.name
            if acc not in id_list:
                continue
            definition = record.description
            nucl_seq = str(record.seq)
            for frame in range(3):
                sub_seq = nucl_seq[frame:]                              
                prot_seq = str(Seq(sub_seq).translate(to_stop=True))
                if len(prot_seq) < 10:
                    continue
                header_prot = f">{acc}.n+{frame} | {definition}"
                out.write(header_prot + "\n")
                for line in wrap(prot_seq, 50):
                    out.write(line + "\n")

In [263]:
#отбор неаннотированных CDS с квалификатором misc_feature
from Bio import SeqIO
gb_file = "Astroviridae_15102025.gb"
ids_with_misc_features = []
for record in SeqIO.parse(gb_file, "genbank"):
    for feature in record.features:
        if feature.type == "misc_feature":
            id_before_dot = record.id.split('.')[0]
            ids_with_misc_features.append(id_before_dot)
            break 


In [264]:
#скрипт для вытаскивания нуклеотидных последовательностей по айдишникам в фаста-файл 
from Bio import SeqIO
from textwrap import wrap

def extract_to_fasta_nucl(filename, id_list, ids_with_misc_features, output_fasta):
    with open(output_fasta, "w") as out:
        for record in SeqIO.parse(filename, "genbank"):
            acc = record.name.split('.')[0]
            if acc not in id_list or acc in ids_with_misc_features:
                continue
            
            definition = record.description
            nucl_seq = str(record.seq)
            
            header = f">{acc} | {definition}"
            out.write(header + "\n")
            for line in wrap(nucl_seq, 60):
                out.write(line + "\n")


In [40]:
conflict_annot_ids = get_ids("Astroviridae_15102025_conflictingannot.txt")
conflict_annot_ids
filename = "Astroviridae_15102025.gb"
output_fasta = "conflict_annot_ids.fasta"
extract_to_fasta(filename, conflict_annot_ids, output_fasta)


In [273]:
not_annot_CDS_ids = get_ids("Astroviridae_15102025_noannotCDS.txt")
len(not_annot_CDS_ids)

64

In [37]:
not_annot_targetCDS_ids = get_ids("Astroviridae_15102025_noannot_targetCDS.txt")
not_annot_targetCDS_ids
filename = "Astroviridae_15102025.gb"
output_fasta = "not_annot_targetCDS_ids.fasta"
extract_to_fasta(filename, not_annot_targetCDS_ids, output_fasta)


In [265]:
not_annot_CDS_ids = get_ids("Astroviridae_15102025_noannotCDS.txt")
not_annot_CDS_ids
filename = "Astroviridae_15102025.gb"
output_fasta = "not_annot_CDS_nucl.fasta"
extract_to_fasta_nucl(filename, not_annot_CDS_ids, ids_with_misc_features, output_fasta)

In [129]:
# для добавления название продукта в выдачу interpro
import pandas as pd
from Bio import SeqIO
tsv_file = "interpro_выдача/not_annot_targetCDS_ids_part2.tsv"
df = pd.read_csv(tsv_file, sep="\t")
fasta_file = "interpro_input/not_annot_targetCDS_ids_part2.fasta"
fasta_dict = {}
print(df["ID белка"])
for record in SeqIO.parse(fasta_file, "fasta"):
    parts = record.description.split("|", 1)
    if len(parts) > 1:
        fasta_id = parts[0]  # первое слово после '|'
    else:
        fasta_id = record.id  # fallback, если '|' нет
    fasta_id = f"{fasta_id}|"
    description = record.description
    product = None
    if "product:" in description:
        product = description.split("product:")[1].split("|")[0].strip()
    fasta_dict[fasta_id] = product

df["product"] = df["ID белка"].map(fasta_dict)
df = df[df["product"].notna()]

cols = df.columns.tolist()
cols.insert(1, cols.pop(cols.index("product")))  # вставляем 'product' на 2-е место
df = df[cols]
output_file = "not_annot_targetCDS_ids_part2_2.tsv"
df.to_csv(output_file, sep="\t", index=False)

0      OQ709194.1|
1      OQ709194.1|
2      OQ709194.1|
3      OQ709194.1|
4      OQ709194.1|
          ...     
540    MW347540.1|
541    MW347540.1|
542    MW347540.1|
543    MW347540.1|
544    MW347540.1|
Name: ID белка, Length: 545, dtype: object


In [135]:
#удаление неподходящих источников предсказания
import pandas as pd
tsv_file = "interpro_выдача/not_annot_targetCDS_ids_part2.tsv"
df = pd.read_csv(tsv_file, sep="\t")
df = df[df["Источник предсказания"] != "MobiDBLite"].copy()
df.to_csv("interpro_выдача/not_annot_targetCDS_ids_part2.tsv", sep="\t", index=False)

In [206]:
import pandas as pd

tsv_files = [
    "interpro_выдача/not_annot_targetCDS_ids_part1.tsv",
    "interpro_выдача/not_annot_targetCDS_ids_part2.tsv",
    "interpro_выдача/not_annot_CDS.tsv", 
    "interpro_выдача/conflict_annot_ids.tsv"
]

for tsv_file in tsv_files:
    df = pd.read_csv(tsv_file, sep="\t")
    pfam_superfamily_df = df[df['Источник предсказания'].isin(['Pfam', 'SUPERFAMILY'])]
    not_pfam_superfamily_df = df[~df['Источник предсказания'].isin(['Pfam', 'SUPERFAMILY'])]
    base_name = tsv_file.rsplit("/", 1)[-1].replace(".tsv", "")
    pfam_superfamily_file = f"interpro_выдача/{base_name}_pfam_superfamily.csv"
    not_pfam_superfamily_file = f"interpro_выдача/{base_name}_not_pfam_superfamily.csv"
    
    pfam_superfamily_df.to_csv(pfam_superfamily_file, index=False)
    not_pfam_superfamily_df.to_csv(not_pfam_superfamily_file, index=False)
    
    print(f"Обработан файл {tsv_file}: Pfam+Superfamily -> {pfam_superfamily_file}, Non-Pfam+Superfamily -> {not_pfam_superfamily_file}")


Обработан файл interpro_выдача/not_annot_targetCDS_ids_part1.tsv: Pfam+Superfamily -> interpro_выдача/not_annot_targetCDS_ids_part1_pfam_superfamily.csv, Non-Pfam+Superfamily -> interpro_выдача/not_annot_targetCDS_ids_part1_not_pfam_superfamily.csv
Обработан файл interpro_выдача/not_annot_targetCDS_ids_part2.tsv: Pfam+Superfamily -> interpro_выдача/not_annot_targetCDS_ids_part2_pfam_superfamily.csv, Non-Pfam+Superfamily -> interpro_выдача/not_annot_targetCDS_ids_part2_not_pfam_superfamily.csv
Обработан файл interpro_выдача/not_annot_CDS.tsv: Pfam+Superfamily -> interpro_выдача/not_annot_CDS_pfam_superfamily.csv, Non-Pfam+Superfamily -> interpro_выдача/not_annot_CDS_not_pfam_superfamily.csv
Обработан файл interpro_выдача/conflict_annot_ids.tsv: Pfam+Superfamily -> interpro_выдача/conflict_annot_ids_pfam_superfamily.csv, Non-Pfam+Superfamily -> interpro_выдача/conflict_annot_ids_not_pfam_superfamily.csv


In [291]:
import pandas as pd
df1 = pd.read_csv("interpro_выдача/not_annot_targetCDS_ids_part1.tsv", sep="\t")
df2 = pd.read_csv("interpro_выдача/not_annot_targetCDS_ids_part2.tsv", sep="\t")

merged_df = pd.concat([df1, df2], ignore_index=True)
merged_df.to_csv("interpro_выдача/not_annot_targetCDS_ids.tsv", sep="\t", index=False)


In [282]:
import pandas as pd
import os

def find_ORFs(csv_files, csv_file_orf, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    orf_df = pd.read_csv(csv_file_orf, header=None).fillna('')
    orf_list = orf_df[[0, 1]].values.tolist()

    for csv_file in csv_files:
        df = pd.read_csv(csv_file)
        all_results = []

        for _, row in df.iterrows():
            description = str(row.get('Описание предсказания', '')).lower()
            matched = False
            fixed_orfs = ['astrovirus vpg protein', 'rna helicase', 
                          'viral superfamily 1 rna helicase core domain', 'viral methyltransferase']
            if description in [x.lower() for x in fixed_orfs]:
                all_results.append({
                    'ID белка': row['ID белка'],
                    'Название семейства/белка': row.get('Название семейства/белка', ''),
                    'Описание предсказания': row.get('Описание предсказания', ''),
                    'Источник предсказания': row.get('Источник предсказания', ''),
                    'Вхождение из orf': '-',
                    'ORF': '1A',
                    'Start': row.get('Начало', ''),
                    'End': row.get('Конец', ''),
                    'Source File': csv_file
                })
                matched = True
            else:
                for keyword, orf in orf_list:
                    if keyword and keyword not in ['2', '3', 'S', 'NS']:
                        words = keyword.lower().split()
                        if any(word in description for word in words):
                            all_results.append({
                                'ID белка': row['ID белка'],
                                'Название семейства/белка': row.get('Название семейства/белка', ''),
                                'Описание предсказания': row.get('Описание предсказания', ''),
                                'Источник предсказания': row.get('Источник предсказания', ''),
                                'Вхождение из orf': keyword,
                                'ORF': orf,
                                'Start': row.get('Начало', ''),
                                'End': row.get('Конец', '')
                            })
                            matched = True
                            break

            if not matched:
                all_results.append({
                    'ID белка': row['ID белка'],
                    'Название семейства/белка': row.get('Название семейства/белка', ''),
                    'Описание предсказания': row.get('Описание предсказания', ''),
                    'Источник предсказания': row.get('Источник предсказания', ''),
                    'Вхождение из orf': '-',
                    'ORF': '-',
                    'Start': row.get('Начало', ''),
                    'End': row.get('Конец', '')
                })

        results_df = pd.DataFrame(all_results)
        base_name = os.path.basename(csv_file).rsplit(".", 1)[0]
        output_file = os.path.join(output_dir, f"{base_name}_orf_results.csv")
        results_df.to_csv(output_file, index=False)
        print(f"Файл {csv_file} обработан -> {output_file}")


In [232]:
csv_files = [
    "interpro_выдача/not_annot_targetCDS_ids_pfam_superfamily.csv",
    "interpro_выдача/not_annot_CDS_pfam_superfamily.csv", 
    "interpro_выдача/conflict_annot_ids_pfam_superfamily.csv"
]
csv_file_orf = "../ORF_names.csv"
output_dir = "interpro_выдача"
print(find_ORFs(csv_files, csv_file_orf,output_dir))

Файл interpro_выдача/not_annot_targetCDS_ids_pfam_superfamily.csv обработан -> interpro_выдача/not_annot_targetCDS_ids_pfam_superfamily_orf_results.csv
Файл interpro_выдача/not_annot_CDS_pfam_superfamily.csv обработан -> interpro_выдача/not_annot_CDS_pfam_superfamily_orf_results.csv
Файл interpro_выдача/conflict_annot_ids_pfam_superfamily.csv обработан -> interpro_выдача/conflict_annot_ids_pfam_superfamily_orf_results.csv
None


In [233]:
csv_files = [
    "interpro_выдача/not_annot_targetCDS_ids_not_pfam_superfamily.csv",
    "interpro_выдача/not_annot_CDS_not_pfam_superfamily.csv", 
    "interpro_выдача/conflict_annot_ids_not_pfam_superfamily.csv"
]
csv_file_orf = "../ORF_names.csv"
output_dir = "interpro_выдача"
print(find_ORFs(csv_files, csv_file_orf,output_dir))

Файл interpro_выдача/not_annot_targetCDS_ids_not_pfam_superfamily.csv обработан -> interpro_выдача/not_annot_targetCDS_ids_not_pfam_superfamily_orf_results.csv
Файл interpro_выдача/not_annot_CDS_not_pfam_superfamily.csv обработан -> interpro_выдача/not_annot_CDS_not_pfam_superfamily_orf_results.csv
Файл interpro_выдача/conflict_annot_ids_not_pfam_superfamily.csv обработан -> interpro_выдача/conflict_annot_ids_not_pfam_superfamily_orf_results.csv
None


In [254]:
#функуия для поиска айдишников, для которых interpro ничего не нашел

import pandas as pd
from Bio import SeqIO

def filter_fasta_by_csv(fasta_file, csv_file, output_txt):
    df = pd.read_csv(csv_file, sep='\t')
    ids_to_skip = set(str(x).split('|')[0].strip() for x in df['ID белка'])
    
    count_written = 0
    with open(output_txt, "w") as out_f:
        for record in SeqIO.parse(fasta_file, "fasta"):
            record_id = str(record.id).split("|")[0].strip()
            if record_id not in ids_to_skip:
                out_f.write(record.description + "\n")
                count_written += 1

    print(f"Готово! Записано {count_written} записей в {output_txt}")


In [256]:
all = [['interpro_input/conflict_annot_ids.fasta', 'interpro_выдача/conflict_annot_ids.tsv', 'not_find_interpro/conflict_annot_ids_not_find.txt'], 
       ['interpro_input/not_annot_targetCDS_ids.fasta', 'interpro_выдача/not_annot_targetCDS_ids.tsv', 'not_find_interpro/not_annot_targetCDS_ids_not_find.txt'],
       ['interpro_input/not_annot_CDS_ids.fasta', 'interpro_выдача/not_annot_CDS.tsv', 'not_find_interpro/not_annot_CDS_ids_not_find.txt']
]

for el in all:
    filter_fasta_by_csv(el[0], el[1], el[2])

Готово! Записано 3 записей в not_find_interpro/conflict_annot_ids_not_find.txt
Готово! Записано 13 записей в not_find_interpro/not_annot_targetCDS_ids_not_find.txt
Готово! Записано 42 записей в not_find_interpro/not_annot_CDS_ids_not_find.txt


In [274]:
# Подсчет количества последовательностей в FASTA-файле

def count_fasta_sequences(fasta_path):
    count = 0
    with open(fasta_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                count += 1
    return count


# Пример использования:
fasta_file = "not_annot_CDS_nucl_ORFs.fasta"
n_sequences = count_fasta_sequences(fasta_file)
print(f"Количество последовательностей в файле {fasta_file}: {n_sequences}")


Количество последовательностей в файле not_annot_CDS_nucl_ORFs.fasta: 395


In [275]:
# Разделение FASTA-файла на части по 100 последовательностей

def split_fasta(fasta_path, output_prefix, seqs_per_file=100):
    """
    Делит FASTA-файл на несколько частей, каждая из которых содержит seqs_per_file последовательностей.
    
    fasta_path: путь к исходному FASTA-файлу
    output_prefix: префикс имени выходных файлов (например, 'part')
    seqs_per_file: количество последовательностей в одном файле
    """
    part_num = 1
    seq_count = 0
    out = None

    with open(fasta_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                if seq_count % seqs_per_file == 0:
                    if out:
                        out.close()
                    out_path = f"{output_prefix}_part{part_num}.fasta"
                    out = open(out_path, 'w')
                    print(f"Создан файл: {out_path}")
                    part_num += 1
                seq_count += 1
            out.write(line)
    
    if out:
        out.close()
    print(f"Готово! Всего последовательностей: {seq_count}, файлов создано: {part_num - 1}")

fasta_file = "not_annot_CDS_nucl_ORFs.fasta"
split_fasta(fasta_file, output_prefix="split", seqs_per_file=100)


Создан файл: split_part1.fasta
Создан файл: split_part2.fasta
Создан файл: split_part3.fasta
Создан файл: split_part4.fasta
Готово! Всего последовательностей: 395, файлов создано: 4


In [278]:
import pandas as pd

def merge_tsv_files(file_list, output_file):
    dfs = [pd.read_csv(f, sep='\t') for f in file_list]
    merged_df = pd.concat(dfs, ignore_index=True)
    merged_df.to_csv(output_file, sep='\t', index=False)

merge_tsv_files(
    ['interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt1_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt2_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt3_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt4_output.tsv'], 
    'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.tsv'
)


In [304]:
import pandas as pd
import csv

def merge_tsv_to_csv(file_list, output_csv):
    dfs = []
    max_cols = 0
    for f in file_list:
        with open(f, 'r', encoding='utf-8-sig') as fh:
            reader = csv.reader(fh, delimiter='\t')
            max_in_file = max(len(row) for row in reader)
            max_cols = max(max_cols, max_in_file)
    for f in file_list:
        df = pd.read_csv(
            f,
            sep='\t',
            header=None,
            names=[f'col_{i+1}' for i in range(max_cols)],
            engine='python',
            quoting=csv.QUOTE_NONE,
            dtype=str
        )
        dfs.append(df)

    merged = pd.concat(dfs, ignore_index=True)
    merged.to_csv(output_csv, index=False, header=False, encoding='utf-8')

merge_tsv_to_csv(
    ['interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt1_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt2_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt3_output.tsv', 'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_prt4_output.tsv'], 
    'interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.tsv'
)


In [308]:
csv_files = [
    "interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.csv"
]
csv_file_orf = "../ORF_names.csv"
output_dir = "interpro_выдача"
print(find_ORFs(csv_files, csv_file_orf,output_dir))

Файл interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.csv обработан -> interpro_выдача/not_annot_CDS_nucl_ORFs_merged_output_orf_results.csv
None


In [307]:
import pandas as pd

df = pd.read_csv('interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.tsv', sep='\t')
df.to_csv('interpro_выдача/notannot_CDS_interpro/not_annot_CDS_nucl_ORFs_merged_output.csv', index=False)


In [310]:
notannotCDS_ids = get_ids("Astroviridae_15102025_noannotCDS.txt")
notannotCDS_ids

['PQ110289',
 'PP512783',
 'OR800315',
 'OP474168',
 'MN733910',
 'MN036159',
 'MN036131',
 'MN035893',
 'MN035838',
 'MN035574',
 'MN035429',
 'MN035369',
 'MN034849',
 'MN034422',
 'MN034197',
 'MN033907',
 'MN033591',
 'MN033177',
 'MN033141',
 'MN032977',
 'HV511037',
 'HUANSSPS']

In [311]:
notannotCDS_target_ids = get_ids("Astroviridae_15102025_noannot_targetCDS.txt")
notannotCDS_target_ids

['PX395425',
 'PX289198',
 'PX289197',
 'PX289196',
 'PQ421854',
 'PQ421853',
 'PQ421852',
 'PQ421848',
 'PQ421847',
 'PQ421846',
 'PQ421845',
 'PQ421844',
 'PQ421843',
 'PQ421842',
 'PQ421841',
 'PP211223',
 'PQ150499',
 'PQ161557',
 'PQ055527',
 'PQ110289',
 'PP512783',
 'OR800315',
 'OQ802761',
 'OQ709194',
 'OQ709193',
 'OQ709192',
 'OQ709191',
 'OQ709190',
 'OQ709189',
 'OQ709188',
 'ON932807',
 'OP413956',
 'OP413950',
 'OP474168',
 'OM514376',
 'MT568535',
 'MZ182272',
 'MZ182271',
 'MZ291967',
 'MZ443626',
 'MW924358',
 'MW924357',
 'MW924356',
 'MW853972',
 'MW645022',
 'MW645021',
 'MW347540',
 'MW346737',
 'MN841288',
 'MN733910',
 'NC_040647',
 'MN036159',
 'MN036131',
 'MN035893',
 'MN035838',
 'MN035574',
 'MN035429',
 'MN035369',
 'MN034849',
 'MN034422',
 'MN034197',
 'MN033907',
 'MN033591',
 'MN033177',
 'MN033141',
 'MN032977',
 'MH188020',
 'KX290465',
 'HV511037',
 'HUANSSPS']

In [314]:
diff = list(set(notannotCDS_target_ids) - set(notannotCDS_ids))
print(diff)


['OQ709190', 'PQ421845', 'MW853972', 'OQ709193', 'MW924357', 'PQ055527', 'OQ709192', 'OQ709189', 'PX395425', 'PX289196', 'MZ443626', 'OM514376', 'PQ421854', 'MW346737', 'PQ421853', 'PX289197', 'MN841288', 'PQ421852', 'PQ421843', 'OP413956', 'MW645022', 'MH188020', 'NC_040647', 'ON932807', 'MZ182271', 'PQ421846', 'PX289198', 'MW924358', 'MT568535', 'MZ291967', 'MW924356', 'PQ421841', 'OQ709191', 'PP211223', 'MZ182272', 'OQ709188', 'MW347540', 'PQ150499', 'PQ421848', 'PQ421847', 'OP413950', 'OQ709194', 'KX290465', 'PQ161557', 'OQ802761', 'PQ421844', 'PQ421842', 'MW645021']


In [317]:
import pandas as pd

# Пути к файлам
file1 = "interpro_выдача/pfam and superfamily ORFs/not_annot_targetCDS_ids_pfam_superfamily_orf_results.csv"
file2 = "interpro_выдача/not pfam and superfamily ORFs/not_annot_targetCDS_ids_not_pfam_superfamily_orf_results.csv"
output = "not_annot_targetCDS_summary_orfs.csv"
id_col = "ID белка"

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)
df1["id_prefix"] = df1[id_col].astype(str).str.split(".").str[0]
df2["id_prefix"] = df2[id_col].astype(str).str.split(".").str[0]
result_rows = []

for full_id in diff:
    prefix = full_id.split(".")[0]
    matches1 = df1[df1["id_prefix"] == prefix].copy()
    matches2 = df2[df2["id_prefix"] == prefix].copy()
    if not matches1.empty or not matches2.empty:
        matches = pd.concat([matches1, matches2], ignore_index=True)
        result_rows.append(matches)
    else:
        empty_row = pd.DataFrame([{id_col: full_id}])
        result_rows.append(empty_row)


result = pd.concat(result_rows, ignore_index=True)
result.to_csv(output, index=False)
print(f"✅ Готово! Все совпадения и пустые строки записаны в {output}")


✅ Готово! Все совпадения и пустые строки записаны в not_annot_targetCDS_summary_orfs.csv
