In [None]:
import re
import csv
import os

# Read the whole corpus into memory as a single string.
corpus_path = "/content/17-18.txt"
try:
    # "utf-8-sig" decodes plain UTF-8 as well, but drops a leading BOM if the
    # file has one; str.strip() does not remove U+FEFF, so with plain "utf-8"
    # the BOM would stay attached to the first sentence.
    with open(corpus_path, 'r', encoding='utf-8-sig') as f:
        text = f.read()
except FileNotFoundError as err:
    # Open first and handle the failure, rather than checking for existence
    # beforehand: one filesystem access and no check-then-open race. The
    # exception type and message are the same as before.
    raise FileNotFoundError(f"Файл корпуса не найден: {corpus_path}") from err


# Load the term list: one term per line, blank lines ignored, lower-cased so
# that the later comparison against lower-cased sentences is case-insensitive.
terms_path = "/content/литургич_термины_clean4.txt"

liturgical_terms = set()
try:
    # "utf-8-sig" also reads plain UTF-8, but strips a leading BOM. Without
    # this, a BOM-prefixed file would turn the first term into "\ufeff<term>",
    # which can never match any sentence yet is still counted as loaded.
    with open(terms_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            term = line.strip().lower()
            if term:
                liturgical_terms.add(term)
except FileNotFoundError as err:
    # Open first and handle the failure instead of a separate existence
    # check; the exception type and message are the same as before.
    raise FileNotFoundError(f"Файл терминов не найден: {terms_path}") from err

print(f"Загружено {len(liturgical_terms)} литургических терминов.")

# Cut the corpus into sentences: split on any whitespace run that directly
# follows '.', '!' or '?'. The lookbehind keeps the punctuation mark attached
# to the sentence it ends.
sentences = re.split(r'(?<=[.!?])\s+', text.strip())

# Collect one record per sentence that mentions at least one term.
# NOTE: this is a plain substring test on the lower-cased sentence, so a term
# is also reported when it only occurs inside a longer word.
matches = []

for sentence in sentences:
    if sentence.strip():
        lowered = sentence.lower()
        hits = {candidate for candidate in liturgical_terms if candidate in lowered}
        if hits:
            # Terms are de-duplicated and listed alphabetically in one cell.
            record = {
                "sentence": sentence,
                "found_terms": "; ".join(sorted(hits)),
            }
            matches.append(record)

print(f"Нашел {len(matches)} предложений с литургическими терминами.")

# Export the matches as a two-column CSV with a header row.
# "utf-8-sig" writes a BOM at the start of the file (presumably so that
# spreadsheet software detects the encoding - confirm with the consumer).
# newline='' is what the csv module expects for files it writes to.
output_path = "/content/liturgical_sentences.csv"
csv_columns = ["sentence", "found_terms"]
with open(output_path, "w", encoding="utf-8-sig", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=csv_columns)
    writer.writeheader()
    for row in matches:
        writer.writerow(row)

print(f"Результат сохранён: {output_path}")

Загружено 344 литургических терминов.
Найдено 35 предложений с литургическими терминами.
Результат сохранён: /content/liturgical_sentences.csv
