In [11]:
import re
import os
from fpdf import FPDF


def split_into_pages(text):
    """Break *text* into chunks, one per bracketed ``[dd:dd:dd]`` marker.

    The split point is a zero-width lookahead, so every marker (three
    two-digit groups separated by colons, e.g. ``[00:01:23]``) is kept at the
    start of the chunk that follows it. Text preceding the first marker
    becomes a chunk of its own.

    Returns:
        A list of chunks with surrounding whitespace stripped; chunks that
        are empty after stripping are left out.
    """
    marker_boundary = re.compile(r'(?=\[[0-9]{2}:[0-9]{2}:[0-9]{2}\])')

    pages = []
    for chunk in marker_boundary.split(text):
        trimmed = chunk.strip()
        if trimmed:
            pages.append(trimmed)
    return pages


def txt_to_pdf(input_path, output_path,
               font_name='Arial', font_size=12, line_height=8,
               unicode_font_path=None, unicode_font_name='DejaVu'):
    """Render a UTF-8 text file as a PDF, one PDF page per marker section.

    The file is read as UTF-8 (a leading BOM is dropped), split with
    ``split_into_pages`` and every resulting section is started on a fresh
    PDF page. Each line of a section is written with ``multi_cell`` so long
    lines wrap across the full page width.

    Args:
        input_path: Path of the text file to read.
        output_path: Path of the PDF file to write.
        font_name: Font used when no Unicode font is supplied.
        font_size: Font size passed to ``set_font``.
        line_height: Height of each written line, passed to ``multi_cell``.
        unicode_font_path: Optional path to a font file to register; when
            given, it is used instead of ``font_name``.
        unicode_font_name: Family name under which the Unicode font is
            registered.
    """
    with open(input_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    pages = split_into_pages(content)
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Register Unicode font if provided, else use default
    if unicode_font_path:
        pdf.add_font(unicode_font_name, '', unicode_font_path, uni=True)
        pdf.set_font(unicode_font_name, size=font_size)
        latin1_only = False
    else:
        pdf.set_font(font_name, size=font_size)
        # Without a registered font only the built-in core fonts are
        # available, and those can only encode Latin-1 text.
        latin1_only = True

    for page in pages:
        pdf.add_page()
        for line in page.splitlines():
            if latin1_only:
                # Swap characters the core font cannot encode (e.g.
                # typographic quotes or dashes) for '?' instead of letting
                # the whole conversion fail with an encoding error.
                line = line.encode('latin-1', 'replace').decode('latin-1')
            pdf.multi_cell(0, line_height, line)

    pdf.output(output_path)
    print(f"Created PDF: {output_path}")


def process_directory(input_dir, output_dir,
                      font_name='Arial', font_size=12, line_height=8,
                      unicode_font_path=None, unicode_font_name='DejaVu'):
    """Convert every ``.txt`` file directly inside *input_dir* to a PDF.

    ``output_dir`` is created if it does not exist. Each text file (extension
    matched case-insensitively) is converted with ``txt_to_pdf`` into a PDF
    with the same base name inside ``output_dir``. Sub-directories are not
    descended into.

    Args:
        input_dir: Directory that is scanned for ``.txt`` files.
        output_dir: Directory that receives the generated PDFs.
        font_name, font_size, line_height, unicode_font_path,
        unicode_font_name: Forwarded unchanged to ``txt_to_pdf``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so files are processed (and reported) in a reproducible order;
    # os.listdir() returns entries in arbitrary order.
    for fname in sorted(os.listdir(input_dir)):
        if not fname.lower().endswith('.txt'):
            continue

        in_path = os.path.join(input_dir, fname)
        # A directory that merely happens to be named "*.txt" cannot be
        # opened as a text file; skip anything that is not a regular file.
        if not os.path.isfile(in_path):
            continue

        out_path = os.path.join(output_dir, os.path.splitext(fname)[0] + '.pdf')
        txt_to_pdf(in_path, out_path,
                   font_name, font_size, line_height,
                   unicode_font_path, unicode_font_name)

In [13]:
# Convert every .txt file in the videos folder to a PDF. The same directory is
# passed as both input and output, so each PDF is written next to its source.
process_directory('../../RAG/data/etapart/videos/', '../../RAG/data/etapart/videos/')

Created PDF: ../../RAG/data/etapart/videos/Wartung einen ETAFlex MK3.pdf
Created PDF: ../../RAG/data/etapart/videos/Gasdetektion.pdf
Created PDF: ../../RAG/data/etapart/videos/Wartung Dunkelstrahler ETARay 3Hu 9m.pdf
Created PDF: ../../RAG/data/etapart/videos/Durchführung von Dichtheitsprüfung udn Abgasmessungen.pdf
Created PDF: ../../RAG/data/etapart/videos/Ausbau und Reinigung eines Abgasventilators GE133.pdf
Created PDF: ../../RAG/data/etapart/videos/Erstinbetriebnahme Dunkelstrahler ETARay 3HU 9m.pdf
Created PDF: ../../RAG/data/etapart/videos/Erstinbetriebnahme eines Robonect R-Gerätes G30.pdf
