-
Notifications
You must be signed in to change notification settings - Fork 0
/
replace_text_reflow.py
84 lines (64 loc) · 3.12 KB
/
replace_text_reflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import fitz # Import the PyMuPDF library
def replace_text_and_reflow(
    original_pdf_path: str,
    output_pdf_path: str,
    tag: str,
    replacement_text: str,
) -> None:
    """Replace every occurrence of a tag in a PDF, sizing each text box to the new text.

    Each page is searched for ``tag``. Every match is removed with a white-filled
    redaction, and ``replacement_text`` is then written in Helvetica 11 pt into a
    box that starts at the match's top-left corner and is as wide as the
    replacement text (rather than as wide as the removed tag).

    Args:
        original_pdf_path (str): The path to the original PDF file.
        output_pdf_path (str): The path where the modified PDF will be saved.
        tag (str): The text to be replaced in the PDF.
        replacement_text (str): The text that will replace the tag.

    Returns:
        None
    """
    font = "helv"  # Helvetica font
    font_size = 11  # Font size
    # Slack added to the text box so the fit check in insert_textbox does not
    # reject text whose measured size equals the box size exactly.
    # NOTE(review): 1.5 is a deliberately generous line-height allowance for
    # Helvetica; adjust if a different font is used.
    width_padding = 1.0
    line_height = font_size * 1.5

    # The context manager closes the document even if processing or saving fails.
    with fitz.open(original_pdf_path) as doc:
        # Width and line count depend only on the replacement text: compute once.
        text_width = fitz.get_text_length(replacement_text, font, font_size)
        line_count = max(1, len(replacement_text.splitlines()))
        min_box_height = line_count * line_height

        for page in doc:
            text_instances = page.search_for(tag)
            if not text_instances:
                continue

            # Mark every match first and apply the redactions once per page.
            # No replacement text is attached to the redaction itself; the text
            # box below is the only place the new text gets drawn, so it is not
            # rendered twice.
            for inst in text_instances:
                page.add_redact_annot(inst, fill=(1, 1, 1))
            page.apply_redactions()

            if not replacement_text:
                # Nothing to draw; the tag has simply been removed.
                continue

            # Insert the new text only after all redactions on this page are
            # applied, so a later redaction cannot erase text written for an
            # earlier, overlapping match.
            for inst in text_instances:
                new_rect = fitz.Rect(
                    inst.x0,
                    inst.y0,
                    inst.x0 + text_width + width_padding,
                    max(inst.y1, inst.y0 + min_box_height),
                )
                page.insert_textbox(
                    new_rect, replacement_text, fontname=font, fontsize=font_size
                )

        # Save the modified document
        doc.save(output_pdf_path)
def replace_text_in_pdf(
    original_pdf_path: str,
    output_pdf_path: str,
    tag: str,
    replacement_text: str,
) -> None:
    """Replace specified text in a PDF file with new text.

    Each page is searched for ``tag``. Every match is marked with a redaction
    annotation that carries ``replacement_text``; applying the redactions removes
    the matched text and places the replacement inside the rectangle the match
    occupied. Unlike ``replace_text_and_reflow``, the rectangle is not resized
    to the width of the new text.

    Args:
        original_pdf_path (str): The path to the original PDF file.
        output_pdf_path (str): The path where the modified PDF will be saved.
        tag (str): The text to be replaced in the PDF.
        replacement_text (str): The text that will replace the tag.

    Returns:
        None
    """
    # The context manager closes the document even if processing or saving fails.
    with fitz.open(original_pdf_path) as doc:
        for page in doc:
            text_instances = page.search_for(tag)
            if not text_instances:
                # No match on this page: leave it untouched.
                continue

            # Mark every match, then apply all redactions for the page in one pass.
            for inst in text_instances:
                page.add_redact_annot(inst, text=replacement_text)
            page.apply_redactions()

        # Save the modified document
        doc.save(output_pdf_path)
# Example usage
# replace_text_and_reflow("path/to/original.pdf", "path/to/output.pdf", "old text", "new text")