In [9]:
library(magick)
library(tesseract)
library(pdftools)

# Parámetros
text      <- "Hello OCR!"
font_size <- 100
font      <- "Arial"
padding   <- 30  # padding más generoso para evitar recorte

# Crear un canvas suficientemente grande (estimación)
canvas_width  <- 600
canvas_height <- 200

canvas <- image_blank(width = canvas_width,
                      height = canvas_height,
                      color = "white")

# Añadir texto centrado
annotation <- image_annotate(
  canvas,
  text,
  font  = font,
  size  = font_size,
  color = "black",
  gravity = "NorthWest",  # posicionar en esquina superior izquierda
  location = paste0("+", padding, "+", padding)
)

# Guardar PNG
png_path <- "20250701_big_text.png"
image_write(annotation, path = png_path, format = "png")

# Guardar PDF
pdf_path <- "20250701_big_text.pdf"
image_write(annotation, path = pdf_path, format = "pdf")

# Convertir PDF a PNG con pdftools (más confiable)
png_from_pdf <- "20250701_big_text_from_pdf.png"
pdf_convert(pdf_path, format = "png", pages = 1, filenames = png_from_pdf, dpi = 300)

# Leer imagen convertida
pages <- image_read(png_from_pdf)

# OCR con tesseract
ocr_text <- tesseract::ocr(pages)

# Guardar texto OCR
txt_path <- "20250701_ocr_output.txt"
writeLines(ocr_text, con = txt_path)

message("Generated files: ", png_path, ", ", pdf_path, ", ", txt_path)



"2 arguments not used by format '20250701_big_text_from_pdf.png'"


Converting page 1 to 20250701_big_text_from_pdf.png... done!


Generated files: 20250701_big_text.png, 20250701_big_text.pdf, 20250701_ocr_output.txt

