Skip to content

Commit

Permalink
refactors XpdfUtils (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
codyfrehr committed Feb 25, 2024
1 parent 3fb7729 commit 8c20b92
Show file tree
Hide file tree
Showing 10 changed files with 220 additions and 143 deletions.
3 changes: 2 additions & 1 deletion README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
//TODO: add badge for code coverage?
//TODO: add badge for codeQL?

image::https://github.com/codyfrehr/xpdf-api/actions/workflows/ci.yml/badge.svg?event=push&branch=main[]
image:https://github.com/codyfrehr/xpdf-api/actions/workflows/ci.yml/badge.svg?event=push&branch=main[]
{empty}

Xpdf API is a collection of Java APIs for https://www.xpdfreader.com/about.html[Xpdf], the open source C++ library for operating on PDF files.
Xpdf is an invaluable PDF toolkit, and this project aims to make it more accessible to the Java community.
Expand Down
53 changes: 2 additions & 51 deletions common-api/src/main/java/io/xpdf/api/common/util/XpdfUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,61 +30,12 @@
*/
public class XpdfUtils {

/**
* Gets the resource name of the <em>pdftotext</em> executable native to this system.
*
* @return executable resource name
* @since 1.0.0
*/
public static String getPdfTextExecutableResourceName() {
return String.format("xpdf/%s/%s", getTargetSystem(), getPdfTextExecutableName());
}

/**
* Gets the path where the native <em>pdftotext</em> executable should be copied so that it can be accessed by OS.
*
* @return executable path
* @since 1.0.0
*/
public static Path getPdfTextExecutablePath() {
return getXpdfTempPath().resolve("pdf-text").resolve("bin").resolve(getPdfTextExecutableName());
}

/**
* Gets the temporary directory where the <em>pdftotext</em> executable should write output.
*
* @return temporary directory
* @since 1.0.0
*/
public static Path getPdfTextTempOutputPath() {
return getXpdfTempPath().resolve("pdf-text").resolve("out");
}

/**
* Gets the maximum amount of time in seconds allotted to the <em>pdftotext</em> process before timing out.
*
* @return timeout length in seconds for process
* @since 1.0.0
*/
public static Integer getPdfTextTimeoutSeconds() {
return 30;
}

/**
* Gets the name of the native <em>pdftotext</em> executable.
*
* @return executable name
*/
protected static String getPdfTextExecutableName() {
return String.format("pdftotext%s", getTargetSystem().contains("windows") ? ".exe" : "");
}

/**
* Gets the temporary directory utilized by native <em>Xpdf</em> executables.
*
* @return temporary directory
*/
protected static Path getXpdfTempPath() {
public static Path getXpdfTempPath() {
return Paths.get(System.getProperty("java.io.tmpdir")).resolve( "xpdf-api");
}

Expand All @@ -94,7 +45,7 @@ protected static Path getXpdfTempPath() {
*
* @return representation of OS and bits
*/
protected static String getTargetSystem() {
public static String getTargetSystem() {
// get JVM bit architecture
val bit = System.getProperty("sun.arch.data.model");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package io.xpdf.api.common.util

import io.kotest.assertions.throwables.shouldThrowWithMessage
import io.kotest.matchers.shouldBe
import io.mockk.every
import io.mockk.mockkStatic
import io.mockk.unmockkStatic
import io.xpdf.api.common.exception.XpdfRuntimeException
Expand Down Expand Up @@ -50,61 +49,6 @@ class XpdfUtilsTest {
System.setProperty("os.name", originalOsName)
}

@Test
fun `should get pdf text executable resource name`() {
// given
every { XpdfUtils.getTargetSystem() } returns "targetSystem"
every { XpdfUtils.getPdfTextExecutableName() } returns "executableName"

// when then
XpdfUtils.getPdfTextExecutableResourceName() shouldBe "xpdf/targetSystem/executableName"
}

@Test
fun `should get pdf text executable path`() {
// given
every { XpdfUtils.getXpdfTempPath() } returns Paths.get("tempPath")
every { XpdfUtils.getPdfTextExecutableName() } returns "executableName"

// when then
XpdfUtils.getPdfTextExecutablePath() shouldBe Paths.get("tempPath", "pdf-text", "bin", "executableName")
}

@Test
fun `should get pdf text temp output path`() {
// given
every { XpdfUtils.getXpdfTempPath() } returns Paths.get("tempPath")

// when then
XpdfUtils.getPdfTextTempOutputPath() shouldBe Paths.get("tempPath", "pdf-text", "out")
}

@Test
fun `should get pdf text timeout seconds`() {
// given
every { XpdfUtils.getPdfTextTimeoutSeconds() } returns 99

// when then
XpdfUtils.getPdfTextTimeoutSeconds() shouldBe 99
}

@ParameterizedTest
@CsvSource(
"linux/bin32, pdftotext",
"linux/bin64, pdftotext",
"mac/bin64, pdftotext",
"windows/bin32, pdftotext.exe",
"windows/bin64, pdftotext.exe",
)
fun `should get pdf text executable name`(targetSystem: String,
executableName: String) {
// given
every { XpdfUtils.getTargetSystem() } returns targetSystem

// when then
XpdfUtils.getPdfTextExecutableName() shouldBe executableName
}

@Test
fun `should get xpdf temp path`() {
// when then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
package io.xpdf.api.pdftext.autoconfigure;

import io.xpdf.api.common.util.XpdfUtils;
import io.xpdf.api.pdftext.PdfTextTool;
import io.xpdf.api.pdftext.util.PdfTextUtils;
import org.springframework.boot.autoconfigure.AutoConfiguration;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
Expand All @@ -40,11 +40,11 @@ public class PdfTextToolAutoConfiguration {
@ConditionalOnMissingBean
public PdfTextTool pdfTextTool(PdfTextToolProperties pdfTextToolProperties) {
Path executablePath = pdfTextToolProperties.getExecutablePath() == null
? XpdfUtils.getPdfTextExecutablePath()
? PdfTextUtils.getPdfTextExecutablePath()
: pdfTextToolProperties.getExecutablePath();

Integer timeoutSeconds = pdfTextToolProperties.getTimeoutSeconds() == null
? XpdfUtils.getPdfTextTimeoutSeconds()
? PdfTextUtils.getPdfTextTimeoutSeconds()
: pdfTextToolProperties.getTimeoutSeconds();

return PdfTextTool.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import io.mockk.every
import io.mockk.mockk
import io.mockk.mockkStatic
import io.mockk.unmockkStatic
import io.xpdf.api.common.util.XpdfUtils
import io.xpdf.api.pdftext.PdfTextTool
import io.xpdf.api.pdftext.util.PdfTextUtils
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
Expand Down Expand Up @@ -83,9 +83,9 @@ class PdfTextToolAutoConfigurationTest {
every { toFile() } returns executableFile
}

mockkStatic(XpdfUtils::class)
every { XpdfUtils.getPdfTextExecutablePath() } returns executablePath
every { XpdfUtils.getPdfTextTimeoutSeconds() } returns 99
mockkStatic(PdfTextUtils::class)
every { PdfTextUtils.getPdfTextExecutablePath() } returns executablePath
every { PdfTextUtils.getPdfTextTimeoutSeconds() } returns 99

context.register(PdfTextToolAutoConfiguration::class.java)
context.refresh()
Expand All @@ -97,7 +97,7 @@ class PdfTextToolAutoConfigurationTest {
pdfTextTool.executableFile shouldBe executableFile
pdfTextTool.timeoutSeconds shouldBe 99

unmockkStatic(XpdfUtils::class)
unmockkStatic(PdfTextUtils::class)
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import io.xpdf.api.common.XpdfTool;
import io.xpdf.api.common.exception.*;
import io.xpdf.api.common.util.XpdfUtils;
import io.xpdf.api.pdftext.util.PdfTextUtils;
import lombok.Builder;
import lombok.Getter;
import lombok.ToString;
Expand All @@ -39,7 +39,7 @@
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static io.xpdf.api.common.util.XpdfUtils.*;
import static io.xpdf.api.pdftext.util.PdfTextUtils.*;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;

Expand Down Expand Up @@ -272,7 +272,7 @@ protected void validate(PdfTextRequest request) throws XpdfValidationException {
*
* @param request {@link PdfTextRequest}
* @return text file
* @throws IOException if canonical path of {@link XpdfUtils#getPdfTextTempOutputPath()} is invalid
* @throws IOException if canonical path of {@link PdfTextUtils#getPdfTextTempOutputPath()} is invalid
*/
protected File initializeTextFile(PdfTextRequest request) throws IOException {
final File textFile;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* PdfText API - An API for accessing a native pdftotext library.
* Copyright © 2024 xpdf.io (info@xpdf.io)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package io.xpdf.api.pdftext.util;

import io.xpdf.api.common.XpdfTool;
import io.xpdf.api.common.util.XpdfUtils;

import java.nio.file.Path;

/**
 * Helper methods for locating and configuring the native <em>pdftotext</em> executable used by an {@link XpdfTool}.
 *
 * @since 1.0.0
 */
public class PdfTextUtils {

    /**
     * Builds the resource name of the <em>pdftotext</em> executable that is native to this system.
     * The name has the form {@code xpdf/<target system>/<executable name>}.
     *
     * @return executable resource name
     * @since 1.0.0
     */
    public static String getPdfTextExecutableResourceName() {
        final String targetSystem = XpdfUtils.getTargetSystem();
        final String executableName = getPdfTextExecutableName();
        return "xpdf/" + targetSystem + "/" + executableName;
    }

    /**
     * Gets the location to which the native <em>pdftotext</em> executable should be copied so that the OS can run it.
     * The location is {@code pdf-text/bin/<executable name>} beneath the shared <em>Xpdf</em> temporary directory.
     *
     * @return executable path
     * @since 1.0.0
     */
    public static Path getPdfTextExecutablePath() {
        final Path binDirectory = XpdfUtils.getXpdfTempPath()
                .resolve("pdf-text")
                .resolve("bin");
        return binDirectory.resolve(getPdfTextExecutableName());
    }

    /**
     * Gets the temporary directory into which the <em>pdftotext</em> executable should write its output.
     * The directory is {@code pdf-text/out} beneath the shared <em>Xpdf</em> temporary directory.
     *
     * @return temporary directory
     * @since 1.0.0
     */
    public static Path getPdfTextTempOutputPath() {
        final Path pdfTextDirectory = XpdfUtils.getXpdfTempPath().resolve("pdf-text");
        return pdfTextDirectory.resolve("out");
    }

    /**
     * Gets the maximum number of seconds the <em>pdftotext</em> process may run before it times out.
     *
     * @return timeout length in seconds for process
     * @since 1.0.0
     */
    public static Integer getPdfTextTimeoutSeconds() {
        final int defaultTimeoutSeconds = 30;
        return defaultTimeoutSeconds;
    }

    /**
     * Gets the file name of the native <em>pdftotext</em> executable.
     * The {@code .exe} extension is appended only when the target system name contains {@code windows}.
     *
     * @return executable name
     */
    protected static String getPdfTextExecutableName() {
        final boolean isWindows = XpdfUtils.getTargetSystem().contains("windows");
        return isWindows ? "pdftotext.exe" : "pdftotext";
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import io.xpdf.api.common.util.XpdfUtils
import io.xpdf.api.pdftext.options.PdfTextEncoding
import io.xpdf.api.pdftext.options.PdfTextEndOfLine
import io.xpdf.api.pdftext.options.PdfTextFormat
import io.xpdf.api.pdftext.util.PdfTextUtils
import org.apache.commons.io.FileUtils
import java.io.File
import java.nio.file.Paths
Expand Down Expand Up @@ -58,7 +59,7 @@ class PdfTextToolCucumberSteps {
*/
@Given("a PdfTextTool with {int} second timeout and dynamic executable file")
fun `a PdfTextTool with TIMEOUT_SECONDS second timeout and dynamic executable file`(timeoutSeconds: Int) {
val executableResourceStream = this::class.java.classLoader.getResourceAsStream(XpdfUtils.getPdfTextExecutableResourceName())!!
val executableResourceStream = this::class.java.classLoader.getResourceAsStream(PdfTextUtils.getPdfTextExecutableResourceName())!!
val executableFile = Paths.get(System.getProperty("java.io.tmpdir")).resolve("some.exe").toFile()
FileUtils.copyInputStreamToFile(executableResourceStream, executableFile)
executableFile.setExecutable(true)
Expand Down

0 comments on commit 8c20b92

Please sign in to comment.