Skip to content

Commit

Permalink
perf: page streaming performance
Browse files Browse the repository at this point in the history
cache the zip or pdf document to avoid recreating it at every page access
  • Loading branch information
gotson committed May 24, 2021
1 parent 02f61ba commit 8de01a6
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.gotson.komga.infrastructure.mediacontainer

import com.github.benmanes.caffeine.cache.Caffeine
import mu.KotlinLogging
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.PDPage
Expand All @@ -11,6 +12,7 @@ import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.io.ByteArrayOutputStream
import java.nio.file.Path
import java.util.concurrent.TimeUnit
import javax.imageio.ImageIO
import kotlin.math.roundToInt

Expand All @@ -25,6 +27,12 @@ class PdfExtractor(
// ImageIO format name used when encoding a rendered page in getEntryStream.
private val imageIOFormat = "jpeg"
// Numerator of the render scale computed by PDPage.getScale(): scale = resolution / shortest crop-box side.
// NOTE(review): presumably this makes the shorter side of a rendered page about 1536 px — confirm against PDFBox's scale semantics.
private val resolution = 1536F

/**
 * Open PDF documents keyed by file path, so that `getEntryStream` does not have to reload
 * the document for every page it serves.
 *
 * NOTE(review): the removal listener closes a document as soon as its entry is removed; nothing
 * visible here prevents that from happening while a caller still holds the document — confirm.
 */
private val cache = Caffeine.newBuilder()
.maximumSize(20) // upper bound on the number of cached documents
.expireAfterAccess(1, TimeUnit.MINUTES) // entries expire one minute after their last access
.removalListener { _: Path?, pdf: PDDocument?, _ -> pdf?.close() } // close the document when its entry is removed
.build<Path, PDDocument>()

/** Returns the media types handled by this extractor: PDF only. */
override fun mediaTypes(): List<String> {
  return listOf("application/pdf")
}

override fun getEntries(path: Path): List<MediaContainerEntry> =
Expand All @@ -37,16 +45,16 @@ class PdfExtractor(
}
}

override fun getEntryStream(path: Path, entryName: String): ByteArray =
PDDocument.load(path.toFile()).use { pdf ->
val pageNumber = entryName.toInt()
val page = pdf.getPage(pageNumber)
val image = PDFRenderer(pdf).renderImage(pageNumber, page.getScale(), ImageType.RGB)
ByteArrayOutputStream().use { out ->
ImageIO.write(image, imageIOFormat, out)
out.toByteArray()
}
/**
 * Renders one page of the PDF at [path] and returns it encoded with [imageIOFormat].
 *
 * The [PDDocument] is obtained from [cache] (and loaded on a miss) instead of being reopened
 * for every page. It is therefore deliberately NOT closed here: the cache's removal listener
 * closes it when the entry is removed.
 *
 * @param path location of the PDF file
 * @param entryName page index as a decimal string, passed as-is to [PDDocument.getPage]
 * @return the page rendered as an RGB image, encoded as a byte array
 * @throws NumberFormatException if [entryName] is not a valid integer
 * @throws IllegalStateException if the cache yields no document for [path]
 */
override fun getEntryStream(path: Path, entryName: String): ByteArray {
  val pdf = checkNotNull(cache.get(path) { PDDocument.load(path.toFile()) }) {
    "Could not load PDF document: $path"
  }
  val pageNumber = entryName.toInt()

  // The cached document is shared between callers, and a PDFBox document must only be
  // accessed by one thread at a time, so page lookup and rendering are serialized per document.
  // NOTE(review): this does not protect against the cache closing the document on eviction
  // while it is being rendered.
  val image = synchronized(pdf) {
    val page = pdf.getPage(pageNumber)
    PDFRenderer(pdf).renderImage(pageNumber, page.getScale(), ImageType.RGB)
  }

  // Encoding works on the already-rendered image only, so it can run outside the lock.
  return ByteArrayOutputStream().use { out ->
    ImageIO.write(image, imageIOFormat, out)
    out.toByteArray()
  }
}

/** Computes the render scale for this page: [resolution] divided by the shorter side of the crop box. */
private fun PDPage.getScale(): Float {
  val shortestSide = minOf(cropBox.width, cropBox.height)
  return resolution / shortestSide
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package org.gotson.komga.infrastructure.mediacontainer

import com.github.benmanes.caffeine.cache.Caffeine
import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.nio.file.Path
import java.util.concurrent.TimeUnit

// File-level logger, private to this file.
private val logger = KotlinLogging.logger {}

Expand All @@ -16,6 +18,12 @@ class ZipExtractor(
private val imageAnalyzer: ImageAnalyzer
) : MediaContainerExtractor {

/**
 * Open zip archives keyed by file path, so that `getEntryStream` does not have to reopen
 * the archive for every entry it serves.
 *
 * NOTE(review): the removal listener closes an archive as soon as its entry is removed; nothing
 * visible here prevents that from happening while a caller is still reading from it — confirm.
 */
private val cache = Caffeine.newBuilder()
.maximumSize(20) // upper bound on the number of cached archives
.expireAfterAccess(1, TimeUnit.MINUTES) // entries expire one minute after their last access
.removalListener { _: Path?, zip: ZipFile?, _ -> zip?.close() } // close the archive when its entry is removed
.build<Path, ZipFile>()

// Case-insensitive natural-order string comparator; used to sort archive entries by name.
private val natSortComparator: Comparator<String> = CaseInsensitiveSimpleNaturalComparator.getInstance()

/** Returns the media types handled by this extractor: zip archives only. */
override fun mediaTypes(): List<String> {
  return listOf("application/zip")
}
Expand All @@ -42,8 +50,8 @@ class ZipExtractor(
.sortedWith(compareBy(natSortComparator) { it.name })
}

override fun getEntryStream(path: Path, entryName: String): ByteArray =
ZipFile(path.toFile()).use { zip ->
zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
}
/**
 * Reads the complete content of the entry named [entryName] from the zip archive at [path].
 *
 * The [ZipFile] is obtained from [cache] (and opened on a miss) instead of being reopened for
 * every entry. It is therefore deliberately NOT closed here: only the entry's input stream is
 * closed, and the cache's removal listener closes the archive when the entry is removed.
 *
 * @param path location of the zip archive
 * @param entryName name of the entry to read, as stored in the archive
 * @return the entry's bytes
 * @throws IllegalArgumentException if the archive has no entry named [entryName]
 * @throws IllegalStateException if the cache yields no archive for [path]
 */
override fun getEntryStream(path: Path, entryName: String): ByteArray {
  val zip = checkNotNull(cache.get(path) { ZipFile(path.toFile()) }) {
    "Could not open zip file: $path"
  }
  // getEntry returns null for an unknown name; fail with a clear message instead of letting
  // the null propagate into getInputStream.
  val entry = requireNotNull(zip.getEntry(entryName)) {
    "Entry '$entryName' not found in $path"
  }
  return zip.getInputStream(entry).use { it.readBytes() }
}
}

0 comments on commit 8de01a6

Please sign in to comment.