Skip to content

Commit

Permalink
feat: get image dimensions during media analysis
Browse files Browse the repository at this point in the history
Media analysis will now get the width and height of each image in a book.
This is required for #123.
  • Loading branch information
gotson committed Jul 31, 2020
1 parent 6dc1e19 commit f9d55ec
Show file tree
Hide file tree
Showing 13 changed files with 123 additions and 25 deletions.
@@ -0,0 +1,4 @@
-- Add nullable width and height columns to media_page.
-- Both columns allow NULL, so existing rows need no value.
alter table media_page
add column width int NULL;
alter table media_page
add column height int NULL;
Expand Up @@ -2,5 +2,6 @@ package org.gotson.komga.domain.model

data class BookPage(
val fileName: String,
val mediaType: String
val mediaType: String,
val dimension: Dimension? = null
)
@@ -0,0 +1,6 @@
package org.gotson.komga.domain.model

/**
 * A width/height pair.
 *
 * @property width the horizontal extent
 * @property height the vertical extent
 */
data class Dimension(val width: Int, val height: Int)
Expand Up @@ -3,5 +3,6 @@ package org.gotson.komga.domain.model
data class MediaContainerEntry(
val name: String,
val mediaType: String? = null,
val comment: String? = null
val comment: String? = null,
val dimension: Dimension? = null
)
Expand Up @@ -51,7 +51,7 @@ class BookAnalyzer(
entry.mediaType?.let { contentDetector.isImage(it) } ?: false
}.let { (images, others) ->
Pair(
images.map { BookPage(it.name, it.mediaType!!) },
images.map { BookPage(it.name, it.mediaType!!, it.dimension) },
others
)
}
Expand Down
@@ -0,0 +1,28 @@
package org.gotson.komga.infrastructure.image

import mu.KotlinLogging
import org.gotson.komga.domain.model.Dimension
import org.springframework.stereotype.Service
import java.io.InputStream
import javax.imageio.ImageIO

private val logger = KotlinLogging.logger {}

/**
 * Reads image dimensions through the [ImageIO] reader plugins.
 */
@Service
class ImageAnalyzer {

  /**
   * Returns the width and height of the first image (index 0) found in [stream].
   *
   * Returns `null` when ImageIO cannot wrap the stream or has no reader for its content.
   * Exceptions thrown by the reader while reading the image propagate to the caller.
   * [stream] is always closed before this function returns.
   *
   * @param stream the raw image bytes; consumed and closed by this call
   * @return the image dimension, or `null` if the content cannot be handled
   */
  fun getDimension(stream: InputStream): Dimension? =
    stream.use { input ->
      // createImageInputStream is documented to return null when no service provider
      // accepts the input; passing null to getImageReaders would throw.
      val imageStream = ImageIO.createImageInputStream(input)
      if (imageStream == null) {
        logger.warn { "could not create image input stream" }
        null
      } else {
        imageStream.use { fis ->
          val readers = ImageIO.getImageReaders(fis)
          if (readers.hasNext()) {
            val reader = readers.next()
            try {
              reader.input = fis
              Dimension(reader.getWidth(0), reader.getHeight(0))
            } finally {
              // An ImageReader may hold on to resources until dispose() is called,
              // so release it even when reading the dimensions fails.
              reader.dispose()
            }
          } else {
            logger.warn { "no reader found" }
            null
          }
        }
      }
    }
}
@@ -1,6 +1,7 @@
package org.gotson.komga.infrastructure.jooq

import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.domain.model.Media
import org.gotson.komga.domain.persistence.MediaRepository
import org.gotson.komga.jooq.Tables
Expand Down Expand Up @@ -94,16 +95,20 @@ class MediaDao(
p.BOOK_ID,
p.FILE_NAME,
p.MEDIA_TYPE,
p.NUMBER
).values(null as String?, null, null, null)
p.NUMBER,
p.WIDTH,
p.HEIGHT
).values(null as String?, null, null, null, null, null)
).also {
medias.forEach { media ->
media.pages.forEachIndexed { index, page ->
it.bind(
media.bookId,
page.fileName,
page.mediaType,
index
index,
page.dimension?.width,
page.dimension?.height
)
}
}
Expand Down Expand Up @@ -198,6 +203,7 @@ class MediaDao(
private fun MediaPageRecord.toDomain() =
BookPage(
fileName = fileName,
mediaType = mediaType
mediaType = mediaType,
dimension = if (width != null && height != null) Dimension(width, height) else null
)
}
Expand Up @@ -4,6 +4,7 @@ import mu.KotlinLogging
import org.apache.commons.compress.archivers.zip.ZipFile
import org.apache.commons.io.FilenameUtils
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.jsoup.Jsoup
import org.springframework.stereotype.Service
import java.nio.file.Path
Expand All @@ -12,7 +13,11 @@ import java.nio.file.Paths
private val logger = KotlinLogging.logger {}

@Service
class EpubExtractor(contentDetector: ContentDetector) : ZipExtractor(contentDetector) {
class EpubExtractor(
private val zipExtractor: ZipExtractor,
private val contentDetector: ContentDetector,
private val imageAnalyzer: ImageAnalyzer
) : MediaContainerExtractor {

override fun mediaTypes(): List<String> = listOf("application/epub+zip")

Expand Down Expand Up @@ -41,17 +46,27 @@ class EpubExtractor(contentDetector: ContentDetector) : ZipExtractor(contentDete
}

return images.map { image ->
MediaContainerEntry(image.separatorsToUnix(), manifest.values.first {
val name = image.separatorsToUnix()
val mediaType = manifest.values.first {
it.href == (opfDir?.relativize(image) ?: image).separatorsToUnix()
}.mediaType)
}.mediaType
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(zip.getInputStream(zip.getEntry(name)))
else
null
MediaContainerEntry(name = name, mediaType = mediaType, dimension = dimension)
}
} catch (e: Exception) {
logger.error(e) { "File is not a proper Epub, treating it as a zip file" }
return super.getEntries(path)
return zipExtractor.getEntries(path)
}
}
}

/**
 * Returns the content of the entry named [entryName] in the file at [path].
 *
 * The call is forwarded unchanged to the injected `zipExtractor`.
 */
override fun getEntryStream(path: Path, entryName: String): ByteArray =
  zipExtractor.getEntryStream(path, entryName)

private fun getPackagePath(zip: ZipFile): String =
zip.getEntry("META-INF/container.xml").let { entry ->
val container = zip.getInputStream(entry).use { Jsoup.parse(it, null, "") }
Expand Down
@@ -1,16 +1,25 @@
package org.gotson.komga.infrastructure.mediacontainer

import mu.KotlinLogging
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.PDPage
import org.apache.pdfbox.rendering.ImageType
import org.apache.pdfbox.rendering.PDFRenderer
import org.gotson.komga.domain.model.Dimension
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.io.ByteArrayOutputStream
import java.nio.file.Path
import javax.imageio.ImageIO
import kotlin.math.roundToInt

private val logger = KotlinLogging.logger {}

@Service
class PdfExtractor : MediaContainerExtractor {
class PdfExtractor(
private val imageAnalyzer: ImageAnalyzer
) : MediaContainerExtractor {

private val mediaType = "image/jpeg"
private val imageIOFormat = "jpeg"
Expand All @@ -21,19 +30,23 @@ class PdfExtractor : MediaContainerExtractor {
override fun getEntries(path: Path): List<MediaContainerEntry> =
PDDocument.load(path.toFile()).use { pdf ->
(0 until pdf.numberOfPages).map { index ->
MediaContainerEntry(index.toString(), mediaType)
val page = pdf.getPage(index)
val scale = page.getScale()
val dimension = Dimension((page.cropBox.width * scale).roundToInt(), (page.cropBox.height * scale).roundToInt())
MediaContainerEntry(name = index.toString(), mediaType = mediaType, dimension = dimension)
}
}

override fun getEntryStream(path: Path, entryName: String): ByteArray =
PDDocument.load(path.toFile()).use { pdf ->
val pageNumber = entryName.toInt()
val page = pdf.getPage(pageNumber)
val scale = resolution / minOf(page.cropBox.width, page.cropBox.height)
val image = PDFRenderer(pdf).renderImage(pageNumber, scale, ImageType.RGB)
val image = PDFRenderer(pdf).renderImage(pageNumber, page.getScale(), ImageType.RGB)
ByteArrayOutputStream().use { out ->
ImageIO.write(image, imageIOFormat, out)
out.toByteArray()
}
}

/**
 * Scale factor for this page: `resolution` divided by the shorter side of the crop box.
 *
 * Multiplying the crop box by this factor makes its shorter side equal to `resolution`.
 * NOTE(review): `resolution` is declared outside this view; its type and value are not confirmed here.
 */
private fun PDPage.getScale() = resolution / minOf(cropBox.width, cropBox.height)
}
Expand Up @@ -3,17 +3,22 @@ package org.gotson.komga.infrastructure.mediacontainer
import com.github.junrar.Archive
import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.io.input.TeeInputStream
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.nio.file.Path
import java.util.Comparator

private val logger = KotlinLogging.logger {}

@Service
class RarExtractor(
private val contentDetector: ContentDetector
private val contentDetector: ContentDetector,
private val imageAnalyzer: ImageAnalyzer
) : MediaContainerExtractor {

private val natSortComparator: Comparator<String> = CaseInsensitiveSimpleNaturalComparator.getInstance()
Expand All @@ -27,12 +32,20 @@ class RarExtractor(
if (rar.mainHeader.isMultiVolume) throw MediaUnsupportedException("Multi-Volume RAR archives are not supported")
rar.fileHeaders
.filter { !it.isDirectory }
.map {
.map { hd ->
try {
MediaContainerEntry(name = it.fileName, mediaType = contentDetector.detectMediaType(rar.getInputStream(it)))
val buffer = ByteArrayOutputStream()
TeeInputStream(rar.getInputStream(hd), buffer).use { tee ->
val mediaType = contentDetector.detectMediaType(tee)
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(ByteArrayInputStream(buffer.toByteArray()))
else
null
MediaContainerEntry(name = hd.fileName, mediaType = mediaType, dimension = dimension)
}
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${it.fileName}" }
MediaContainerEntry(name = it.fileName, comment = e.message)
logger.warn(e) { "Could not analyze entry: ${hd.fileName}" }
MediaContainerEntry(name = hd.fileName, comment = e.message)
}
}
.sortedWith(compareBy(natSortComparator) { it.name })
Expand Down
Expand Up @@ -4,15 +4,17 @@ import mu.KotlinLogging
import net.greypanther.natsort.CaseInsensitiveSimpleNaturalComparator
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.springframework.stereotype.Service
import java.nio.file.Path
import java.util.*
import java.util.Comparator

private val logger = KotlinLogging.logger {}

@Service
class ZipExtractor(
private val contentDetector: ContentDetector
private val contentDetector: ContentDetector,
private val imageAnalyzer: ImageAnalyzer
) : MediaContainerExtractor {

private val natSortComparator: Comparator<String> = CaseInsensitiveSimpleNaturalComparator.getInstance()
Expand All @@ -25,7 +27,12 @@ class ZipExtractor(
.filter { !it.isDirectory }
.map {
try {
MediaContainerEntry(name = it.name, mediaType = contentDetector.detectMediaType(zip.getInputStream(it)))
val mediaType = contentDetector.detectMediaType(zip.getInputStream(it))
val dimension = if (contentDetector.isImage(mediaType))
imageAnalyzer.getDimension(zip.getInputStream(it))
else
null
MediaContainerEntry(name = it.name, mediaType = mediaType, dimension = dimension)
} catch (e: Exception) {
logger.warn(e) { "Could not analyze entry: ${it.name}" }
MediaContainerEntry(name = it.name, comment = e.message)
Expand Down
Expand Up @@ -248,7 +248,9 @@ class BookController(
Media.Status.OUTDATED -> throw ResponseStatusException(HttpStatus.NOT_FOUND, "Book is outdated and must be re-analyzed")
Media.Status.ERROR -> throw ResponseStatusException(HttpStatus.NOT_FOUND, "Book analysis failed")
Media.Status.UNSUPPORTED -> throw ResponseStatusException(HttpStatus.NOT_FOUND, "Book format is not supported")
Media.Status.READY -> media.pages.mapIndexed { index, s -> PageDto(index + 1, s.fileName, s.mediaType) }
Media.Status.READY -> media.pages.mapIndexed { index, s ->
PageDto(index + 1, s.fileName, s.mediaType, s.dimension?.width, s.dimension?.height)
}
}
} ?: throw ResponseStatusException(HttpStatus.NOT_FOUND)

Expand Down
Expand Up @@ -3,5 +3,7 @@ package org.gotson.komga.interfaces.rest.dto
data class PageDto(
val number: Int,
val fileName: String,
val mediaType: String
val mediaType: String,
val width: Int?,
val height: Int?
)

0 comments on commit f9d55ec

Please sign in to comment.