Skip to content

Commit

Permalink
Add AsciiDoc content extraction
Browse files Browse the repository at this point in the history
This makes AsciiDoc content searchable.
Also simplify document title extraction. Don't try to make
up a title if there is no title.

Co-authored-by: Fabian Illner <fabian.illner@dm.de>
  • Loading branch information
jp7677 and fabianillnerdmde committed Oct 22, 2023
1 parent e7950d2 commit b571a9a
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ import com.structurizr.documentation.Section
import com.vladsch.flexmark.ast.Heading
import com.vladsch.flexmark.ast.Paragraph
import com.vladsch.flexmark.parser.Parser
import org.asciidoctor.Asciidoctor
import org.asciidoctor.Options
import org.asciidoctor.SafeMode
import org.asciidoctor.ast.ContentNode
import org.asciidoctor.ast.Document
import org.asciidoctor.ast.StructuralNode
import org.asciidoctor.converter.ConverterFor
import org.asciidoctor.converter.StringConverter

private val parser = Parser.builder().build()

Expand Down Expand Up @@ -41,4 +49,31 @@ private fun markdownText(content: String): String {
.trim()
}

private fun asciidocText(content: String): String = TODO("Content extraction is not implemented yet for AsciiDoc")
/**
 * Extracts searchable plain text from AsciiDoc [content].
 *
 * The content is converted with the custom "text" backend implemented by
 * [AsciiDocTextConverter]; the lines of the converted output are then joined
 * with single spaces so the result is a single line.
 *
 * @param content raw AsciiDoc source
 * @return the converted text with line breaks replaced by spaces
 */
private fun asciidocText(content: String): String {
    val asciidoctor = Asciidoctor.Factory.create()
    try {
        asciidoctor.javaConverterRegistry().register(AsciiDocTextConverter::class.java)

        val options = Options.builder().safe(SafeMode.SERVER).backend("text").build()
        return asciidoctor.convert(content, options).lines().joinToString(" ")
    } finally {
        // Release the Asciidoctor instance even when registration or conversion throws.
        asciidoctor.shutdown()
    }
}

/**
 * Asciidoctor converter registered for the "text" backend.
 *
 * It emits the content of documents, the title followed by the content of
 * sections, and the content of preamble and paragraph nodes. Every other node
 * yields `null`.
 */
@ConverterFor("text")
class AsciiDocTextConverter(
    backend: String?,
    opts: Map<String?, Any?>?
) : StringConverter(backend, opts) {
    /**
     * Converts a single [node] to text.
     *
     * @param node the node to convert
     * @param transform optional transform name; the node's own name is used when it is `null`
     * @param o conversion options (unused)
     * @return the text for supported nodes, or `null` for any other node
     */
    override fun convert(node: ContentNode, transform: String?, o: Map<Any?, Any?>?): String? {
        val nodeKind = transform ?: node.nodeName
        return when {
            node is Document -> node.content.toString()
            // Fully qualified: the file also imports com.structurizr.documentation.Section.
            node is org.asciidoctor.ast.Section -> "${node.title}\n${node.content}"
            nodeKind == "preamble" || nodeKind == "paragraph" -> (node as StructuralNode).content as String
            else -> null
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@ package nl.avisi.structurizr.site.generatr.site.model
import com.structurizr.documentation.Format
import com.structurizr.documentation.Section
import com.vladsch.flexmark.ast.Heading
import com.vladsch.flexmark.ast.Paragraph
import com.vladsch.flexmark.parser.Parser
import org.asciidoctor.Asciidoctor
import org.asciidoctor.Options
import org.asciidoctor.SafeMode

private val parser = Parser.builder().build()
private const val MAX_TITLE_LENGTH = 50

fun Section.contentTitle(): String = when (format) {
Format.Markdown -> markdownTitle()
Expand All @@ -21,24 +19,11 @@ fun Section.contentTitle(): String = when (format) {
private fun Section.markdownTitle(): String {
val document = parser.parse(content)

if (!document.hasChildren())
return "untitled document"

val header = document.children.firstOrNull { it is Heading }?.let { it as Heading }
if (header != null)
return header.text.toString()

val paragraph = document.children.firstOrNull { it is Paragraph }?.let { it as Paragraph }?.chars?.toString()
if (paragraph != null)
return if (paragraph.length > MAX_TITLE_LENGTH) {
val whitespacePosition = paragraph.withIndex()
.filter { it.value.isWhitespace() }
.lastOrNull { it.index < MAX_TITLE_LENGTH }
?.index
paragraph.take(whitespacePosition ?: MAX_TITLE_LENGTH)
} else paragraph

return "unknown document"
return "untitled document"
}

private fun Section.asciidocTitle(): String {
Expand All @@ -49,7 +34,5 @@ private fun Section.asciidocTitle(): String {
if (document.title != null && document.title.isNotEmpty())
return document.title

// TODO Content extraction is not implemented yet for AsciiDoc

return "untitled document"
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ContentTextTest {
}

@ParameterizedTest
@ValueSource(strings = ["= header", "== header", "=== header"])
@ValueSource(strings = ["= header"])
fun `ignores asciidoc title`(content: String) {
val section = Section(Format.AsciiDoc, content)
assertThat(section.contentText()).isEqualTo("")
Expand All @@ -55,13 +55,13 @@ class ContentTextTest {

@Test
fun `markdown headers and paragraphs`() {
val section = Section(Format.Markdown, "# header\nsome content\n## subheader\nmore content")
val section = Section(Format.Markdown, "# header\n\nsome content\n\n## subheader\n\nmore content")
assertThat(section.contentText()).isEqualTo("subheader some content more content")
}

@Test
fun `asciidoc headers and paragraphs`() {
val section = Section(Format.AsciiDoc, "= header\nsome content\n== subheader\nmore content")
assertThat(section.contentText()).isEqualTo("subheader some content more content")
val section = Section(Format.AsciiDoc, "= header\n\nsome content\n\n== subheader\n\nmore content")
assertThat(section.contentText()).isEqualTo("some content subheader more content")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,7 @@ class ContentTitleTest {
.map { format ->
DynamicTest.dynamicTest(format.name) {
val section = Section(format, "some content")
assertThat(section.contentTitle()).isEqualTo("some content")
}
}

@TestFactory
fun `long paragraph`() = listOf(Format.Markdown, Format.AsciiDoc)
.map { format ->
DynamicTest.dynamicTest(format.name) {
val section = Section(
format,
"some very very long content we really need to truncate since no one wants to read such an exhausting title"
)
assertThat(section.contentTitle()).isEqualTo("some very very long content we really need to")
}
}

@TestFactory
fun `long paragraph without whitespaces`() = listOf(Format.Markdown, Format.AsciiDoc)
.map { format ->
DynamicTest.dynamicTest(format.name) {
val section = Section(
format,
"some-very-very-long-content-we-really-need-to-truncate-since-no-one-wants-to-read-such-an-exhausting-title"
)
assertThat(section.contentTitle()).isEqualTo("some-very-very-long-content-we-really-need-to-trun")
assertThat(section.contentTitle()).isEqualTo("untitled document")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class MenuViewModelTest : ViewModelTest() {
generatorContext.workspace.documentation.addSection(createSection("# Home"))
val section1 = createSection("# Doc 1")
.also { generatorContext.workspace.documentation.addSection(it) }
val section2 = createSection(" Doc Title 2")
val section2 = createSection("# Doc Title 2")
.also { generatorContext.workspace.documentation.addSection(it) }
val pageViewModel = createPageViewModel(generatorContext)
val viewModel = MenuViewModel(generatorContext, pageViewModel)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import kotlin.test.Test
class WorkspaceDocumentationSectionPageViewModelTest : ViewModelTest() {
@Test
fun url() {
val section = createSection("Some section With words and 1 number")
val section = createSection("# Some section With words and 1 number")

assertThat(WorkspaceDocumentationSectionPageViewModel.url(section))
.isEqualTo("/some-section-with-words-and-1-number")
Expand All @@ -18,7 +18,7 @@ class WorkspaceDocumentationSectionPageViewModelTest : ViewModelTest() {
fun `normalized url`() {
val generatorContext = generatorContext()
val viewModel = WorkspaceDocumentationSectionPageViewModel(
generatorContext, createSection("Some section With words and 1 number")
generatorContext, createSection("# Some section With words and 1 number")
)

assertThat(viewModel.url).isEqualTo("/some-section-with-words-and-1-number")
Expand Down

0 comments on commit b571a9a

Please sign in to comment.