From 7530d22b4fdad5ad6255a6f20a0185376757c1ba Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Mon, 18 Nov 2024 15:00:09 +0100 Subject: [PATCH] Introduce the notion of minimal parsing This will only read document instructions (yaml front matter, page TOC) from the document. --- .../DocumentationGenerator.cs | 2 +- src/Elastic.Markdown/IO/ConfigurationFile.cs | 2 +- src/Elastic.Markdown/IO/DocumentationFile.cs | 25 ++++----- .../IO/DocumentationFolder.cs | 6 +-- src/Elastic.Markdown/IO/DocumentationSet.cs | 3 +- src/Elastic.Markdown/IO/MarkdownFile.cs | 52 +++++++++++++------ .../Myst/FrontMatterParser.cs | 4 +- src/Elastic.Markdown/Myst/MarkdownParser.cs | 20 ++++--- src/Elastic.Markdown/Slices/HtmlWriter.cs | 3 +- src/docs-builder/Http/DocumentationWebHost.cs | 2 +- .../Directives/DirectiveBaseTests.cs | 2 +- .../Directives/ImageTests.cs | 2 +- .../Inline/InlneBaseTests.cs | 2 +- 13 files changed, 69 insertions(+), 56 deletions(-) diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs index c85e07243..57fed5b66 100644 --- a/src/Elastic.Markdown/DocumentationGenerator.cs +++ b/src/Elastic.Markdown/DocumentationGenerator.cs @@ -124,7 +124,7 @@ await Parallel.ForEachAsync(DocumentationSet.Files, ctx, async (file, token) => var outputFile = OutputFile(file.RelativePath); if (file is MarkdownFile markdown) { - await markdown.ParseAsync(token); + await markdown.ParseFullAsync(token); await HtmlWriter.WriteAsync(outputFile, markdown, token); } else diff --git a/src/Elastic.Markdown/IO/ConfigurationFile.cs b/src/Elastic.Markdown/IO/ConfigurationFile.cs index aa95aa819..5c7002ce0 100644 --- a/src/Elastic.Markdown/IO/ConfigurationFile.cs +++ b/src/Elastic.Markdown/IO/ConfigurationFile.cs @@ -10,7 +10,7 @@ namespace Elastic.Markdown.IO; -public class ConfigurationFile : DocumentationFile +public record ConfigurationFile : DocumentationFile { private readonly IFileInfo _sourceFile; private readonly IDirectoryInfo _rootPath; diff --git a/src/Elastic.Markdown/IO/DocumentationFile.cs b/src/Elastic.Markdown/IO/DocumentationFile.cs index beff350dc..63e809156 100644 --- a/src/Elastic.Markdown/IO/DocumentationFile.cs +++ b/src/Elastic.Markdown/IO/DocumentationFile.cs @@ -5,24 +5,17 @@ namespace Elastic.Markdown.IO; -public abstract class DocumentationFile(IFileInfo sourceFile, IDirectoryInfo rootPath) +public abstract record DocumentationFile(IFileInfo SourceFile, IDirectoryInfo RootPath) { - public IFileInfo SourceFile { get; } = sourceFile; - public string RelativePath { get; } = Path.GetRelativePath(rootPath.FullName, sourceFile.FullName); - public string RelativeFolder { get; } = Path.GetRelativePath(rootPath.FullName, sourceFile.Directory!.FullName); - - public FileInfo OutputFile(IDirectoryInfo outputPath) => - new(Path.Combine(outputPath.FullName, RelativePath.Replace(".md", ".html"))); + public string RelativePath { get; } = Path.GetRelativePath(RootPath.FullName, SourceFile.FullName); + public string RelativeFolder { get; } = Path.GetRelativePath(RootPath.FullName, SourceFile.Directory!.FullName); } -public class ImageFile(IFileInfo sourceFile, IDirectoryInfo rootPath, string mimeType = "image/png") - : DocumentationFile(sourceFile, rootPath) -{ - public string MimeType { get; } = mimeType; -} +public record ImageFile(IFileInfo SourceFile, IDirectoryInfo RootPath, string MimeType = "image/png") + : DocumentationFile(SourceFile, RootPath); -public class StaticFile(IFileInfo sourceFile, IDirectoryInfo rootPath) - : DocumentationFile(sourceFile, rootPath); +public record StaticFile(IFileInfo SourceFile, IDirectoryInfo RootPath) + : DocumentationFile(SourceFile, RootPath); -public class ExcludedFile(IFileInfo sourceFile, IDirectoryInfo rootPath) - : DocumentationFile(sourceFile, rootPath); +public record ExcludedFile(IFileInfo SourceFile, IDirectoryInfo RootPath) + : DocumentationFile(SourceFile, RootPath); diff --git a/src/Elastic.Markdown/IO/DocumentationFolder.cs b/src/Elastic.Markdown/IO/DocumentationFolder.cs index a82e3753f..9e8f190ca 100644 --- a/src/Elastic.Markdown/IO/DocumentationFolder.cs +++ b/src/Elastic.Markdown/IO/DocumentationFolder.cs @@ -2,8 +2,6 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using Markdig.Helpers; - namespace Elastic.Markdown.IO; public class DocumentationFolder @@ -75,10 +73,10 @@ public async Task Resolve(Cancel ctx = default) { if (_resolved) return; - await Parallel.ForEachAsync(FilesInOrder, ctx, async (file, token) => await file.ParseAsync(token)); + await Parallel.ForEachAsync(FilesInOrder, ctx, async (file, token) => await file.MinimalParse(token)); await Parallel.ForEachAsync(GroupsInOrder, ctx, async (group, token) => await group.Resolve(token)); - await (Index?.ParseAsync(ctx) ?? Task.CompletedTask); + await (Index?.MinimalParse(ctx) ?? Task.CompletedTask); _resolved = true; } diff --git a/src/Elastic.Markdown/IO/DocumentationSet.cs b/src/Elastic.Markdown/IO/DocumentationSet.cs index 8f71f33ac..201c91416 100644 --- a/src/Elastic.Markdown/IO/DocumentationSet.cs +++ b/src/Elastic.Markdown/IO/DocumentationSet.cs @@ -1,9 +1,8 @@ // Licensed to Elasticsearch B.V under one or more agreements. // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System.Globalization; + using System.IO.Abstractions; -using System.Text.Json; using Elastic.Markdown.Diagnostics; using Elastic.Markdown.Myst; diff --git a/src/Elastic.Markdown/IO/MarkdownFile.cs b/src/Elastic.Markdown/IO/MarkdownFile.cs index 36ae6efe7..1d7ec43cf 100644 --- a/src/Elastic.Markdown/IO/MarkdownFile.cs +++ b/src/Elastic.Markdown/IO/MarkdownFile.cs @@ -3,17 +3,15 @@ // See the LICENSE file in the project root for more information using System.IO.Abstractions; using Elastic.Markdown.Myst; -using Elastic.Markdown.Myst.Directives; using Elastic.Markdown.Slices; using Markdig; using Markdig.Extensions.Yaml; -using Markdig.Helpers; using Markdig.Syntax; using Slugify; namespace Elastic.Markdown.IO; -public class MarkdownFile : DocumentationFile +public record MarkdownFile : DocumentationFile { private readonly SlugHelper _slugHelper = new(); private string? _navigationTitle; @@ -21,7 +19,6 @@ public class MarkdownFile : DocumentationFile public MarkdownFile(IFileInfo sourceFile, IDirectoryInfo rootPath, MarkdownParser parser, BuildContext context) : base(sourceFile, rootPath) { - ParentFolders = RelativePath.Split(Path.DirectorySeparatorChar).SkipLast(1).ToArray(); FileName = sourceFile.Name; UrlPathPrefix = context.UrlPathPrefix; MarkdownParser = parser; @@ -29,7 +26,6 @@ public MarkdownFile(IFileInfo sourceFile, IDirectoryInfo rootPath, MarkdownParse public string? UrlPathPrefix { get; } private MarkdownParser MarkdownParser { get; } - private FrontMatterParser FrontMatterParser { get; } = new(); public YamlFrontMatter? YamlFrontMatter { get; private set; } public string? Title { get; private set; } public string? NavigationTitle @@ -38,16 +34,32 @@ public string? NavigationTitle private set => _navigationTitle = value; } - public List TableOfContents { get; } = new(); - public IReadOnlyList ParentFolders { get; } + private readonly List _tableOfContent = new(); + public IReadOnlyCollection TableOfContents => _tableOfContent; + public string FileName { get; } public string Url => $"{UrlPathPrefix}/{RelativePath.Replace(".md", ".html")}"; - public async Task ParseAsync(Cancel ctx) => await ParseFullAsync(ctx); + private bool _instructionsParsed; + + public async Task MinimalParse(Cancel ctx) + { + var document = await MarkdownParser.MinimalParseAsync(SourceFile, ctx); + ReadDocumentInstructions(document); + return document; + } public async Task ParseFullAsync(Cancel ctx) { - var document = await MarkdownParser.QuickParseAsync(SourceFile, ctx); + if (!_instructionsParsed) + await MinimalParse(ctx); + + var document = await MarkdownParser.ParseAsync(SourceFile, YamlFrontMatter, ctx); + return document; + } + + private void ReadDocumentInstructions(MarkdownDocument document) + { if (document.FirstOrDefault() is YamlFrontMatterBlock yaml) { var raw = string.Join(Environment.NewLine, yaml.Lines.Lines); @@ -63,14 +75,20 @@ public async Task ParseFullAsync(Cancel ctx) .Where(title => !string.IsNullOrWhiteSpace(title)) .Select(title => new PageTocItem { Heading = title!, Slug = _slugHelper.GenerateSlug(title) }) .ToList(); - TableOfContents.Clear(); - TableOfContents.AddRange(contents); - return document; + _tableOfContent.Clear(); + _tableOfContent.AddRange(contents); + _instructionsParsed = true; } - public async Task CreateHtmlAsync(YamlFrontMatter? matter, Cancel ctx) - { - var document = await MarkdownParser.ParseAsync(SourceFile, matter, ctx); - return document.ToHtml(MarkdownParser.Pipeline); - } + + public string CreateHtml(MarkdownDocument document) => + // var writer = new StringWriter(); + // var renderer = new HtmlRenderer(writer); + // renderer.LinkRewriter = (s => s); + // MarkdownParser.Pipeline.Setup(renderer); + // + // var document = MarkdownParser.Parse(markdown, pipeline); + // renderer.Render(document); + // writer.Flush(); + document.ToHtml(MarkdownParser.Pipeline); } diff --git a/src/Elastic.Markdown/Myst/FrontMatterParser.cs b/src/Elastic.Markdown/Myst/FrontMatterParser.cs index 7b4e87781..71efe322f 100644 --- a/src/Elastic.Markdown/Myst/FrontMatterParser.cs +++ b/src/Elastic.Markdown/Myst/FrontMatterParser.cs @@ -22,9 +22,9 @@ public class YamlFrontMatter public Dictionary? Properties { get; set; } } -public class FrontMatterParser +public static class FrontMatterParser { - public YamlFrontMatter Deserialize(string yaml) + public static YamlFrontMatter Deserialize(string yaml) { var input = new StringReader(yaml); diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs index f4456aced..0176f2b69 100644 --- a/src/Elastic.Markdown/Myst/MarkdownParser.cs +++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs @@ -1,6 +1,7 @@ // Licensed to Elasticsearch B.V under one or more agreements. // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information + using System.IO.Abstractions; using Cysharp.IO; using Elastic.Markdown.Myst.Comments; @@ -17,7 +18,13 @@ public class MarkdownParser(IDirectoryInfo sourcePath, BuildContext context) public IDirectoryInfo SourcePath { get; } = sourcePath; public BuildContext Context { get; } = context; - public MarkdownPipeline Pipeline => + public MarkdownPipeline MinimalPipeline { get; } = + new MarkdownPipelineBuilder() + .UseSubstitution() + .UseYamlFrontMatter() + .Build(); + + public MarkdownPipeline Pipeline { get; } = new MarkdownPipelineBuilder() .EnableTrackTrivia() .UsePreciseSourceLocation() @@ -30,17 +37,14 @@ public class MarkdownParser(IDirectoryInfo sourcePath, BuildContext context) .UseGridTables() .UsePipeTables() .UseDirectives() + .DisableHtml() .Build(); - // TODO only scan for yaml front matter and toc information - public Task QuickParseAsync(IFileInfo path, Cancel ctx) + public Task MinimalParseAsync(IFileInfo path, Cancel ctx) { - var context = new ParserContext(this, path, null, Context) - { - SkipValidation = true - }; - return ParseAsync(path, context, Pipeline, ctx); + var context = new ParserContext(this, path, null, Context) { SkipValidation = true }; + return ParseAsync(path, context, MinimalPipeline, ctx); } public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) diff --git a/src/Elastic.Markdown/Slices/HtmlWriter.cs b/src/Elastic.Markdown/Slices/HtmlWriter.cs index 8f49719de..462959c0c 100644 --- a/src/Elastic.Markdown/Slices/HtmlWriter.cs +++ b/src/Elastic.Markdown/Slices/HtmlWriter.cs @@ -40,7 +40,8 @@ private async Task RenderNavigation(MarkdownFile markdown, Cancel ctx = public async Task RenderLayout(MarkdownFile markdown, Cancel ctx = default) { - var html = await markdown.CreateHtmlAsync(markdown.YamlFrontMatter, ctx); + var document = await markdown.ParseFullAsync(ctx); + var html = markdown.CreateHtml(document); await DocumentationSet.Tree.Resolve(ctx); var navigationHtml = await RenderNavigation(markdown, ctx); var slice = Index.Create(new IndexViewModel diff --git a/src/docs-builder/Http/DocumentationWebHost.cs b/src/docs-builder/Http/DocumentationWebHost.cs index 946493b8c..7b5f8c773 100644 --- a/src/docs-builder/Http/DocumentationWebHost.cs +++ b/src/docs-builder/Http/DocumentationWebHost.cs @@ -70,7 +70,7 @@ private static async Task ServeDocumentationFile(ReloadableGeneratorSta { case MarkdownFile markdown: { - await markdown.ParseAsync(ctx); + await markdown.ParseFullAsync(ctx); var rendered = await generator.RenderLayout(markdown, ctx); return Results.Content(rendered, "text/html"); } diff --git a/tests/Elastic.Markdown.Tests/Directives/DirectiveBaseTests.cs b/tests/Elastic.Markdown.Tests/Directives/DirectiveBaseTests.cs index a8a482349..308d5bae2 100644 --- a/tests/Elastic.Markdown.Tests/Directives/DirectiveBaseTests.cs +++ b/tests/Elastic.Markdown.Tests/Directives/DirectiveBaseTests.cs @@ -84,7 +84,7 @@ public virtual async Task InitializeAsync() var collectTask = Task.Run(async () => await Collector.StartAsync(default), default); Document = await File.ParseFullAsync(default); - Html = await File.CreateHtmlAsync(File.YamlFrontMatter, default); + Html = File.CreateHtml(Document); Collector.Channel.TryComplete(); await collectTask; diff --git a/tests/Elastic.Markdown.Tests/Directives/ImageTests.cs b/tests/Elastic.Markdown.Tests/Directives/ImageTests.cs index 14d5d080e..2d4ca2f55 100644 --- a/tests/Elastic.Markdown.Tests/Directives/ImageTests.cs +++ b/tests/Elastic.Markdown.Tests/Directives/ImageTests.cs @@ -20,7 +20,7 @@ public class ImageBlockTests(ITestOutputHelper output) : DirectiveTest Task.CompletedTask;