From 411c5907ac442d31aae21712ca09217cc181993d Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Mon, 19 May 2025 13:30:36 +0200 Subject: [PATCH 1/5] stage --- Directory.Packages.props | 3 +- docs-builder.sln | 7 +++ .../Elastic.Documentation.Search.csproj | 17 ++++++ .../IngestCollector.cs | 61 +++++++++++++++++++ .../DocumentationGenerator.cs | 9 +++ .../Exporters/IMarkdownExporter.cs | 12 ++++ .../Building/AssemblerBuilder.cs | 10 ++- .../docs-assembler/Cli/RepositoryCommands.cs | 2 +- .../Indexing/ElasticsearchMarkdownExporter.cs | 30 +++++++++ .../docs-assembler/docs-assembler.csproj | 1 + src/tooling/docs-builder/Cli/Commands.cs | 2 +- tests/authoring/Framework/Setup.fs | 2 +- 12 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj create mode 100644 src/Elastic.Documentation.Search/IngestCollector.cs create mode 100644 src/Elastic.Markdown/Exporters/IMarkdownExporter.cs create mode 100644 src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index dd121ad7c..ce195413b 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -16,6 +16,7 @@ + @@ -63,4 +64,4 @@ - + \ No newline at end of file diff --git a/docs-builder.sln b/docs-builder.sln index 3dff4f102..5c24bfe79 100644 --- a/docs-builder.sln +++ b/docs-builder.sln @@ -94,6 +94,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "assembler-filter", "assembl actions\assembler-filter\action.yml = actions\assembler-filter\action.yml EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Search", "src\Elastic.Documentation.Search\Elastic.Documentation.Search.csproj", "{052F70DE-CA5A-45F9-800B-E13CFEAE262C}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -155,6 +157,10 @@ Global 
{CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Debug|Any CPU.Build.0 = Debug|Any CPU {CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Release|Any CPU.ActiveCfg = Release|Any CPU {CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Release|Any CPU.Build.0 = Release|Any CPU + {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {4D198E25-C211-41DC-9E84-B15E89BD7048} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} @@ -178,5 +184,6 @@ Global {059E787F-85C1-43BE-9DD6-CE319E106383} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} {7D36DDDA-9E0B-4D2C-8033-5D62FF8B6166} = {059E787F-85C1-43BE-9DD6-CE319E106383} {FB1C1954-D8E2-4745-BA62-04DD82FB4792} = {245023D2-D3CA-47B9-831D-DAB91A2FFDC7} + {052F70DE-CA5A-45F9-800B-E13CFEAE262C} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} EndGlobalSection EndGlobal diff --git a/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj new file mode 100644 index 000000000..73f028e53 --- /dev/null +++ b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj @@ -0,0 +1,17 @@ + + + + net9.0 + enable + enable + + + + + + + + + + + diff --git a/src/Elastic.Documentation.Search/IngestCollector.cs b/src/Elastic.Documentation.Search/IngestCollector.cs new file mode 100644 index 000000000..193412ca6 --- /dev/null +++ b/src/Elastic.Documentation.Search/IngestCollector.cs @@ -0,0 +1,61 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using System.Buffers; +using System.Text.Json; +using Elastic.Ingest.Elasticsearch; +using Elastic.Ingest.Elasticsearch.Indices; +using Elastic.Transport; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; + +namespace Elastic.Documentation.Search; + +public record DocumentationDocument +{ + public string? Title { get; set; } +} + +public class IngestCollector : IDisposable +{ + private readonly IndexChannel _channel; + private readonly ILogger _logger; + + public IngestCollector(ILoggerFactory logFactory, string url, string apiKey) + { + _logger = logFactory.CreateLogger(); + var uri = new Uri(url); + var moniker = $"{uri.Host}${Guid.NewGuid():N}"; + var base64 = Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(moniker)); + var cloudId = $"name:{base64}"; + + var pool = new CloudNodePool(cloudId, new ApiKey(apiKey)); + var configuration = new TransportConfiguration(pool); + var transport = new DistributedTransport(configuration); + var options = new IndexChannelOptions(transport) + { + IndexFormat = "documentation", + ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"), + ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2) + }; + _channel = new IndexChannel(options); + } + + public async ValueTask TryWrite(DocumentationDocument document, CancellationToken ctx = default) + { + if (_channel.TryWrite(document)) + return true; + + if (await _channel.WaitToWriteAsync(ctx)) + return _channel.TryWrite(document); + return false; + } + + public void Dispose() + { + _channel.Complete(); + _channel.Dispose(); + GC.SuppressFinalize(this); + } +} diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs index ccdfadd76..767f17005 100644 --- a/src/Elastic.Markdown/DocumentationGenerator.cs +++ b/src/Elastic.Markdown/DocumentationGenerator.cs 
@@ -40,6 +40,7 @@ public class DocumentationGenerator private readonly ILogger _logger; private readonly IFileSystem _writeFileSystem; private readonly IDocumentationFileExporter _documentationFileExporter; + private readonly IMarkdownExporter[] _markdownExporters; private HtmlWriter HtmlWriter { get; } public DocumentationSet DocumentationSet { get; } @@ -51,12 +52,14 @@ public DocumentationGenerator( ILoggerFactory logger, INavigationHtmlWriter? navigationHtmlWriter = null, IDocumentationFileOutputProvider? documentationFileOutputProvider = null, + IMarkdownExporter[]? markdownExporters = null, IDocumentationFileExporter? documentationExporter = null, IConversionCollector? conversionCollector = null, ILegacyUrlMapper? legacyUrlMapper = null, IPositionalNavigation? positionalNavigation = null ) { + _markdownExporters = markdownExporters ?? []; _documentationFileOutputProvider = documentationFileOutputProvider; _conversionCollector = conversionCollector; _writeFileSystem = docSet.Context.WriteFileSystem; @@ -219,6 +222,12 @@ private async Task ProcessFile(HashSet offendingFiles, DocumentationFile return; } + if (file is MarkdownFile markdown) + { + foreach (var exporter in _markdownExporters) + _ = await exporter.Export(markdown); + } + _logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName); //TODO send file to OutputFile() so we can validate its scope is defined in navigation.yml var outputFile = OutputFile(file.RelativePath); diff --git a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs new file mode 100644 index 000000000..35a591508 --- /dev/null +++ b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs @@ -0,0 +1,12 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Elastic.Markdown.IO; + +namespace Elastic.Markdown.Exporters; + +public interface IMarkdownExporter +{ + ValueTask Export(MarkdownFile file); +} diff --git a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs index 943107bab..9a547ac0d 100644 --- a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs +++ b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs @@ -3,10 +3,12 @@ // See the LICENSE file in the project root for more information using System.Collections.Frozen; +using Documentation.Assembler.Indexing; using Documentation.Assembler.Navigation; using Elastic.Documentation.Legacy; using Elastic.Documentation.Links; using Elastic.Markdown; +using Elastic.Markdown.Exporters; using Elastic.Markdown.Links.CrossLinks; using Microsoft.Extensions.Logging; @@ -92,12 +94,18 @@ string Resolve(string relativeMarkdownPath) private async Task BuildAsync(AssemblerDocumentationSet set, Cancel ctx) { + IMarkdownExporter[]? markdownExporters = + Environment.GetEnvironmentVariable("ELASTIC_API_KEY") is { } apiKey && + Environment.GetEnvironmentVariable("ELASTIC_URL") is { } url + ? 
[new ElasticsearchMarkdownExporter(logger, url, apiKey)] + : null; var generator = new DocumentationGenerator( set.DocumentationSet, logger, HtmlWriter, pathProvider, legacyUrlMapper: LegacyUrlMapper, - positionalNavigation: navigation + positionalNavigation: navigation, + markdownExporters: markdownExporters ); return await generator.GenerateAll(ctx); } diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs index 99a9bdbf8..f2d4fdf92 100644 --- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs +++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs @@ -163,7 +163,7 @@ await Parallel.ForEachAsync(repositories, outputPath ); var set = new DocumentationSet(context, logger); - var generator = new DocumentationGenerator(set, logger, null, null, new NoopDocumentationFileExporter()); + var generator = new DocumentationGenerator(set, logger, null, null, null, new NoopDocumentationFileExporter()); _ = await generator.GenerateAll(c); IAmazonS3 s3Client = new AmazonS3Client(); diff --git a/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs new file mode 100644 index 000000000..4cf7ece09 --- /dev/null +++ b/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs @@ -0,0 +1,30 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Search; +using Elastic.Markdown.Exporters; +using Elastic.Markdown.IO; +using Microsoft.Extensions.Logging; + +namespace Documentation.Assembler.Indexing; + +public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, string url, string apiKey) : IMarkdownExporter, IDisposable +{ + private readonly IngestCollector _ingestCollector = new(logFactory, url, apiKey); + + public void Dispose() + { + _ingestCollector.Dispose(); + GC.SuppressFinalize(this); + } + + public async ValueTask Export(MarkdownFile file) + { + var doc = new DocumentationDocument + { + Title = file.Title, + }; + return await _ingestCollector.TryWrite(doc); + } +} diff --git a/src/tooling/docs-assembler/docs-assembler.csproj b/src/tooling/docs-assembler/docs-assembler.csproj index cfed86523..2e7ae39b0 100644 --- a/src/tooling/docs-assembler/docs-assembler.csproj +++ b/src/tooling/docs-assembler/docs-assembler.csproj @@ -26,6 +26,7 @@ + diff --git a/src/tooling/docs-builder/Cli/Commands.cs b/src/tooling/docs-builder/Cli/Commands.cs index 5dacaec9d..f2fcbad1a 100644 --- a/src/tooling/docs-builder/Cli/Commands.cs +++ b/src/tooling/docs-builder/Cli/Commands.cs @@ -152,7 +152,7 @@ public async Task Generate( metadataOnly ??= metaValue; var exporter = metadataOnly.HasValue && metadataOnly.Value ? 
new NoopDocumentationFileExporter() : null; - var generator = new DocumentationGenerator(set, logger, null, null, exporter); + var generator = new DocumentationGenerator(set, logger, null, null, null, exporter); _ = await generator.GenerateAll(ctx); if (runningOnCi) diff --git a/tests/authoring/Framework/Setup.fs b/tests/authoring/Framework/Setup.fs index 66df0e955..95390500c 100644 --- a/tests/authoring/Framework/Setup.fs +++ b/tests/authoring/Framework/Setup.fs @@ -112,7 +112,7 @@ type Setup = let conversionCollector = TestConversionCollector() let linkResolver = TestCrossLinkResolver(context.Configuration) let set = DocumentationSet(context, logger, linkResolver); - let generator = DocumentationGenerator(set, logger, null, null, null, conversionCollector) + let generator = DocumentationGenerator(set, logger, null, null, null, null, conversionCollector) let context = { Collector = collector From 36c4df91221fb9a0a307f1caa90379fb0bc77408 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Wed, 21 May 2025 11:14:38 +0200 Subject: [PATCH 2/5] stage --- Directory.Packages.props | 2 +- docs-builder.sln | 7 --- .../IngestCollector.cs | 61 ------------------- .../Search/DocumentationDocument.cs | 11 ++++ .../Serialization/SourceGenerationContext.cs | 2 + .../DocumentationGenerator.cs | 2 +- .../Building/AssemblerBuilder.cs | 19 +++--- .../Indexing/ElasticsearchMarkdownExporter.cs | 51 ++++++++++++++-- .../docs-assembler/docs-assembler.csproj | 2 +- 9 files changed, 75 insertions(+), 82 deletions(-) delete mode 100644 src/Elastic.Documentation.Search/IngestCollector.cs create mode 100644 src/Elastic.Documentation/Search/DocumentationDocument.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index ce195413b..c4d4e1699 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -16,7 +16,7 @@ - + diff --git a/docs-builder.sln b/docs-builder.sln index 5c24bfe79..3dff4f102 100644 --- a/docs-builder.sln +++ b/docs-builder.sln @@ -94,8 +94,6 @@ 
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "assembler-filter", "assembl actions\assembler-filter\action.yml = actions\assembler-filter\action.yml EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Search", "src\Elastic.Documentation.Search\Elastic.Documentation.Search.csproj", "{052F70DE-CA5A-45F9-800B-E13CFEAE262C}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -157,10 +155,6 @@ Global {CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Debug|Any CPU.Build.0 = Debug|Any CPU {CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Release|Any CPU.ActiveCfg = Release|Any CPU {CD94F9E4-7FCD-4152-81F1-4288C6B75367}.Release|Any CPU.Build.0 = Release|Any CPU - {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Debug|Any CPU.Build.0 = Debug|Any CPU - {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Release|Any CPU.ActiveCfg = Release|Any CPU - {052F70DE-CA5A-45F9-800B-E13CFEAE262C}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {4D198E25-C211-41DC-9E84-B15E89BD7048} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} @@ -184,6 +178,5 @@ Global {059E787F-85C1-43BE-9DD6-CE319E106383} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} {7D36DDDA-9E0B-4D2C-8033-5D62FF8B6166} = {059E787F-85C1-43BE-9DD6-CE319E106383} {FB1C1954-D8E2-4745-BA62-04DD82FB4792} = {245023D2-D3CA-47B9-831D-DAB91A2FFDC7} - {052F70DE-CA5A-45F9-800B-E13CFEAE262C} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A} EndGlobalSection EndGlobal diff --git a/src/Elastic.Documentation.Search/IngestCollector.cs b/src/Elastic.Documentation.Search/IngestCollector.cs deleted file mode 100644 index 193412ca6..000000000 --- a/src/Elastic.Documentation.Search/IngestCollector.cs +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. 
-// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.Buffers; -using System.Text.Json; -using Elastic.Ingest.Elasticsearch; -using Elastic.Ingest.Elasticsearch.Indices; -using Elastic.Transport; -using Microsoft.Extensions.Hosting; -using Microsoft.Extensions.Logging; - -namespace Elastic.Documentation.Search; - -public record DocumentationDocument -{ - public string? Title { get; set; } -} - -public class IngestCollector : IDisposable -{ - private readonly IndexChannel _channel; - private readonly ILogger _logger; - - public IngestCollector(ILoggerFactory logFactory, string url, string apiKey) - { - _logger = logFactory.CreateLogger(); - var uri = new Uri(url); - var moniker = $"{uri.Host}${Guid.NewGuid():N}"; - var base64 = Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(moniker)); - var cloudId = $"name:{base64}"; - - var pool = new CloudNodePool(cloudId, new ApiKey(apiKey)); - var configuration = new TransportConfiguration(pool); - var transport = new DistributedTransport(configuration); - var options = new IndexChannelOptions(transport) - { - IndexFormat = "documentation", - ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"), - ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2) - }; - _channel = new IndexChannel(options); - } - - public async ValueTask TryWrite(DocumentationDocument document, CancellationToken ctx = default) - { - if (_channel.TryWrite(document)) - return true; - - if (await _channel.WaitToWriteAsync(ctx)) - return _channel.TryWrite(document); - return false; - } - - public void Dispose() - { - _channel.Complete(); - _channel.Dispose(); - GC.SuppressFinalize(this); - } -} diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs new file mode 100644 index 
000000000..f6b5a931c --- /dev/null +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -0,0 +1,11 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +namespace Elastic.Documentation.Search; + +public record DocumentationDocument +{ + public string? Title { get; set; } +} + diff --git a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs index 53e6b94d0..48362ca70 100644 --- a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs +++ b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs @@ -4,6 +4,7 @@ using System.Text.Json.Serialization; using Elastic.Documentation.Links; +using Elastic.Documentation.Search; using Elastic.Documentation.State; namespace Elastic.Documentation.Serialization; @@ -16,4 +17,5 @@ namespace Elastic.Documentation.Serialization; [JsonSerializable(typeof(GitCheckoutInformation))] [JsonSerializable(typeof(LinkReferenceRegistry))] [JsonSerializable(typeof(LinkRegistryEntry))] +[JsonSerializable(typeof(DocumentationDocument))] public sealed partial class SourceGenerationContext : JsonSerializerContext; diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs index 767f17005..e08ec8510 100644 --- a/src/Elastic.Markdown/DocumentationGenerator.cs +++ b/src/Elastic.Markdown/DocumentationGenerator.cs @@ -103,7 +103,7 @@ public async Task GenerateAll(Cancel ctx) var generationState = Context.SkipDocumentationState ? null : GetPreviousGenerationState(); - // clear output directory if force is true but never for assembler builds since these build multiple times to the output. + // clear the output directory if force is true but never for assembler builds since these build multiple times to the output. 
if (Context is { AssemblerBuild: false, Force: true } // clear the output directory if force is false but generation state is null, except for assembler builds. || (Context is { AssemblerBuild: false, Force: false } && generationState == null)) diff --git a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs index 9a547ac0d..58cf41c91 100644 --- a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs +++ b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs @@ -35,6 +35,13 @@ public async Task BuildAllAsync(FrozenDictionary(); + var esExporter = + Environment.GetEnvironmentVariable("ELASTIC_API_KEY") is { } apiKey && + Environment.GetEnvironmentVariable("ELASTIC_URL") is { } url + ? new ElasticsearchMarkdownExporter(logger, url, apiKey) + : null; + IMarkdownExporter[] markdownExporters = esExporter is null ? [] : [esExporter]; + foreach (var (_, set) in assembleSets) { var checkout = set.Checkout; @@ -46,7 +53,7 @@ public async Task BuildAllAsync(FrozenDictionary BuildAsync(AssemblerDocumentationSet set, Cancel ctx) + private async Task BuildAsync(AssemblerDocumentationSet set, IMarkdownExporter[]? markdownExporters, Cancel ctx) { - IMarkdownExporter[]? markdownExporters = - Environment.GetEnvironmentVariable("ELASTIC_API_KEY") is { } apiKey && - Environment.GetEnvironmentVariable("ELASTIC_URL") is { } url - ? 
[new ElasticsearchMarkdownExporter(logger, url, apiKey)] - : null; var generator = new DocumentationGenerator( set.DocumentationSet, logger, HtmlWriter, diff --git a/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs index 4cf7ece09..d2f7ccf84 100644 --- a/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs +++ b/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs @@ -3,19 +3,62 @@ // See the LICENSE file in the project root for more information using Elastic.Documentation.Search; +using Elastic.Documentation.Serialization; +using Elastic.Ingest.Elasticsearch.Indices; using Elastic.Markdown.Exporters; using Elastic.Markdown.IO; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; using Microsoft.Extensions.Logging; namespace Documentation.Assembler.Indexing; -public class ElasticsearchMarkdownExporter(ILoggerFactory logFactory, string url, string apiKey) : IMarkdownExporter, IDisposable +public class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable { - private readonly IngestCollector _ingestCollector = new(logFactory, url, apiKey); + private readonly IndexChannel _channel; + private readonly ILogger _logger; + + public ElasticsearchMarkdownExporter(ILoggerFactory logFactory, string url, string apiKey) + { + _logger = logFactory.CreateLogger(); + var configuration = new ElasticsearchConfiguration(new Uri(url), new ApiKey(apiKey)) + { + //Uncomment to see the requests with Fiddler + //ProxyAddress = "http://localhost:8866" + }; + var transport = new DistributedTransport(configuration); + var options = new IndexChannelOptions(transport) + { + SerializerContext = SourceGenerationContext.Default, + IndexFormat = "documentation", + ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"), + ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", 
items.First().Item2) + }; + _channel = new IndexChannel(options); + } + + public async Task WaitForDrain() + { + _logger.LogInformation("Elasticsearch export: waiting for in flight exports"); + var drained = await _channel.WaitForDrainAsync(); + if (!drained) + _logger.LogError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down"); + } + + private async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default) + { + if (_channel.TryWrite(document)) + return true; + + if (await _channel.WaitToWriteAsync(ctx)) + return _channel.TryWrite(document); + return false; + } public void Dispose() { - _ingestCollector.Dispose(); + _channel.Complete(); + _channel.Dispose(); GC.SuppressFinalize(this); } @@ -25,6 +68,6 @@ public async ValueTask Export(MarkdownFile file) { Title = file.Title, }; - return await _ingestCollector.TryWrite(doc); + return await TryWrite(doc); } } diff --git a/src/tooling/docs-assembler/docs-assembler.csproj b/src/tooling/docs-assembler/docs-assembler.csproj index 2e7ae39b0..12d1823ae 100644 --- a/src/tooling/docs-assembler/docs-assembler.csproj +++ b/src/tooling/docs-assembler/docs-assembler.csproj @@ -19,6 +19,7 @@ + @@ -26,7 +27,6 @@ - From 6e1a7bed56f0541ea2440d29e07a16cfddb09d30 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 27 May 2025 22:10:08 +0200 Subject: [PATCH 3/5] Add exporter options to assembler command line --- Directory.Packages.props | 2 +- .../Diagnostics/IDiagnosticsCollector.cs | 12 ++ .../Search/DocumentationDocument.cs | 18 +++ .../DocumentationGenerator.cs | 30 ++-- .../Exporters/DocumentationFileExporter.cs | 47 +++--- .../Exporters/IMarkdownExporter.cs | 12 +- .../NoopDocumentationFileExporter.cs | 7 +- .../RuleDocumentationFileExporter.cs | 26 ++-- src/Elastic.Markdown/IO/MarkdownFile.cs | 12 ++ src/Elastic.Markdown/Myst/MarkdownParser.cs | 20 +-- src/Elastic.Markdown/Slices/HtmlWriter.cs | 4 +- .../Console/ErrataFileSourceRepository.cs | 4 + 
.../Building/AssemblerBuilder.cs | 35 +++-- .../docs-assembler/Cli/RepositoryCommands.cs | 34 ++++- .../ElasticsearchMarkdownExporter.cs | 135 ++++++++++++++++++ .../Exporters/LLMTextExporter.cs | 21 +++ .../Indexing/ElasticsearchMarkdownExporter.cs | 73 ---------- 17 files changed, 344 insertions(+), 148 deletions(-) create mode 100644 src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs create mode 100644 src/tooling/docs-assembler/Exporters/LLMTextExporter.cs delete mode 100644 src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index c4d4e1699..b47004d73 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -16,7 +16,7 @@ - + diff --git a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs index fc98220dd..146ce2874 100644 --- a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs +++ b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs @@ -35,6 +35,18 @@ public static void EmitWarning(this IDiagnosticsCollector collector, IFileInfo f public static void EmitHint(this IDiagnosticsCollector collector, IFileInfo file, string message) => collector.EmitHint(file.FullName, message); + + /// Emit an error not associated with a file + public static void EmitGlobalError(this IDiagnosticsCollector collector, string message, Exception? 
e = null) => + collector.EmitError(string.Empty, message, e); + + /// Emit a warning not associated with a file + public static void EmitGlobalWarning(this IDiagnosticsCollector collector, string message) => + collector.EmitWarning(string.Empty, message); + + /// Emit a hint not associated with a file + public static void EmitGlobalHint(this IDiagnosticsCollector collector, string message) => + collector.EmitHint(string.Empty, message); } diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index f6b5a931c..0ddd6da19 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -2,10 +2,28 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Text.Json.Serialization; + namespace Elastic.Documentation.Search; public record DocumentationDocument { + [JsonPropertyName("title")] public string? Title { get; set; } + + [JsonPropertyName("body")] + public string? Body { get; set; } + + [JsonPropertyName("abstract")] + public string? Abstract { get; set; } + + [JsonPropertyName("headings")] + public string[] Headings { get; set; } = []; + + [JsonPropertyName("links")] + public string[] Links { get; set; } = []; + + [JsonPropertyName("url")] + public string? 
Url { get; set; } } diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs index 24fb2635d..70596341b 100644 --- a/src/Elastic.Markdown/DocumentationGenerator.cs +++ b/src/Elastic.Markdown/DocumentationGenerator.cs @@ -18,6 +18,7 @@ namespace Elastic.Markdown; +/// Used primarily for testing, do not use in production paths since it might keep references alive to long public interface IConversionCollector { void Collect(MarkdownFile file, MarkdownDocument document, string html); @@ -212,7 +213,7 @@ private async Task ExtractEmbeddedStaticResources(Cancel ctx) } } - private async Task ProcessFile(HashSet offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel token) + private async Task ProcessFile(HashSet offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel ctx) { if (!Context.Force) { @@ -222,17 +223,28 @@ private async Task ProcessFile(HashSet offendingFiles, DocumentationFile return; } - if (file is MarkdownFile markdown) - { - foreach (var exporter in _markdownExporters) - _ = await exporter.Export(markdown); - } - _logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName); - //TODO send file to OutputFile() so we can validate its scope is defined in navigation.yml var outputFile = OutputFile(file.RelativePath); if (outputFile is not null) - await _documentationFileExporter.ProcessFile(Context, file, outputFile, HtmlWriter, _conversionCollector, token); + { + var context = new ProcessingFileContext + { + BuildContext = Context, + OutputFile = outputFile, + ConversionCollector = _conversionCollector, + File = file, + HtmlWriter = HtmlWriter + }; + await _documentationFileExporter.ProcessFile(context, ctx); + if (file is MarkdownFile markdown) + { + foreach (var exporter in _markdownExporters) + { + var document = context.MarkdownDocument ??= await markdown.ParseFullAsync(ctx); + _ = await exporter.ExportAsync(new MarkdownExportContext { Document = 
document, File = markdown }, ctx); + } + } + } } private IFileInfo? OutputFile(string relativePath) diff --git a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs index beb73fd53..e85cf8cf5 100644 --- a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs +++ b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs @@ -5,16 +5,27 @@ using System.IO.Abstractions; using Elastic.Markdown.IO; using Elastic.Markdown.Slices; +using Markdig.Syntax; namespace Elastic.Markdown.Exporters; +public class ProcessingFileContext +{ + public required BuildContext BuildContext { get; init; } + public required DocumentationFile File { get; init; } + public required IFileInfo OutputFile { get; init; } + public required HtmlWriter HtmlWriter { get; init; } + public required IConversionCollector? ConversionCollector { get; init; } + + public MarkdownDocument? MarkdownDocument { get; set; } +} + public interface IDocumentationFileExporter { - /// Used in documentation state to ensure we break the build cache if a different exporter is chosen + /// Used in the documentation state to ensure we break the build cache if a different exporter is chosen string Name { get; } - Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, IConversionCollector? conversionCollector, - Cancel token); + ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx); Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx); } @@ -23,16 +34,14 @@ public abstract class DocumentationFileExporterBase(IFileSystem readFileSystem, { public abstract string Name { get; } - public abstract Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, - IConversionCollector? 
conversionCollector, - Cancel token); + public abstract ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx); protected async Task CopyFileFsAware(DocumentationFile file, IFileInfo outputFile, Cancel ctx) { // fast path, normal case. if (readFileSystem == writeFileSystem) readFileSystem.File.Copy(file.SourceFile.FullName, outputFile.FullName, true); - //slower when we are mocking the write filesystem + //slower when we are mocking the write-filesystem else { var bytes = await file.SourceFile.FileSystem.File.ReadAllBytesAsync(file.SourceFile.FullName, ctx); @@ -49,26 +58,20 @@ public async Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStre } } -public class DocumentationFileExporter( - IFileSystem readFileSystem, - IFileSystem writeFileSystem -) : DocumentationFileExporterBase(readFileSystem, writeFileSystem) +public class DocumentationFileExporter(IFileSystem readFileSystem, IFileSystem writeFileSystem) + : DocumentationFileExporterBase(readFileSystem, writeFileSystem) { - public override string Name { get; } = nameof(DocumentationFileExporter); + public override string Name => nameof(DocumentationFileExporter); - public override async Task ProcessFile(BuildContext context, DocumentationFile file, - IFileInfo outputFile, - HtmlWriter htmlWriter, - IConversionCollector? 
conversionCollector, - Cancel token) + public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx) { - if (file is MarkdownFile markdown) - await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token); + if (context.File is MarkdownFile markdown) + context.MarkdownDocument = await context.HtmlWriter.WriteAsync(context.OutputFile, markdown, context.ConversionCollector, ctx); else { - if (outputFile.Directory is { Exists: false }) - outputFile.Directory.Create(); - await CopyFileFsAware(file, outputFile, token); + if (context.OutputFile.Directory is { Exists: false }) + context.OutputFile.Directory.Create(); + await CopyFileFsAware(context.File, context.OutputFile, ctx); } } } diff --git a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs index 35a591508..b96704c64 100644 --- a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs @@ -3,10 +3,20 @@ // See the LICENSE file in the project root for more information using Elastic.Markdown.IO; +using Markdig.Syntax; namespace Elastic.Markdown.Exporters; +public class MarkdownExportContext +{ + public required MarkdownDocument Document { get; init; } + public required MarkdownFile File { get; init; } + public string? 
LLMText { get; set; } +} + public interface IMarkdownExporter { - ValueTask Export(MarkdownFile file); + ValueTask StartAsync(Cancel ctx = default); + ValueTask StopAsync(Cancel ctx = default); + ValueTask ExportAsync(MarkdownExportContext context, Cancel ctx); } diff --git a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs index 0185a0b44..391e88c6f 100644 --- a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs +++ b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs @@ -3,8 +3,6 @@ // See the LICENSE file in the project root for more information using System.IO.Abstractions; -using Elastic.Markdown.IO; -using Elastic.Markdown.Slices; namespace Elastic.Markdown.Exporters; @@ -12,9 +10,8 @@ public class NoopDocumentationFileExporter : IDocumentationFileExporter { public string Name { get; } = nameof(NoopDocumentationFileExporter); - public Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, - IConversionCollector? 
conversionCollector, Cancel token) => - Task.CompletedTask; + public ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx) => + ValueTask.CompletedTask; public Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx) => Task.CompletedTask; } diff --git a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs index f557297e9..66e9a04ca 100644 --- a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs +++ b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs @@ -14,18 +14,24 @@ public class RuleDocumentationFileExporter(IFileSystem readFileSystem, IFileSyst { public override string Name { get; } = nameof(RuleDocumentationFileExporter); - public override async Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, - IConversionCollector? conversionCollector, Cancel token) + public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx) { - if (file is DetectionRuleFile df) - await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context), df, conversionCollector, token); - else if (file is MarkdownFile markdown) - await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token); - else + var htmlWriter = context.HtmlWriter; + var outputFile = context.OutputFile; + var conversionCollector = context.ConversionCollector; + switch (context.File) { - if (outputFile.Directory is { Exists: false }) - outputFile.Directory.Create(); - await CopyFileFsAware(file, outputFile, token); + case DetectionRuleFile df: + context.MarkdownDocument = await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context.BuildContext), df, conversionCollector, ctx); + break; + case MarkdownFile markdown: + context.MarkdownDocument = await htmlWriter.WriteAsync(outputFile, markdown, 
conversionCollector, ctx); + break; + default: + if (outputFile.Directory is { Exists: false }) + outputFile.Directory.Create(); + await CopyFileFsAware(context.File, outputFile, ctx); + break; } } } diff --git a/src/Elastic.Markdown/IO/MarkdownFile.cs b/src/Elastic.Markdown/IO/MarkdownFile.cs index d6823cf06..3c533824e 100644 --- a/src/Elastic.Markdown/IO/MarkdownFile.cs +++ b/src/Elastic.Markdown/IO/MarkdownFile.cs @@ -17,6 +17,7 @@ using Elastic.Markdown.Slices; using Markdig; using Markdig.Extensions.Yaml; +using Markdig.Renderers.Roundtrip; using Markdig.Syntax; namespace Elastic.Markdown.IO; @@ -186,6 +187,17 @@ public async Task ParseFullAsync(Cancel ctx) return document; } + public static string ToLLMText(MarkdownDocument document) + { + using var sw = new StringWriter(); + var rr = new RoundtripRenderer(sw); + rr.Write(document); + var outputMarkdown = sw.ToString(); + + return outputMarkdown; + + } + private IReadOnlyDictionary GetSubstitutions() { var globalSubstitutions = _globalSubstitutions; diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs index 1feafcbf8..ad7fa8a48 100644 --- a/src/Elastic.Markdown/Myst/MarkdownParser.cs +++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs @@ -31,21 +31,11 @@ public class MarkdownParser(BuildContext build, IParserResolvers resolvers) private BuildContext Build { get; } = build; private IParserResolvers Resolvers { get; } = resolvers; - public Task MinimalParseAsync(IFileInfo path, Cancel ctx) - { - var state = new ParserState(Build) - { - MarkdownSourcePath = path, - YamlFrontMatter = null, - DocumentationFileLookup = Resolvers.DocumentationFileLookup, - CrossLinkResolver = Resolvers.CrossLinkResolver, - SkipValidation = true - }; - var context = new ParserContext(state); - return ParseAsync(path, context, MinimalPipeline, ctx); - } + public Task ParseAsync(IFileInfo path, YamlFrontMatter? 
matter, Cancel ctx) => ParseFromFile(path, matter, Pipeline, ctx); - public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) + public Task MinimalParseAsync(IFileInfo path, Cancel ctx) => ParseFromFile(path, null, MinimalPipeline, ctx); + + private Task ParseFromFile(IFileInfo path, YamlFrontMatter? matter, MarkdownPipeline pipeline, Cancel ctx) { var state = new ParserState(Build) { @@ -55,7 +45,7 @@ public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter CrossLinkResolver = Resolvers.CrossLinkResolver }; var context = new ParserContext(state); - return ParseAsync(path, context, Pipeline, ctx); + return ParseAsync(path, context, pipeline, ctx); } public Task ParseSnippetAsync(IFileInfo path, IFileInfo parentPath, YamlFrontMatter? matter, Cancel ctx) diff --git a/src/Elastic.Markdown/Slices/HtmlWriter.cs b/src/Elastic.Markdown/Slices/HtmlWriter.cs index 4e85fb1bc..3cd927b01 100644 --- a/src/Elastic.Markdown/Slices/HtmlWriter.cs +++ b/src/Elastic.Markdown/Slices/HtmlWriter.cs @@ -161,7 +161,7 @@ private async Task RenderLayout(MarkdownFile markdown, MarkdownDocument return await slice.RenderAsync(cancellationToken: ctx); } - public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? collector, Cancel ctx = default) + public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? 
collector, Cancel ctx = default) { if (outputFile.Directory is { Exists: false }) outputFile.Directory.Create(); @@ -184,8 +184,10 @@ public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConve } var document = await markdown.ParseFullAsync(ctx); + var rendered = await RenderLayout(markdown, document, ctx); collector?.Collect(markdown, document, rendered); await writeFileSystem.File.WriteAllTextAsync(path, rendered, ctx); + return document; } } diff --git a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs index 1ceca1093..7628b8da4 100644 --- a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs +++ b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs @@ -18,6 +18,10 @@ public class ErrataFileSourceRepository : ISourceRepository [SuppressMessage("Reliability", "CA2012:Use ValueTasks correctly")] public bool TryGet(string id, [NotNullWhen(true)] out Source? 
source) { + source = new Source(id, string.Empty); + if (id == string.Empty) + return true; + using var reader = new Utf8StreamReader(id); var text = Encoding.UTF8.GetString(reader.ReadToEndAsync().GetAwaiter().GetResult()); source = new Source(id, text); diff --git a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs index 58cf41c91..410a88eaf 100644 --- a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs +++ b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information using System.Collections.Frozen; -using Documentation.Assembler.Indexing; +using Documentation.Assembler.Exporters; using Documentation.Assembler.Navigation; using Elastic.Documentation.Legacy; using Elastic.Documentation.Links; @@ -14,6 +14,13 @@ namespace Documentation.Assembler.Building; +public enum ExportOption +{ + Html = 0, + LLMText = 1, + Elasticsearch = 2 +} + public class AssemblerBuilder( ILoggerFactory logger, AssembleContext context, @@ -27,7 +34,7 @@ public class AssemblerBuilder( private ILegacyUrlMapper? 
LegacyUrlMapper { get; } = legacyUrlMapper; - public async Task BuildAllAsync(FrozenDictionary assembleSets, Cancel ctx) + public async Task BuildAllAsync(FrozenDictionary assembleSets, IReadOnlySet exportOptions, Cancel ctx) { if (context.OutputDirectory.Exists) context.OutputDirectory.Delete(true); @@ -38,9 +45,18 @@ public async Task BuildAllAsync(FrozenDictionary(3); + if (exportOptions.Contains(ExportOption.LLMText)) + markdownExporters.Add(new LLMTextExporter()); + if (exportOptions.Contains(ExportOption.Elasticsearch) && esExporter is { }) + markdownExporters.Add(esExporter); + var noopBuild = !exportOptions.Contains(ExportOption.Html); + + var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx)); + await Task.WhenAll(tasks); foreach (var (_, set) in assembleSets) { @@ -53,7 +69,7 @@ public async Task BuildAllAsync(FrozenDictionary await e.StopAsync(ctx)); + await Task.WhenAll(tasks); } private static void CollectRedirects( @@ -102,7 +116,7 @@ string Resolve(string relativeMarkdownPath) } } - private async Task BuildAsync(AssemblerDocumentationSet set, IMarkdownExporter[]? markdownExporters, Cancel ctx) + private async Task BuildAsync(AssemblerDocumentationSet set, bool noop, IMarkdownExporter[]? markdownExporters, Cancel ctx) { var generator = new DocumentationGenerator( set.DocumentationSet, @@ -110,6 +124,7 @@ private async Task BuildAsync(AssemblerDocumentationSet set, I pathProvider, legacyUrlMapper: LegacyUrlMapper, positionalNavigation: navigation, + documentationExporter: noop ? 
new NoopDocumentationFileExporter() : null, markdownExporters: markdownExporters ); return await generator.GenerateAll(ctx); diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs index f2d4fdf92..ed0cd3730 100644 --- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs +++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs @@ -21,6 +21,7 @@ using Elastic.Markdown.Exporters; using Elastic.Markdown.IO; using Microsoft.Extensions.Logging; +using YamlDotNet.Core; namespace Documentation.Assembler.Cli; @@ -69,6 +70,7 @@ public async Task CloneAll( /// Treat warnings as errors and fail the build on warnings /// Allow indexing and following of html files /// The environment to build + /// configure exporters explicitly available (html,llmtext,es), defaults to html /// [Command("build-all")] public async Task BuildAll( @@ -76,8 +78,11 @@ public async Task BuildAll( bool? strict = null, bool? allowIndexing = null, string? environment = null, + [ExporterParser] IReadOnlySet? exporters = null, Cancel ctx = default) { + exporters ??= new HashSet([ExportOption.Html]); + AssignOutputLogger(); var githubEnvironmentInput = githubActionsService.GetInput("environment"); environment ??= !string.IsNullOrEmpty(githubEnvironmentInput) ? githubEnvironmentInput : "dev"; @@ -116,7 +121,7 @@ public async Task BuildAll( var historyMapper = new PageLegacyUrlMapper(assembleSources.HistoryMappings); var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper); - await builder.BuildAllAsync(assembleSources.AssembleSets, ctx); + await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx); var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory); sitemapBuilder.Generate(); @@ -193,3 +198,30 @@ await Parallel.ForEachAsync(repositories, return collector.Errors > 0 ? 
1 : 0; } } + +[AttributeUsage(AttributeTargets.Parameter)] +public class ExporterParserAttribute : Attribute, IArgumentParser> +{ + public static bool TryParse(ReadOnlySpan s, out IReadOnlySet result) + { + result = new HashSet([ExportOption.Html]); + var set = new HashSet(); + var ranges = s.Split(','); + foreach (var range in ranges) + { + ExportOption? export = s[range].Trim().ToString().ToLowerInvariant() switch + { + "llm" => ExportOption.LLMText, + "llmtext" => ExportOption.LLMText, + "es" => ExportOption.Elasticsearch, + "elasticsearch" => ExportOption.Elasticsearch, + "html" => ExportOption.Html, + _ => null + }; + if (export.HasValue) + _ = set.Add(export.Value); + } + result = set; + return true; + } +} diff --git a/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs new file mode 100644 index 000000000..8221ea28c --- /dev/null +++ b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs @@ -0,0 +1,135 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Search; +using Elastic.Documentation.Serialization; +using Elastic.Ingest.Elasticsearch; +using Elastic.Ingest.Elasticsearch.Semantic; +using Elastic.Markdown.Exporters; +using Elastic.Markdown.IO; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; +using Microsoft.Extensions.Logging; + +namespace Documentation.Assembler.Exporters; + +public class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable +{ + private readonly DiagnosticsCollector _collector; + private readonly SemanticIndexChannel _channel; + private readonly ILogger _logger; + + public ElasticsearchMarkdownExporter(ILoggerFactory logFactory, DiagnosticsCollector collector, string url, string apiKey) + { + _collector = collector; + _logger = logFactory.CreateLogger(); + var configuration = new ElasticsearchConfiguration(new Uri(url), new ApiKey(apiKey)) + { + //Uncomment to see the requests with Fiddler + //ProxyAddress = "http://localhost:8866" + }; + var transport = new DistributedTransport(configuration); + //The max num threads per allocated node, from testing its best to limit our max concurrency + //producing to this number as well + var indexNumThreads = 8; + var options = new SemanticIndexChannelOptions(transport) + { + BufferOptions = + { + OutboundBufferMaxSize = 100, + ExportMaxConcurrency = indexNumThreads, + ExportMaxRetries = 3 + }, + SerializerContext = SourceGenerationContext.Default, + IndexFormat = "documentation-{0:yyyy.MM.dd.HHmmss}", + IndexNumThreads = indexNumThreads, + ActiveSearchAlias = "documentation", + ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"), + ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2), + GetMapping = (inferenceId, _) => // language=json + $$""" + { + "properties": { + "title": { "type": "text" }, +
"body": { + "type": "text" + }, + "abstract": { + "type": "semantic_text", + "inference_id": "{{inferenceId}}" + } + } + } + """ + }; + _channel = new SemanticIndexChannel(options); + } + + public async ValueTask StartAsync(Cancel ctx = default) + { + _logger.LogInformation($"Bootstrapping {nameof(SemanticIndexChannel)} Elasticsearch target for indexing"); + _ = await _channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx); + } + + public async ValueTask StopAsync(Cancel ctx = default) + { + _logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch"); + var drained = await _channel.WaitForDrainAsync(null, ctx); + if (!drained) + _collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down"); + + _logger.LogInformation("Refreshing target index {Index}", _channel.IndexName); + var refreshed = await _channel.RefreshAsync(ctx); + if (!refreshed) + _logger.LogError("Refreshing target index {Index} did not complete successfully", _channel.IndexName); + + _logger.LogInformation("Applying aliases to {Index}", _channel.IndexName); + var swapped = await _channel.ApplyAliasesAsync(ctx); + if (!swapped) + _collector.EmitGlobalError($"{nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {_channel.IndexName}"); + } + + public void Dispose() + { + _channel.Complete(); + _channel.Dispose(); + GC.SuppressFinalize(this); + } + + private async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default) + { + if (_channel.TryWrite(document)) + return true; + + if (await _channel.WaitToWriteAsync(ctx)) + return _channel.TryWrite(document); + return false; + } + + public async ValueTask ExportAsync(MarkdownExportContext context, Cancel ctx) + { + var file = context.File; + var document = context.Document; + if (file.FileName.EndsWith(".toml", StringComparison.OrdinalIgnoreCase)) + return true; + + var url = file.Url; + // integrations are too big, we need to 
sanitize the fieldsets and example docs out of these. + if (url.Contains("/reference/integrations")) + return true; + + var body = context.LLMText ??= MarkdownFile.ToLLMText(document); + var doc = new DocumentationDocument + { + Title = file.Title, + //Body = body, + Abstract = !string.IsNullOrEmpty(body) + ? body[..Math.Min(body.Length, 400)] + : string.Empty, + Url = url + }; + return await TryWrite(doc, ctx); + } +} diff --git a/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs new file mode 100644 index 000000000..f91011d98 --- /dev/null +++ b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs @@ -0,0 +1,21 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Elastic.Markdown.Exporters; +using Elastic.Markdown.IO; + +namespace Documentation.Assembler.Exporters; + +public class LLMTextExporter : IMarkdownExporter +{ + public ValueTask StartAsync(CancellationToken ctx = default) => ValueTask.CompletedTask; + + public ValueTask StopAsync(CancellationToken ctx = default) => ValueTask.CompletedTask; + + public ValueTask ExportAsync(MarkdownExportContext context, CancellationToken ctx) + { + _ = context.LLMText ??= MarkdownFile.ToLLMText(context.Document); + return ValueTask.FromResult(true); + } +} diff --git a/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs deleted file mode 100644 index d2f7ccf84..000000000 --- a/src/tooling/docs-assembler/Indexing/ElasticsearchMarkdownExporter.cs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
-// See the LICENSE file in the project root for more information - -using Elastic.Documentation.Search; -using Elastic.Documentation.Serialization; -using Elastic.Ingest.Elasticsearch.Indices; -using Elastic.Markdown.Exporters; -using Elastic.Markdown.IO; -using Elastic.Transport; -using Elastic.Transport.Products.Elasticsearch; -using Microsoft.Extensions.Logging; - -namespace Documentation.Assembler.Indexing; - -public class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable -{ - private readonly IndexChannel _channel; - private readonly ILogger _logger; - - public ElasticsearchMarkdownExporter(ILoggerFactory logFactory, string url, string apiKey) - { - _logger = logFactory.CreateLogger(); - var configuration = new ElasticsearchConfiguration(new Uri(url), new ApiKey(apiKey)) - { - //Uncomment to see the requests with Fiddler - //ProxyAddress = "http://localhost:8866" - }; - var transport = new DistributedTransport(configuration); - var options = new IndexChannelOptions(transport) - { - SerializerContext = SourceGenerationContext.Default, - IndexFormat = "documentation", - ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"), - ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2) - }; - _channel = new IndexChannel(options); - } - - public async Task WaitForDrain() - { - _logger.LogInformation("Elasticsearch export: waiting for in flight exports"); - var drained = await _channel.WaitForDrainAsync(); - if (!drained) - _logger.LogError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down"); - } - - private async ValueTask TryWrite(DocumentationDocument document, Cancel ctx = default) - { - if (_channel.TryWrite(document)) - return true; - - if (await _channel.WaitToWriteAsync(ctx)) - return _channel.TryWrite(document); - return false; - } - - public void Dispose() - { - _channel.Complete(); - _channel.Dispose(); - 
GC.SuppressFinalize(this); - } - - public async ValueTask Export(MarkdownFile file) - { - var doc = new DocumentationDocument - { - Title = file.Title, - }; - return await TryWrite(doc); - } -} From 2b2e7829830e9e92a435ace6e23cbea5c949a1e7 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Tue, 27 May 2025 22:29:52 +0200 Subject: [PATCH 4/5] ensure minimal parse skips validation again --- src/Elastic.Markdown/Myst/MarkdownParser.cs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs index ad7fa8a48..8fb336545 100644 --- a/src/Elastic.Markdown/Myst/MarkdownParser.cs +++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs @@ -31,18 +31,23 @@ public class MarkdownParser(BuildContext build, IParserResolvers resolvers) private BuildContext Build { get; } = build; private IParserResolvers Resolvers { get; } = resolvers; - public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) => ParseFromFile(path, matter, Pipeline, ctx); + public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) => + ParseFromFile(path, matter, Pipeline, false, ctx); - public Task MinimalParseAsync(IFileInfo path, Cancel ctx) => ParseFromFile(path, null, MinimalPipeline, ctx); + public Task MinimalParseAsync(IFileInfo path, Cancel ctx) => + ParseFromFile(path, null, MinimalPipeline, true, ctx); - private Task ParseFromFile(IFileInfo path, YamlFrontMatter? matter, MarkdownPipeline pipeline, Cancel ctx) + private Task ParseFromFile( + IFileInfo path, YamlFrontMatter? 
matter, MarkdownPipeline pipeline, bool skip, Cancel ctx + ) { var state = new ParserState(Build) { MarkdownSourcePath = path, YamlFrontMatter = matter, DocumentationFileLookup = Resolvers.DocumentationFileLookup, - CrossLinkResolver = Resolvers.CrossLinkResolver + CrossLinkResolver = Resolvers.CrossLinkResolver, + SkipValidation = skip }; var context = new ParserContext(state); return ParseAsync(path, context, pipeline, ctx); From 90c8031c869f718c7628f2a27151dbcd495ead51 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Thu, 29 May 2025 13:48:30 +0200 Subject: [PATCH 5/5] Bump to ingest and transport that AOT compile under .NET 9 --- Directory.Packages.props | 2 +- src/tooling/docs-assembler/docs-assembler.csproj | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index b47004d73..cdb7e0b4c 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -16,7 +16,7 @@ - + diff --git a/src/tooling/docs-assembler/docs-assembler.csproj b/src/tooling/docs-assembler/docs-assembler.csproj index 12d1823ae..91133e4d2 100644 --- a/src/tooling/docs-assembler/docs-assembler.csproj +++ b/src/tooling/docs-assembler/docs-assembler.csproj @@ -10,6 +10,7 @@ true true true + false true true