diff --git a/Directory.Packages.props b/Directory.Packages.props
index dd121ad7c..cdb7e0b4c 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -16,6 +16,7 @@
+
@@ -63,4 +64,4 @@
-
+
\ No newline at end of file
diff --git a/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
new file mode 100644
index 000000000..73f028e53
--- /dev/null
+++ b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net9.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
index fc98220dd..146ce2874 100644
--- a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
+++ b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
@@ -35,6 +35,18 @@ public static void EmitWarning(this IDiagnosticsCollector collector, IFileInfo f
public static void EmitHint(this IDiagnosticsCollector collector, IFileInfo file, string message) =>
collector.EmitHint(file.FullName, message);
+
+ /// Emit an error not associated with a file
+ public static void EmitGlobalError(this IDiagnosticsCollector collector, string message, Exception? e = null) =>
+ collector.EmitError(string.Empty, message, e);
+
+ /// Emit a warning not associated with a file
+ public static void EmitGlobalWarning(this IDiagnosticsCollector collector, string message) =>
+ collector.EmitWarning(string.Empty, message);
+
+ /// Emit a hint not associated with a file
+ public static void EmitGlobalHint(this IDiagnosticsCollector collector, string message) =>
+ collector.EmitHint(string.Empty, message);
}
diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs
new file mode 100644
index 000000000..0ddd6da19
--- /dev/null
+++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs
@@ -0,0 +1,29 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System.Text.Json.Serialization;
+
+namespace Elastic.Documentation.Search;
+
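+/// The shape of a documentation page as it is indexed into Elasticsearch by the assembler's markdown exporters.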
+public record DocumentationDocument
+{
+ [JsonPropertyName("title")]
+ public string? Title { get; set; }
+
+ [JsonPropertyName("body")]
+ public string? Body { get; set; }
+
+ [JsonPropertyName("abstract")]
+ public string? Abstract { get; set; }
+
+ [JsonPropertyName("headings")]
+ public string[] Headings { get; set; } = [];
+
+ [JsonPropertyName("links")]
+ public string[] Links { get; set; } = [];
+
+ [JsonPropertyName("url")]
+ public string? Url { get; set; }
+}
+
diff --git a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
index 69bab9947..a0067ac6f 100644
--- a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
+++ b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
@@ -4,6 +4,7 @@
using System.Text.Json.Serialization;
using Elastic.Documentation.Links;
+using Elastic.Documentation.Search;
using Elastic.Documentation.State;
namespace Elastic.Documentation.Serialization;
@@ -16,4 +17,5 @@ namespace Elastic.Documentation.Serialization;
[JsonSerializable(typeof(GitCheckoutInformation))]
[JsonSerializable(typeof(LinkRegistry))]
[JsonSerializable(typeof(LinkRegistryEntry))]
+[JsonSerializable(typeof(DocumentationDocument))]
public sealed partial class SourceGenerationContext : JsonSerializerContext;
diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs
index 3a91c5f65..70596341b 100644
--- a/src/Elastic.Markdown/DocumentationGenerator.cs
+++ b/src/Elastic.Markdown/DocumentationGenerator.cs
@@ -18,6 +18,7 @@
namespace Elastic.Markdown;
+/// Used primarily for testing, do not use in production paths since it might keep references alive too long
public interface IConversionCollector
{
void Collect(MarkdownFile file, MarkdownDocument document, string html);
@@ -40,6 +41,7 @@ public class DocumentationGenerator
private readonly ILogger _logger;
private readonly IFileSystem _writeFileSystem;
private readonly IDocumentationFileExporter _documentationFileExporter;
+ private readonly IMarkdownExporter[] _markdownExporters;
private HtmlWriter HtmlWriter { get; }
public DocumentationSet DocumentationSet { get; }
@@ -51,12 +53,14 @@ public DocumentationGenerator(
ILoggerFactory logger,
INavigationHtmlWriter? navigationHtmlWriter = null,
IDocumentationFileOutputProvider? documentationFileOutputProvider = null,
+ IMarkdownExporter[]? markdownExporters = null,
IDocumentationFileExporter? documentationExporter = null,
IConversionCollector? conversionCollector = null,
ILegacyUrlMapper? legacyUrlMapper = null,
IPositionalNavigation? positionalNavigation = null
)
{
+ _markdownExporters = markdownExporters ?? [];
_documentationFileOutputProvider = documentationFileOutputProvider;
_conversionCollector = conversionCollector;
_writeFileSystem = docSet.Context.WriteFileSystem;
@@ -100,7 +104,7 @@ public async Task GenerateAll(Cancel ctx)
var generationState = Context.SkipDocumentationState ? null : GetPreviousGenerationState();
- // clear output directory if force is true but never for assembler builds since these build multiple times to the output.
+ // clear the output directory if force is true but never for assembler builds since these build multiple times to the output.
if (Context is { AssemblerBuild: false, Force: true }
// clear the output directory if force is false but generation state is null, except for assembler builds.
|| (Context is { AssemblerBuild: false, Force: false } && generationState == null))
@@ -209,7 +213,7 @@ private async Task ExtractEmbeddedStaticResources(Cancel ctx)
}
}
- private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel token)
+ private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel ctx)
{
if (!Context.Force)
{
@@ -220,10 +224,27 @@ private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile
}
_logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName);
- //TODO send file to OutputFile() so we can validate its scope is defined in navigation.yml
var outputFile = OutputFile(file.RelativePath);
if (outputFile is not null)
- await _documentationFileExporter.ProcessFile(Context, file, outputFile, HtmlWriter, _conversionCollector, token);
+ {
+ var context = new ProcessingFileContext
+ {
+ BuildContext = Context,
+ OutputFile = outputFile,
+ ConversionCollector = _conversionCollector,
+ File = file,
+ HtmlWriter = HtmlWriter
+ };
+ await _documentationFileExporter.ProcessFile(context, ctx);
+ if (file is MarkdownFile markdown)
+ {
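+ // Reuse the document parsed by the HTML exporter when available; otherwise parse once and share it across all markdown exporters.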
+ foreach (var exporter in _markdownExporters)
+ {
+ var document = context.MarkdownDocument ??= await markdown.ParseFullAsync(ctx);
+ _ = await exporter.ExportAsync(new MarkdownExportContext { Document = document, File = markdown }, ctx);
+ }
+ }
+ }
}
private IFileInfo? OutputFile(string relativePath)
diff --git a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
index beb73fd53..e85cf8cf5 100644
--- a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
@@ -5,16 +5,27 @@
using System.IO.Abstractions;
using Elastic.Markdown.IO;
using Elastic.Markdown.Slices;
+using Markdig.Syntax;
namespace Elastic.Markdown.Exporters;
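+/// Carries the state needed to process a single documentation file; MarkdownDocument caches the parsed document so markdown exporters can reuse it without re-parsing.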
+public class ProcessingFileContext
+{
+ public required BuildContext BuildContext { get; init; }
+ public required DocumentationFile File { get; init; }
+ public required IFileInfo OutputFile { get; init; }
+ public required HtmlWriter HtmlWriter { get; init; }
+ public required IConversionCollector? ConversionCollector { get; init; }
+
+ public MarkdownDocument? MarkdownDocument { get; set; }
+}
+
public interface IDocumentationFileExporter
{
- /// Used in documentation state to ensure we break the build cache if a different exporter is chosen
+ /// Used in the documentation state to ensure we break the build cache if a different exporter is chosen
string Name { get; }
- Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, IConversionCollector? conversionCollector,
- Cancel token);
+ ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx);
Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx);
}
@@ -23,16 +34,14 @@ public abstract class DocumentationFileExporterBase(IFileSystem readFileSystem,
{
public abstract string Name { get; }
- public abstract Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
- IConversionCollector? conversionCollector,
- Cancel token);
+ public abstract ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx);
protected async Task CopyFileFsAware(DocumentationFile file, IFileInfo outputFile, Cancel ctx)
{
// fast path, normal case.
if (readFileSystem == writeFileSystem)
readFileSystem.File.Copy(file.SourceFile.FullName, outputFile.FullName, true);
- //slower when we are mocking the write filesystem
+ //slower when we are mocking the write-filesystem
else
{
var bytes = await file.SourceFile.FileSystem.File.ReadAllBytesAsync(file.SourceFile.FullName, ctx);
@@ -49,26 +58,20 @@ public async Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStre
}
}
-public class DocumentationFileExporter(
- IFileSystem readFileSystem,
- IFileSystem writeFileSystem
-) : DocumentationFileExporterBase(readFileSystem, writeFileSystem)
+public class DocumentationFileExporter(IFileSystem readFileSystem, IFileSystem writeFileSystem)
+ : DocumentationFileExporterBase(readFileSystem, writeFileSystem)
{
- public override string Name { get; } = nameof(DocumentationFileExporter);
+ public override string Name => nameof(DocumentationFileExporter);
- public override async Task ProcessFile(BuildContext context, DocumentationFile file,
- IFileInfo outputFile,
- HtmlWriter htmlWriter,
- IConversionCollector? conversionCollector,
- Cancel token)
+ public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx)
{
- if (file is MarkdownFile markdown)
- await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token);
+ if (context.File is MarkdownFile markdown)
+ context.MarkdownDocument = await context.HtmlWriter.WriteAsync(context.OutputFile, markdown, context.ConversionCollector, ctx);
else
{
- if (outputFile.Directory is { Exists: false })
- outputFile.Directory.Create();
- await CopyFileFsAware(file, outputFile, token);
+ if (context.OutputFile.Directory is { Exists: false })
+ context.OutputFile.Directory.Create();
+ await CopyFileFsAware(context.File, context.OutputFile, ctx);
}
}
}
diff --git a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs
new file mode 100644
index 000000000..b96704c64
--- /dev/null
+++ b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs
@@ -0,0 +1,22 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Markdown.IO;
+using Markdig.Syntax;
+
+namespace Elastic.Markdown.Exporters;
+
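+/// Shared per-document export state; LLMText caches the round-tripped markdown so multiple exporters do not re-render it.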
+public class MarkdownExportContext
+{
+ public required MarkdownDocument Document { get; init; }
+ public required MarkdownFile File { get; init; }
+ public string? LLMText { get; set; }
+}
+
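+/// Exports parsed markdown documents to additional targets (for example LLM text or Elasticsearch), returning false when a document could not be written.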
+public interface IMarkdownExporter
+{
+ ValueTask StartAsync(Cancel ctx = default);
+ ValueTask StopAsync(Cancel ctx = default);
+ ValueTask<bool> ExportAsync(MarkdownExportContext context, Cancel ctx);
+}
diff --git a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
index 0185a0b44..391e88c6f 100644
--- a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
@@ -3,8 +3,6 @@
// See the LICENSE file in the project root for more information
using System.IO.Abstractions;
-using Elastic.Markdown.IO;
-using Elastic.Markdown.Slices;
namespace Elastic.Markdown.Exporters;
@@ -12,9 +10,8 @@ public class NoopDocumentationFileExporter : IDocumentationFileExporter
{
public string Name { get; } = nameof(NoopDocumentationFileExporter);
- public Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
- IConversionCollector? conversionCollector, Cancel token) =>
- Task.CompletedTask;
+ public ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx) =>
+ ValueTask.CompletedTask;
public Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx) => Task.CompletedTask;
}
diff --git a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
index f557297e9..66e9a04ca 100644
--- a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
@@ -14,18 +14,24 @@ public class RuleDocumentationFileExporter(IFileSystem readFileSystem, IFileSyst
{
public override string Name { get; } = nameof(RuleDocumentationFileExporter);
- public override async Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
- IConversionCollector? conversionCollector, Cancel token)
+ public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx)
{
- if (file is DetectionRuleFile df)
- await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context), df, conversionCollector, token);
- else if (file is MarkdownFile markdown)
- await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token);
- else
+ var htmlWriter = context.HtmlWriter;
+ var outputFile = context.OutputFile;
+ var conversionCollector = context.ConversionCollector;
+ switch (context.File)
{
- if (outputFile.Directory is { Exists: false })
- outputFile.Directory.Create();
- await CopyFileFsAware(file, outputFile, token);
+ case DetectionRuleFile df:
+ context.MarkdownDocument = await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context.BuildContext), df, conversionCollector, ctx);
+ break;
+ case MarkdownFile markdown:
+ context.MarkdownDocument = await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, ctx);
+ break;
+ default:
+ if (outputFile.Directory is { Exists: false })
+ outputFile.Directory.Create();
+ await CopyFileFsAware(context.File, outputFile, ctx);
+ break;
}
}
}
diff --git a/src/Elastic.Markdown/IO/MarkdownFile.cs b/src/Elastic.Markdown/IO/MarkdownFile.cs
index d6823cf06..3c533824e 100644
--- a/src/Elastic.Markdown/IO/MarkdownFile.cs
+++ b/src/Elastic.Markdown/IO/MarkdownFile.cs
@@ -17,6 +17,7 @@
using Elastic.Markdown.Slices;
using Markdig;
using Markdig.Extensions.Yaml;
+using Markdig.Renderers.Roundtrip;
using Markdig.Syntax;
namespace Elastic.Markdown.IO;
@@ -186,6 +187,17 @@ public async Task<MarkdownDocument> ParseFullAsync(Cancel ctx)
return document;
}
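+ /// Renders the parsed document back to Markdown text via Markdig's RoundtripRenderer; used by the LLM text and Elasticsearch exporters.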
+ public static string ToLLMText(MarkdownDocument document)
+ {
+ using var sw = new StringWriter();
+ var rr = new RoundtripRenderer(sw);
+ rr.Write(document);
+ return sw.ToString();
+ }
+
private IReadOnlyDictionary<string, string> GetSubstitutions()
{
var globalSubstitutions = _globalSubstitutions;
diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs
index 1feafcbf8..8fb336545 100644
--- a/src/Elastic.Markdown/Myst/MarkdownParser.cs
+++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs
@@ -31,31 +31,26 @@ public class MarkdownParser(BuildContext build, IParserResolvers resolvers)
private BuildContext Build { get; } = build;
private IParserResolvers Resolvers { get; } = resolvers;
- public Task<MarkdownDocument> MinimalParseAsync(IFileInfo path, Cancel ctx)
- {
- var state = new ParserState(Build)
- {
- MarkdownSourcePath = path,
- YamlFrontMatter = null,
- DocumentationFileLookup = Resolvers.DocumentationFileLookup,
- CrossLinkResolver = Resolvers.CrossLinkResolver,
- SkipValidation = true
- };
- var context = new ParserContext(state);
- return ParseAsync(path, context, MinimalPipeline, ctx);
- }
+ public Task<MarkdownDocument> ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) =>
+ ParseFromFile(path, matter, Pipeline, false, ctx);
+
+ public Task<MarkdownDocument> MinimalParseAsync(IFileInfo path, Cancel ctx) =>
+ ParseFromFile(path, null, MinimalPipeline, true, ctx);
- public Task<MarkdownDocument> ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx)
+ private Task<MarkdownDocument> ParseFromFile(
+ IFileInfo path, YamlFrontMatter? matter, MarkdownPipeline pipeline, bool skip, Cancel ctx
+ )
{
var state = new ParserState(Build)
{
MarkdownSourcePath = path,
YamlFrontMatter = matter,
DocumentationFileLookup = Resolvers.DocumentationFileLookup,
- CrossLinkResolver = Resolvers.CrossLinkResolver
+ CrossLinkResolver = Resolvers.CrossLinkResolver,
+ SkipValidation = skip
};
var context = new ParserContext(state);
- return ParseAsync(path, context, Pipeline, ctx);
+ return ParseAsync(path, context, pipeline, ctx);
}
public Task<MarkdownDocument> ParseSnippetAsync(IFileInfo path, IFileInfo parentPath, YamlFrontMatter? matter, Cancel ctx)
diff --git a/src/Elastic.Markdown/Slices/HtmlWriter.cs b/src/Elastic.Markdown/Slices/HtmlWriter.cs
index 4e85fb1bc..3cd927b01 100644
--- a/src/Elastic.Markdown/Slices/HtmlWriter.cs
+++ b/src/Elastic.Markdown/Slices/HtmlWriter.cs
@@ -161,7 +161,7 @@ private async Task<string> RenderLayout(MarkdownFile markdown, MarkdownDocument
return await slice.RenderAsync(cancellationToken: ctx);
}
- public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? collector, Cancel ctx = default)
+ public async Task<MarkdownDocument> WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? collector, Cancel ctx = default)
{
if (outputFile.Directory is { Exists: false })
outputFile.Directory.Create();
@@ -184,8 +184,10 @@ public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConve
}
var document = await markdown.ParseFullAsync(ctx);
+
var rendered = await RenderLayout(markdown, document, ctx);
collector?.Collect(markdown, document, rendered);
await writeFileSystem.File.WriteAllTextAsync(path, rendered, ctx);
+ return document;
}
}
diff --git a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
index 1ceca1093..7628b8da4 100644
--- a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
+++ b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
@@ -18,6 +18,10 @@ public class ErrataFileSourceRepository : ISourceRepository
[SuppressMessage("Reliability", "CA2012:Use ValueTasks correctly")]
public bool TryGet(string id, [NotNullWhen(true)] out Source? source)
{
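+ // Global (non-file) diagnostics are emitted with an empty id; return an empty source instead of trying to read a file from disk.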
+ source = new Source(id, string.Empty);
+ if (id == string.Empty)
+ return true;
+
using var reader = new Utf8StreamReader(id);
var text = Encoding.UTF8.GetString(reader.ReadToEndAsync().GetAwaiter().GetResult());
source = new Source(id, text);
diff --git a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
index 943107bab..410a88eaf 100644
--- a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
+++ b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
@@ -3,15 +3,24 @@
// See the LICENSE file in the project root for more information
using System.Collections.Frozen;
+using Documentation.Assembler.Exporters;
using Documentation.Assembler.Navigation;
using Elastic.Documentation.Legacy;
using Elastic.Documentation.Links;
using Elastic.Markdown;
+using Elastic.Markdown.Exporters;
using Elastic.Markdown.Links.CrossLinks;
using Microsoft.Extensions.Logging;
namespace Documentation.Assembler.Building;
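+/// The artifacts a build-all run can produce: rendered HTML, LLM-friendly markdown text, and/or an Elasticsearch index.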
+public enum ExportOption
+{
+ Html = 0,
+ LLMText = 1,
+ Elasticsearch = 2
+}
+
public class AssemblerBuilder(
ILoggerFactory logger,
AssembleContext context,
@@ -25,7 +34,7 @@ public class AssemblerBuilder(
private ILegacyUrlMapper? LegacyUrlMapper { get; } = legacyUrlMapper;
- public async Task BuildAllAsync(FrozenDictionary<string, AssemblerDocumentationSet> assembleSets, Cancel ctx)
+ public async Task BuildAllAsync(FrozenDictionary<string, AssemblerDocumentationSet> assembleSets, IReadOnlySet<ExportOption> exportOptions, Cancel ctx)
{
if (context.OutputDirectory.Exists)
context.OutputDirectory.Delete(true);
@@ -33,6 +42,22 @@ public async Task BuildAllAsync(FrozenDictionary<string, AssemblerDocumentationSet> assembleSets, Cancel ctx)
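+ // The Elasticsearch exporter is only created when both ELASTIC_API_KEY and ELASTIC_URL are present in the environment.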
+ var esExporter =
+ Environment.GetEnvironmentVariable("ELASTIC_API_KEY") is { } apiKey &&
+ Environment.GetEnvironmentVariable("ELASTIC_URL") is { } url
+ ? new ElasticsearchMarkdownExporter(logger, context.Collector, url, apiKey)
+ : null;
+
+ var markdownExporters = new List<IMarkdownExporter>(3);
+ if (exportOptions.Contains(ExportOption.LLMText))
+ markdownExporters.Add(new LLMTextExporter());
+ if (exportOptions.Contains(ExportOption.Elasticsearch) && esExporter is { })
+ markdownExporters.Add(esExporter);
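+ // When HTML output is not requested the file exporter is swapped for a no-op, so only the markdown exporters do work.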
+ var noopBuild = !exportOptions.Contains(ExportOption.Html);
+
+ var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx));
+ await Task.WhenAll(tasks);
+
foreach (var (_, set) in assembleSets)
{
var checkout = set.Checkout;
@@ -44,7 +69,7 @@ public async Task BuildAllAsync(FrozenDictionary<string, AssemblerDocumentationSet> assembleSets, Cancel ctx)
+ tasks = markdownExporters.Select(async e => await e.StopAsync(ctx));
+ await Task.WhenAll(tasks);
}
private static void CollectRedirects(
@@ -90,14 +116,16 @@ string Resolve(string relativeMarkdownPath)
}
}
- private async Task BuildAsync(AssemblerDocumentationSet set, Cancel ctx)
+ private async Task BuildAsync(AssemblerDocumentationSet set, bool noop, IMarkdownExporter[]? markdownExporters, Cancel ctx)
{
var generator = new DocumentationGenerator(
set.DocumentationSet,
logger, HtmlWriter,
pathProvider,
legacyUrlMapper: LegacyUrlMapper,
- positionalNavigation: navigation
+ positionalNavigation: navigation,
+ documentationExporter: noop ? new NoopDocumentationFileExporter() : null,
+ markdownExporters: markdownExporters
);
return await generator.GenerateAll(ctx);
}
diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
index 99a9bdbf8..ed0cd3730 100644
--- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
+++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
@@ -21,6 +21,7 @@
using Elastic.Markdown.Exporters;
using Elastic.Markdown.IO;
using Microsoft.Extensions.Logging;
+using YamlDotNet.Core;
namespace Documentation.Assembler.Cli;
@@ -69,6 +70,7 @@ public async Task CloneAll(
/// Treat warnings as errors and fail the build on warnings
/// Allow indexing and following of html files
/// The environment to build
+ /// Configure which exporters are enabled (html, llmtext, es); defaults to html
///
[Command("build-all")]
public async Task BuildAll(
@@ -76,8 +78,11 @@ public async Task BuildAll(
bool? strict = null,
bool? allowIndexing = null,
string? environment = null,
+ [ExporterParser] IReadOnlySet<ExportOption>? exporters = null,
Cancel ctx = default)
{
+ exporters ??= new HashSet<ExportOption>([ExportOption.Html]);
+
AssignOutputLogger();
var githubEnvironmentInput = githubActionsService.GetInput("environment");
environment ??= !string.IsNullOrEmpty(githubEnvironmentInput) ? githubEnvironmentInput : "dev";
@@ -116,7 +121,7 @@ public async Task BuildAll(
var historyMapper = new PageLegacyUrlMapper(assembleSources.HistoryMappings);
var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
- await builder.BuildAllAsync(assembleSources.AssembleSets, ctx);
+ await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx);
var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory);
sitemapBuilder.Generate();
@@ -163,7 +168,7 @@ await Parallel.ForEachAsync(repositories,
outputPath
);
var set = new DocumentationSet(context, logger);
- var generator = new DocumentationGenerator(set, logger, null, null, new NoopDocumentationFileExporter());
+ var generator = new DocumentationGenerator(set, logger, null, null, null, new NoopDocumentationFileExporter());
_ = await generator.GenerateAll(c);
IAmazonS3 s3Client = new AmazonS3Client();
@@ -193,3 +198,30 @@ await Parallel.ForEachAsync(repositories,
return collector.Errors > 0 ? 1 : 0;
}
}
+
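+/// Parses the exporters argument as a comma separated list of "html", "llm"/"llmtext" and "es"/"elasticsearch"; unrecognised values are ignored.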
+[AttributeUsage(AttributeTargets.Parameter)]
+public class ExporterParserAttribute : Attribute, IArgumentParser<IReadOnlySet<ExportOption>>
+{
+ public static bool TryParse(ReadOnlySpan<char> s, out IReadOnlySet<ExportOption> result)
+ {
+ result = new HashSet<ExportOption>([ExportOption.Html]);
+ var set = new HashSet<ExportOption>();
+ var ranges = s.Split(',');
+ foreach (var range in ranges)
+ {
+ ExportOption? export = s[range].Trim().ToString().ToLowerInvariant() switch
+ {
+ "llm" => ExportOption.LLMText,
+ "llmtext" => ExportOption.LLMText,
+ "es" => ExportOption.Elasticsearch,
+ "elasticsearch" => ExportOption.Elasticsearch,
+ "html" => ExportOption.Html,
+ _ => null
+ };
+ if (export.HasValue)
+ _ = set.Add(export.Value);
+ }
+ result = set;
+ return true;
+ }
+}
diff --git a/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs
new file mode 100644
index 000000000..8221ea28c
--- /dev/null
+++ b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs
@@ -0,0 +1,135 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.Search;
+using Elastic.Documentation.Serialization;
+using Elastic.Ingest.Elasticsearch;
+using Elastic.Ingest.Elasticsearch.Semantic;
+using Elastic.Markdown.Exporters;
+using Elastic.Markdown.IO;
+using Elastic.Transport;
+using Elastic.Transport.Products.Elasticsearch;
+using Microsoft.Extensions.Logging;
+
+namespace Documentation.Assembler.Exporters;
+
+public class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable
+{
+ private readonly DiagnosticsCollector _collector;
+ private readonly SemanticIndexChannel<DocumentationDocument> _channel;
+ private readonly ILogger _logger;
+
+ public ElasticsearchMarkdownExporter(ILoggerFactory logFactory, DiagnosticsCollector collector, string url, string apiKey)
+ {
+ _collector = collector;
+ _logger = logFactory.CreateLogger<ElasticsearchMarkdownExporter>();
+ var configuration = new ElasticsearchConfiguration(new Uri(url), new ApiKey(apiKey))
+ {
+ //Uncomment to see the requests with Fiddler
+ //ProxyAddress = "http://localhost:8866"
+ };
+ var transport = new DistributedTransport(configuration);
+ //The max number of threads per allocated node; from testing, it's best to limit our max producer concurrency
+ //to this number as well
+ var indexNumThreads = 8;
+ var options = new SemanticIndexChannelOptions<DocumentationDocument>(transport)
+ {
+ BufferOptions =
+ {
+ OutboundBufferMaxSize = 100,
+ ExportMaxConcurrency = indexNumThreads,
+ ExportMaxRetries = 3
+ },
+ SerializerContext = SourceGenerationContext.Default,
+ IndexFormat = "documentation-{0:yyyy.MM.dd.HHmmss}",
+ IndexNumThreads = indexNumThreads,
+ ActiveSearchAlias = "documentation",
+ ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"),
+ ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2),
+ GetMapping = (inferenceId, _) => // language=json
+ $$"""
+ {
+ "properties": {
+ "title": { "type": "text" },
+ "body": {
+ "type": "text"
+ },
+ "abstract": {
+ "type": "semantic_text",
+ "inference_id": "{{inferenceId}}"
+ }
+ }
+ }
+ """
+ };
+ _channel = new SemanticIndexChannel<DocumentationDocument>(options);
+ }
+
+ public async ValueTask StartAsync(Cancel ctx = default)
+ {
+ _logger.LogInformation($"Bootstrapping {nameof(SemanticIndexChannel<DocumentationDocument>)} Elasticsearch target for indexing");
+ _ = await _channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx);
+ }
+
+ public async ValueTask StopAsync(Cancel ctx = default)
+ {
+ _logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch");
+ var drained = await _channel.WaitForDrainAsync(null, ctx);
+ if (!drained)
+ _collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down");
+
+ _logger.LogInformation("Refreshing target index {Index}", _channel.IndexName);
+ var refreshed = await _channel.RefreshAsync(ctx);
+ if (!refreshed)
+ _logger.LogError("Refreshing target index {Index} did not complete successfully", _channel.IndexName);
+
+ _logger.LogInformation("Applying aliases to {Index}", _channel.IndexName);
+ var swapped = await _channel.ApplyAliasesAsync(ctx);
+ if (!swapped)
+ _collector.EmitGlobalError($"{nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {_channel.IndexName}");
+ }
+
+ public void Dispose()
+ {
+ _channel.Complete();
+ _channel.Dispose();
+ GC.SuppressFinalize(this);
+ }
+
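+ // Writes a document to the channel, waiting once for buffer capacity if the channel is currently full.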
+ private async ValueTask<bool> TryWrite(DocumentationDocument document, Cancel ctx = default)
+ {
+ if (_channel.TryWrite(document))
+ return true;
+
+ if (await _channel.WaitToWriteAsync(ctx))
+ return _channel.TryWrite(document);
+ return false;
+ }
+
+ public async ValueTask<bool> ExportAsync(MarkdownExportContext context, Cancel ctx)
+ {
+ var file = context.File;
+ var document = context.Document;
+ if (file.FileName.EndsWith(".toml", StringComparison.OrdinalIgnoreCase))
+ return true;
+
+ var url = file.Url;
+ // integrations are too big, we need to sanitize the fieldsets and example docs out of these.
+ if (url.Contains("/reference/integrations"))
+ return true;
+
+ var body = context.LLMText ??= MarkdownFile.ToLLMText(document);
+ var doc = new DocumentationDocument
+ {
+ Title = file.Title,
+ //Body = body,
+ Abstract = !string.IsNullOrEmpty(body)
+ ? body[..Math.Min(body.Length, 400)]
+ : string.Empty,
+ Url = url
+ };
+ return await TryWrite(doc, ctx);
+ }
+}
diff --git a/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs
new file mode 100644
index 000000000..f91011d98
--- /dev/null
+++ b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs
@@ -0,0 +1,21 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Markdown.Exporters;
+using Elastic.Markdown.IO;
+
+namespace Documentation.Assembler.Exporters;
+
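+/// Round-trips each document back to markdown text; for now it only populates the shared LLMText cache on the export context.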
+public class LLMTextExporter : IMarkdownExporter
+{
+ public ValueTask StartAsync(CancellationToken ctx = default) => ValueTask.CompletedTask;
+
+ public ValueTask StopAsync(CancellationToken ctx = default) => ValueTask.CompletedTask;
+
+ public ValueTask<bool> ExportAsync(MarkdownExportContext context, CancellationToken ctx)
+ {
+ _ = context.LLMText ??= MarkdownFile.ToLLMText(context.Document);
+ return ValueTask.FromResult(true);
+ }
+}
diff --git a/src/tooling/docs-assembler/docs-assembler.csproj b/src/tooling/docs-assembler/docs-assembler.csproj
index cfed86523..91133e4d2 100644
--- a/src/tooling/docs-assembler/docs-assembler.csproj
+++ b/src/tooling/docs-assembler/docs-assembler.csproj
@@ -10,6 +10,7 @@
true
true
true
+ false
true
true
@@ -19,6 +20,7 @@
+
diff --git a/src/tooling/docs-builder/Cli/Commands.cs b/src/tooling/docs-builder/Cli/Commands.cs
index 5dacaec9d..f2fcbad1a 100644
--- a/src/tooling/docs-builder/Cli/Commands.cs
+++ b/src/tooling/docs-builder/Cli/Commands.cs
@@ -152,7 +152,7 @@ public async Task Generate(
metadataOnly ??= metaValue;
var exporter = metadataOnly.HasValue && metadataOnly.Value ? new NoopDocumentationFileExporter() : null;
- var generator = new DocumentationGenerator(set, logger, null, null, exporter);
+ var generator = new DocumentationGenerator(set, logger, null, null, null, exporter);
_ = await generator.GenerateAll(ctx);
if (runningOnCi)
diff --git a/tests/authoring/Framework/Setup.fs b/tests/authoring/Framework/Setup.fs
index 66df0e955..95390500c 100644
--- a/tests/authoring/Framework/Setup.fs
+++ b/tests/authoring/Framework/Setup.fs
@@ -112,7 +112,7 @@ type Setup =
let conversionCollector = TestConversionCollector()
let linkResolver = TestCrossLinkResolver(context.Configuration)
let set = DocumentationSet(context, logger, linkResolver);
- let generator = DocumentationGenerator(set, logger, null, null, null, conversionCollector)
+ let generator = DocumentationGenerator(set, logger, null, null, null, null, conversionCollector)
let context = {
Collector = collector