diff --git a/Directory.Packages.props b/Directory.Packages.props
index dd121ad7c..cdb7e0b4c 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -16,6 +16,7 @@
+
@@ -63,4 +64,4 @@
-
+
\ No newline at end of file
diff --git a/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
new file mode 100644
index 000000000..73f028e53
--- /dev/null
+++ b/src/Elastic.Documentation.Search/Elastic.Documentation.Search.csproj
@@ -0,0 +1,17 @@
+
+
+
+    net9.0
+    enable
+    enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
index fc98220dd..146ce2874 100644
--- a/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
+++ b/src/Elastic.Documentation/Diagnostics/IDiagnosticsCollector.cs
@@ -35,6 +35,18 @@ public static void EmitWarning(this IDiagnosticsCollector collector, IFileInfo f
 	public static void EmitHint(this IDiagnosticsCollector collector, IFileInfo file, string message) =>
 		collector.EmitHint(file.FullName, message);
+
+	/// Emit an error not associated with a file
+	public static void EmitGlobalError(this IDiagnosticsCollector collector, string message, Exception? e = null) =>
+		collector.EmitError(string.Empty, message, e);
+
+	/// Emit a warning not associated with a file
+	public static void EmitGlobalWarning(this IDiagnosticsCollector collector, string message) =>
+		collector.EmitWarning(string.Empty, message);
+
+	/// Emit a hint not associated with a file
+	public static void EmitGlobalHint(this IDiagnosticsCollector collector, string message) =>
+		collector.EmitHint(string.Empty, message);
 }
diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs
new file mode 100644
index 000000000..0ddd6da19
--- /dev/null
+++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs
@@ -0,0 +1,29 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System.Text.Json.Serialization;
+
+namespace Elastic.Documentation.Search;
+
+public record DocumentationDocument
+{
+	[JsonPropertyName("title")]
+	public string? Title { get; set; }
+
+	[JsonPropertyName("body")]
+	public string? Body { get; set; }
+
+	[JsonPropertyName("abstract")]
+	public string? Abstract { get; set; }
+
+	[JsonPropertyName("headings")]
+	public string[] Headings { get; set; } = [];
+
+	[JsonPropertyName("links")]
+	public string[] Links { get; set; } = [];
+
+	[JsonPropertyName("url")]
+	public string? Url { get; set; }
+}
+
diff --git a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
index 69bab9947..a0067ac6f 100644
--- a/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
+++ b/src/Elastic.Documentation/Serialization/SourceGenerationContext.cs
@@ -4,6 +4,7 @@
 using System.Text.Json.Serialization;
 using Elastic.Documentation.Links;
+using Elastic.Documentation.Search;
 using Elastic.Documentation.State;
 
 namespace Elastic.Documentation.Serialization;
@@ -16,4 +17,5 @@ namespace Elastic.Documentation.Serialization;
 [JsonSerializable(typeof(GitCheckoutInformation))]
 [JsonSerializable(typeof(LinkRegistry))]
 [JsonSerializable(typeof(LinkRegistryEntry))]
+[JsonSerializable(typeof(DocumentationDocument))]
 public sealed partial class SourceGenerationContext : JsonSerializerContext;
diff --git a/src/Elastic.Markdown/DocumentationGenerator.cs b/src/Elastic.Markdown/DocumentationGenerator.cs
index 3a91c5f65..70596341b 100644
--- a/src/Elastic.Markdown/DocumentationGenerator.cs
+++ b/src/Elastic.Markdown/DocumentationGenerator.cs
@@ -18,6 +18,7 @@
 namespace Elastic.Markdown;
 
+/// Used primarily for testing; do not use in production paths since it might keep references alive too long
 public interface IConversionCollector
 {
 	void Collect(MarkdownFile file, MarkdownDocument document, string html);
@@ -40,6 +41,7 @@ public class DocumentationGenerator
 	private readonly ILogger _logger;
 	private readonly IFileSystem _writeFileSystem;
 	private readonly IDocumentationFileExporter _documentationFileExporter;
+	private readonly IMarkdownExporter[] _markdownExporters;
 
 	private HtmlWriter HtmlWriter { get; }
 
 	public DocumentationSet DocumentationSet { get; }
@@ -51,12 +53,14 @@ public DocumentationGenerator(
 	ILoggerFactory logger,
 	INavigationHtmlWriter? navigationHtmlWriter = null,
 	IDocumentationFileOutputProvider? documentationFileOutputProvider = null,
+	IMarkdownExporter[]? markdownExporters = null,
 	IDocumentationFileExporter? documentationExporter = null,
 	IConversionCollector? conversionCollector = null,
 	ILegacyUrlMapper? legacyUrlMapper = null,
 	IPositionalNavigation? positionalNavigation = null
 )
 {
+	_markdownExporters = markdownExporters ?? [];
 	_documentationFileOutputProvider = documentationFileOutputProvider;
 	_conversionCollector = conversionCollector;
 	_writeFileSystem = docSet.Context.WriteFileSystem;
@@ -100,7 +104,7 @@ public async Task GenerateAll(Cancel ctx)
 	var generationState = Context.SkipDocumentationState ? null : GetPreviousGenerationState();
 
-	// clear output directory if force is true but never for assembler builds since these build multiple times to the output.
+	// clear the output directory if force is true but never for assembler builds since these build multiple times to the output.
 	if (Context is { AssemblerBuild: false, Force: true }
 		// clear the output directory if force is false but generation state is null, except for assembler builds.
 		|| (Context is { AssemblerBuild: false, Force: false } && generationState == null))
@@ -209,7 +213,7 @@ private async Task ExtractEmbeddedStaticResources(Cancel ctx)
 		}
 	}
 
-	private async Task ProcessFile(HashSet offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel token)
+	private async Task ProcessFile(HashSet offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel ctx)
 	{
 		if (!Context.Force)
 		{
@@ -220,10 +224,27 @@ private async Task ProcessFile(HashSet offendingFiles, DocumentationFile
 		}
 
 		_logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName);
-		//TODO send file to OutputFile() so we can validate its scope is defined in navigation.yml
 		var outputFile = OutputFile(file.RelativePath);
 		if (outputFile is not null)
-			await _documentationFileExporter.ProcessFile(Context, file, outputFile, HtmlWriter, _conversionCollector, token);
+		{
+			var context = new ProcessingFileContext
+			{
+				BuildContext = Context,
+				OutputFile = outputFile,
+				ConversionCollector = _conversionCollector,
+				File = file,
+				HtmlWriter = HtmlWriter
+			};
+			await _documentationFileExporter.ProcessFile(context, ctx);
+			if (file is MarkdownFile markdown)
+			{
+				foreach (var exporter in _markdownExporters)
+				{
+					var document = context.MarkdownDocument ??= await markdown.ParseFullAsync(ctx);
+					_ = await exporter.ExportAsync(new MarkdownExportContext { Document = document, File = markdown }, ctx);
+				}
+			}
+		}
 	}
 
 	private IFileInfo? OutputFile(string relativePath)
diff --git a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
index beb73fd53..e85cf8cf5 100644
--- a/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Exporters/DocumentationFileExporter.cs
@@ -5,16 +5,27 @@
 using System.IO.Abstractions;
 using Elastic.Markdown.IO;
 using Elastic.Markdown.Slices;
+using Markdig.Syntax;
 
 namespace Elastic.Markdown.Exporters;
 
+public class ProcessingFileContext
+{
+	public required BuildContext BuildContext { get; init; }
+	public required DocumentationFile File { get; init; }
+	public required IFileInfo OutputFile { get; init; }
+	public required HtmlWriter HtmlWriter { get; init; }
+	public required IConversionCollector? ConversionCollector { get; init; }
+
+	public MarkdownDocument? MarkdownDocument { get; set; }
+}
+
 public interface IDocumentationFileExporter
 {
-	/// Used in documentation state to ensure we break the build cache if a different exporter is chosen
+	/// Used in the documentation state to ensure we break the build cache if a different exporter is chosen
 	string Name { get; }
 
-	Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter, IConversionCollector? conversionCollector,
-		Cancel token);
+	ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx);
 
 	Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx);
 }
@@ -23,16 +34,14 @@ public abstract class DocumentationFileExporterBase(IFileSystem readFileSystem,
 {
 	public abstract string Name { get; }
 
-	public abstract Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
-		IConversionCollector? conversionCollector,
-		Cancel token);
+	public abstract ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx);
 
 	protected async Task CopyFileFsAware(DocumentationFile file, IFileInfo outputFile, Cancel ctx)
 	{
 		// fast path, normal case.
 		if (readFileSystem == writeFileSystem)
 			readFileSystem.File.Copy(file.SourceFile.FullName, outputFile.FullName, true);
-		//slower when we are mocking the write filesystem
+		//slower when we are mocking the write-filesystem
 		else
 		{
 			var bytes = await file.SourceFile.FileSystem.File.ReadAllBytesAsync(file.SourceFile.FullName, ctx);
@@ -49,26 +58,20 @@ public async Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStre
 	}
 }
 
-public class DocumentationFileExporter(
-	IFileSystem readFileSystem,
-	IFileSystem writeFileSystem
-) : DocumentationFileExporterBase(readFileSystem, writeFileSystem)
+public class DocumentationFileExporter(IFileSystem readFileSystem, IFileSystem writeFileSystem)
+	: DocumentationFileExporterBase(readFileSystem, writeFileSystem)
 {
-	public override string Name { get; } = nameof(DocumentationFileExporter);
+	public override string Name => nameof(DocumentationFileExporter);
 
-	public override async Task ProcessFile(BuildContext context, DocumentationFile file,
-		IFileInfo outputFile,
-		HtmlWriter htmlWriter,
-		IConversionCollector? conversionCollector,
-		Cancel token)
+	public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx)
 	{
-		if (file is MarkdownFile markdown)
-			await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token);
+		if (context.File is MarkdownFile markdown)
+			context.MarkdownDocument = await context.HtmlWriter.WriteAsync(context.OutputFile, markdown, context.ConversionCollector, ctx);
 		else
 		{
-			if (outputFile.Directory is { Exists: false })
-				outputFile.Directory.Create();
-			await CopyFileFsAware(file, outputFile, token);
+			if (context.OutputFile.Directory is { Exists: false })
+				context.OutputFile.Directory.Create();
+			await CopyFileFsAware(context.File, context.OutputFile, ctx);
 		}
 	}
 }
diff --git a/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs
new file mode 100644
index 000000000..b96704c64
--- /dev/null
+++ b/src/Elastic.Markdown/Exporters/IMarkdownExporter.cs
@@ -0,0 +1,22 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Markdown.IO;
+using Markdig.Syntax;
+
+namespace Elastic.Markdown.Exporters;
+
+public class MarkdownExportContext
+{
+	public required MarkdownDocument Document { get; init; }
+	public required MarkdownFile File { get; init; }
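+	// caches the LLM text rendering of the document so multiple exporters do not regenerate it for the same file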
+	public string? LLMText { get; set; }
+}
+
+public interface IMarkdownExporter
+{
+	ValueTask StartAsync(Cancel ctx = default);
+	ValueTask StopAsync(Cancel ctx = default);
+	ValueTask<bool> ExportAsync(MarkdownExportContext context, Cancel ctx);
+}
diff --git a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
index 0185a0b44..391e88c6f 100644
--- a/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Exporters/NoopDocumentationFileExporter.cs
@@ -3,8 +3,6 @@
 // See the LICENSE file in the project root for more information
 
 using System.IO.Abstractions;
-using Elastic.Markdown.IO;
-using Elastic.Markdown.Slices;
 
 namespace Elastic.Markdown.Exporters;
 
@@ -12,9 +10,8 @@ public class NoopDocumentationFileExporter : IDocumentationFileExporter
 {
 	public string Name { get; } = nameof(NoopDocumentationFileExporter);
 
-	public Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
-		IConversionCollector? conversionCollector, Cancel token) =>
-		Task.CompletedTask;
+	public ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx) =>
+		ValueTask.CompletedTask;
 
 	public Task CopyEmbeddedResource(IFileInfo outputFile, Stream resourceStream, Cancel ctx) => Task.CompletedTask;
 }
diff --git a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
index f557297e9..66e9a04ca 100644
--- a/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
+++ b/src/Elastic.Markdown/Extensions/DetectionRules/RuleDocumentationFileExporter.cs
@@ -14,18 +14,24 @@ public class RuleDocumentationFileExporter(IFileSystem readFileSystem, IFileSyst
 {
 	public override string Name { get; } = nameof(RuleDocumentationFileExporter);
 
-	public override async Task ProcessFile(BuildContext context, DocumentationFile file, IFileInfo outputFile, HtmlWriter htmlWriter,
-		IConversionCollector? conversionCollector, Cancel token)
+	public override async ValueTask ProcessFile(ProcessingFileContext context, Cancel ctx)
 	{
-		if (file is DetectionRuleFile df)
-			await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context), df, conversionCollector, token);
-		else if (file is MarkdownFile markdown)
-			await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, token);
-		else
+		var htmlWriter = context.HtmlWriter;
+		var outputFile = context.OutputFile;
+		var conversionCollector = context.ConversionCollector;
+		switch (context.File)
 		{
-			if (outputFile.Directory is { Exists: false })
-				outputFile.Directory.Create();
-			await CopyFileFsAware(file, outputFile, token);
+			case DetectionRuleFile df:
+				context.MarkdownDocument = await htmlWriter.WriteAsync(DetectionRuleFile.OutputPath(outputFile, context.BuildContext), df, conversionCollector, ctx);
+				break;
+			case MarkdownFile markdown:
+				context.MarkdownDocument = await htmlWriter.WriteAsync(outputFile, markdown, conversionCollector, ctx);
+				break;
+			default:
+				if (outputFile.Directory is { Exists: false })
+					outputFile.Directory.Create();
+				await CopyFileFsAware(context.File, outputFile, ctx);
+				break;
 		}
 	}
 }
diff --git a/src/Elastic.Markdown/IO/MarkdownFile.cs b/src/Elastic.Markdown/IO/MarkdownFile.cs
index d6823cf06..3c533824e 100644
--- a/src/Elastic.Markdown/IO/MarkdownFile.cs
+++ b/src/Elastic.Markdown/IO/MarkdownFile.cs
@@ -17,6 +17,7 @@
 using Elastic.Markdown.Slices;
 using Markdig;
 using Markdig.Extensions.Yaml;
+using Markdig.Renderers.Roundtrip;
 using Markdig.Syntax;
 
 namespace Elastic.Markdown.IO;
@@ -186,6 +187,17 @@ public async Task ParseFullAsync(Cancel ctx)
 		return document;
 	}
 
+	public static string ToLLMText(MarkdownDocument document)
+	{
+		using var sw = new StringWriter();
+		var rr = new RoundtripRenderer(sw);
+		rr.Write(document);
+		var outputMarkdown = sw.ToString();
+
+		return outputMarkdown;
+	}
+
 	private IReadOnlyDictionary GetSubstitutions()
 	{
 		var globalSubstitutions = _globalSubstitutions;
diff --git a/src/Elastic.Markdown/Myst/MarkdownParser.cs b/src/Elastic.Markdown/Myst/MarkdownParser.cs
index 1feafcbf8..8fb336545 100644
--- a/src/Elastic.Markdown/Myst/MarkdownParser.cs
+++ b/src/Elastic.Markdown/Myst/MarkdownParser.cs
@@ -31,31 +31,26 @@ public class MarkdownParser(BuildContext build, IParserResolvers resolvers)
 	private BuildContext Build { get; } = build;
 	private IParserResolvers Resolvers { get; } = resolvers;
 
-	public Task MinimalParseAsync(IFileInfo path, Cancel ctx)
-	{
-		var state = new ParserState(Build)
-		{
-			MarkdownSourcePath = path,
-			YamlFrontMatter = null,
-			DocumentationFileLookup = Resolvers.DocumentationFileLookup,
-			CrossLinkResolver = Resolvers.CrossLinkResolver,
-			SkipValidation = true
-		};
-		var context = new ParserContext(state);
-		return ParseAsync(path, context, MinimalPipeline, ctx);
-	}
+	public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx) =>
+		ParseFromFile(path, matter, Pipeline, false, ctx);
+
+	public Task MinimalParseAsync(IFileInfo path, Cancel ctx) =>
+		ParseFromFile(path, null, MinimalPipeline, true, ctx);
 
-	public Task ParseAsync(IFileInfo path, YamlFrontMatter? matter, Cancel ctx)
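+	// shared parse path for ParseAsync and MinimalParseAsync; 'skip' toggles SkipValidation for minimal parses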
+	private Task ParseFromFile(
+		IFileInfo path, YamlFrontMatter? matter, MarkdownPipeline pipeline, bool skip, Cancel ctx
+	)
 	{
 		var state = new ParserState(Build)
 		{
 			MarkdownSourcePath = path,
 			YamlFrontMatter = matter,
 			DocumentationFileLookup = Resolvers.DocumentationFileLookup,
-			CrossLinkResolver = Resolvers.CrossLinkResolver
+			CrossLinkResolver = Resolvers.CrossLinkResolver,
+			SkipValidation = skip
 		};
 		var context = new ParserContext(state);
-		return ParseAsync(path, context, Pipeline, ctx);
+		return ParseAsync(path, context, pipeline, ctx);
 	}
 
 	public Task ParseSnippetAsync(IFileInfo path, IFileInfo parentPath, YamlFrontMatter? matter, Cancel ctx)
diff --git a/src/Elastic.Markdown/Slices/HtmlWriter.cs b/src/Elastic.Markdown/Slices/HtmlWriter.cs
index 4e85fb1bc..3cd927b01 100644
--- a/src/Elastic.Markdown/Slices/HtmlWriter.cs
+++ b/src/Elastic.Markdown/Slices/HtmlWriter.cs
@@ -161,7 +161,7 @@ private async Task RenderLayout(MarkdownFile markdown, MarkdownDocument
 		return await slice.RenderAsync(cancellationToken: ctx);
 	}
 
-	public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? collector, Cancel ctx = default)
+	public async Task<MarkdownDocument> WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConversionCollector? collector, Cancel ctx = default)
 	{
 		if (outputFile.Directory is { Exists: false })
 			outputFile.Directory.Create();
@@ -184,8 +184,10 @@ public async Task WriteAsync(IFileInfo outputFile, MarkdownFile markdown, IConve
 		}
 
 		var document = await markdown.ParseFullAsync(ctx);
+
 		var rendered = await RenderLayout(markdown, document, ctx);
 		collector?.Collect(markdown, document, rendered);
 		await writeFileSystem.File.WriteAllTextAsync(path, rendered, ctx);
+		return document;
 	}
 }
diff --git a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
index 1ceca1093..7628b8da4 100644
--- a/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
+++ b/src/tooling/Elastic.Documentation.Tooling/Diagnostics/Console/ErrataFileSourceRepository.cs
@@ -18,6 +18,10 @@ public class ErrataFileSourceRepository : ISourceRepository
 	[SuppressMessage("Reliability", "CA2012:Use ValueTasks correctly")]
 	public bool TryGet(string id, [NotNullWhen(true)] out Source? source)
 	{
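+		// global diagnostics emitted via EmitGlobalError/Warning/Hint use an empty file id; there is no file to read in that case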
+		source = new Source(id, string.Empty);
+		if (id == string.Empty)
+			return true;
+
 		using var reader = new Utf8StreamReader(id);
 		var text = Encoding.UTF8.GetString(reader.ReadToEndAsync().GetAwaiter().GetResult());
 		source = new Source(id, text);
diff --git a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
index 943107bab..410a88eaf 100644
--- a/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
+++ b/src/tooling/docs-assembler/Building/AssemblerBuilder.cs
@@ -3,15 +3,24 @@
 // See the LICENSE file in the project root for more information
 
 using System.Collections.Frozen;
+using Documentation.Assembler.Exporters;
 using Documentation.Assembler.Navigation;
 using Elastic.Documentation.Legacy;
 using Elastic.Documentation.Links;
 using Elastic.Markdown;
+using Elastic.Markdown.Exporters;
 using Elastic.Markdown.Links.CrossLinks;
 using Microsoft.Extensions.Logging;
 
 namespace Documentation.Assembler.Building;
 
+public enum ExportOption
+{
+	Html = 0,
+	LLMText = 1,
+	Elasticsearch = 2
+}
+
 public class AssemblerBuilder(
 	ILoggerFactory logger,
 	AssembleContext context,
@@ -25,7 +34,7 @@ public class AssemblerBuilder(
 	private ILegacyUrlMapper? LegacyUrlMapper { get; } = legacyUrlMapper;
 
-	public async Task BuildAllAsync(FrozenDictionary assembleSets, Cancel ctx)
+	public async Task BuildAllAsync(FrozenDictionary assembleSets, IReadOnlySet<ExportOption> exportOptions, Cancel ctx)
 	{
 		if (context.OutputDirectory.Exists)
 			context.OutputDirectory.Delete(true);
@@ -33,6 +42,22 @@ public async Task BuildAllAsync(FrozenDictionary
 		();
+		var esExporter =
+			Environment.GetEnvironmentVariable("ELASTIC_API_KEY") is { } apiKey &&
+			Environment.GetEnvironmentVariable("ELASTIC_URL") is { } url
+				? new ElasticsearchMarkdownExporter(logger, context.Collector, url, apiKey)
+				: null;
+
+		var markdownExporters = new List<IMarkdownExporter>(3);
+		if (exportOptions.Contains(ExportOption.LLMText))
+			markdownExporters.Add(new LLMTextExporter());
+		if (exportOptions.Contains(ExportOption.Elasticsearch) && esExporter is { })
+			markdownExporters.Add(esExporter);
+		var noopBuild = !exportOptions.Contains(ExportOption.Html);
+
+		var tasks = markdownExporters.Select(async e => await e.StartAsync(ctx));
+		await Task.WhenAll(tasks);
+
 		foreach (var (_, set) in assembleSets)
 		{
 			var checkout = set.Checkout;
@@ -44,7 +69,7 @@ public async Task BuildAllAsync(FrozenDictionary
+		tasks = markdownExporters.Select(async e => await e.StopAsync(ctx));
+		await Task.WhenAll(tasks);
 	}
 
 	private static void CollectRedirects(
@@ -90,14 +116,16 @@ string Resolve(string relativeMarkdownPath)
 		}
 	}
 
-	private async Task BuildAsync(AssemblerDocumentationSet set, Cancel ctx)
+	private async Task BuildAsync(AssemblerDocumentationSet set, bool noop, IMarkdownExporter[]? markdownExporters, Cancel ctx)
 	{
 		var generator = new DocumentationGenerator(
 			set.DocumentationSet,
 			logger,
 			HtmlWriter,
 			pathProvider,
 			legacyUrlMapper: LegacyUrlMapper,
-			positionalNavigation: navigation
+			positionalNavigation: navigation,
+			documentationExporter: noop ? new NoopDocumentationFileExporter() : null,
+			markdownExporters: markdownExporters
 		);
 		return await generator.GenerateAll(ctx);
 	}
diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
index 99a9bdbf8..ed0cd3730 100644
--- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
+++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
@@ -21,6 +21,7 @@
 using Elastic.Markdown.Exporters;
 using Elastic.Markdown.IO;
 using Microsoft.Extensions.Logging;
+using YamlDotNet.Core;
 
 namespace Documentation.Assembler.Cli;
 
@@ -69,6 +70,7 @@ public async Task CloneAll(
 	/// Treat warnings as errors and fail the build on warnings
 	/// Allow indexing and following of html files
 	/// The environment to build
+	/// Configure the exporters to run explicitly (html, llmtext, es); defaults to html
 	///
 	[Command("build-all")]
 	public async Task BuildAll(
@@ -76,8 +78,11 @@ public async Task BuildAll(
 		bool? strict = null,
 		bool? allowIndexing = null,
 		string? environment = null,
+		[ExporterParser] IReadOnlySet<ExportOption>? exporters = null,
 		Cancel ctx = default)
 	{
+		exporters ??= new HashSet<ExportOption>([ExportOption.Html]);
+
 		AssignOutputLogger();
 
 		var githubEnvironmentInput = githubActionsService.GetInput("environment");
 		environment ??= !string.IsNullOrEmpty(githubEnvironmentInput) ? githubEnvironmentInput : "dev";
@@ -116,7 +121,7 @@ public async Task BuildAll(
 		var historyMapper = new PageLegacyUrlMapper(assembleSources.HistoryMappings);
 		var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
-		await builder.BuildAllAsync(assembleSources.AssembleSets, ctx);
+		await builder.BuildAllAsync(assembleSources.AssembleSets, exporters, ctx);
 
 		var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory);
 		sitemapBuilder.Generate();
@@ -163,7 +168,7 @@ await Parallel.ForEachAsync(repositories,
 					outputPath
 				);
 				var set = new DocumentationSet(context, logger);
-				var generator = new DocumentationGenerator(set, logger, null, null, new NoopDocumentationFileExporter());
+				var generator = new DocumentationGenerator(set, logger, null, null, null, new NoopDocumentationFileExporter());
 				_ = await generator.GenerateAll(c);
 
 				IAmazonS3 s3Client = new AmazonS3Client();
@@ -193,3 +198,30 @@ await Parallel.ForEachAsync(repositories,
 		return collector.Errors > 0 ? 1 : 0;
 	}
 }
+
+[AttributeUsage(AttributeTargets.Parameter)]
+public class ExporterParserAttribute : Attribute, IArgumentParser<IReadOnlySet<ExportOption>>
+{
+	public static bool TryParse(ReadOnlySpan<char> s, out IReadOnlySet<ExportOption> result)
+	{
+		result = new HashSet<ExportOption>([ExportOption.Html]);
+		var set = new HashSet<ExportOption>();
+		var ranges = s.Split(',');
+		foreach (var range in ranges)
+		{
+			ExportOption? export = s[range].Trim().ToString().ToLowerInvariant() switch
+			{
+				"llm" => ExportOption.LLMText,
+				"llmtext" => ExportOption.LLMText,
+				"es" => ExportOption.Elasticsearch,
+				"elasticsearch" => ExportOption.Elasticsearch,
+				"html" => ExportOption.Html,
+				_ => null
+			};
+			if (export.HasValue)
+				_ = set.Add(export.Value);
+		}
+		result = set;
+		return true;
+	}
+}
diff --git a/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs
new file mode 100644
index 000000000..8221ea28c
--- /dev/null
+++ b/src/tooling/docs-assembler/Exporters/ElasticsearchMarkdownExporter.cs
@@ -0,0 +1,135 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.Search;
+using Elastic.Documentation.Serialization;
+using Elastic.Ingest.Elasticsearch;
+using Elastic.Ingest.Elasticsearch.Semantic;
+using Elastic.Markdown.Exporters;
+using Elastic.Markdown.IO;
+using Elastic.Transport;
+using Elastic.Transport.Products.Elasticsearch;
+using Microsoft.Extensions.Logging;
+
+namespace Documentation.Assembler.Exporters;
+
+public class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposable
+{
+	private readonly DiagnosticsCollector _collector;
+	private readonly SemanticIndexChannel<DocumentationDocument> _channel;
+	private readonly ILogger _logger;
+
+	public ElasticsearchMarkdownExporter(ILoggerFactory logFactory, DiagnosticsCollector collector, string url, string apiKey)
+	{
+		_collector = collector;
+		_logger = logFactory.CreateLogger<ElasticsearchMarkdownExporter>();
+		var configuration = new ElasticsearchConfiguration(new Uri(url), new ApiKey(apiKey))
+		{
+			//Uncomment to see the requests with Fiddler
+			//ProxyAddress = "http://localhost:8866"
+		};
+		var transport = new DistributedTransport(configuration);
+		//The max num threads per allocated node; from testing it's best to limit our max concurrency
+		//producing to this number as well
+		var indexNumThreads = 8;
+		var options = new SemanticIndexChannelOptions<DocumentationDocument>(transport)
+		{
+			BufferOptions =
+			{
+				OutboundBufferMaxSize = 100,
+				ExportMaxConcurrency = indexNumThreads,
+				ExportMaxRetries = 3
+			},
+			SerializerContext = SourceGenerationContext.Default,
+			IndexFormat = "documentation-{0:yyyy.MM.dd.HHmmss}",
+			IndexNumThreads = indexNumThreads,
+			ActiveSearchAlias = "documentation",
+			ExportExceptionCallback = e => _logger.LogError(e, "Failed to export document"),
+			ServerRejectionCallback = items => _logger.LogInformation("Server rejection: {Rejection}", items.First().Item2),
+			GetMapping = (inferenceId, _) => // language=json
+				$$"""
+				{
+					"properties": {
+						"title": { "type": "text" },
+						"body": {
+							"type": "text"
+						},
+						"abstract": {
+							"type": "semantic_text",
+							"inference_id": "{{inferenceId}}"
+						}
+					}
+				}
+				"""
+		};
+		_channel = new SemanticIndexChannel<DocumentationDocument>(options);
+	}
+
+	public async ValueTask StartAsync(Cancel ctx = default)
+	{
+		_logger.LogInformation($"Bootstrapping {nameof(SemanticIndexChannel<DocumentationDocument>)} Elasticsearch target for indexing");
+		_ = await _channel.BootstrapElasticsearchAsync(BootstrapMethod.Failure, null, ctx);
+	}
+
+	public async ValueTask StopAsync(Cancel ctx = default)
+	{
+		_logger.LogInformation("Waiting to drain all inflight exports to Elasticsearch");
+		var drained = await _channel.WaitForDrainAsync(null, ctx);
+		if (!drained)
+			_collector.EmitGlobalError("Elasticsearch export: failed to complete indexing in a timely fashion while shutting down");
+
+		_logger.LogInformation("Refreshing target index {Index}", _channel.IndexName);
+		var refreshed = await _channel.RefreshAsync(ctx);
+		if (!refreshed)
+			_logger.LogError("Refreshing target index {Index} did not complete successfully", _channel.IndexName);
+
+		_logger.LogInformation("Applying aliases to {Index}", _channel.IndexName);
+		var swapped = await _channel.ApplyAliasesAsync(ctx);
+		if (!swapped)
+			_collector.EmitGlobalError($"{nameof(ElasticsearchMarkdownExporter)} failed to apply aliases to index {_channel.IndexName}");
+	}
+
+	public void Dispose()
+	{
+		_channel.Complete();
+		_channel.Dispose();
+		GC.SuppressFinalize(this);
+	}
+
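+	// fast path: write straight to the channel buffer; when it is full, wait for capacity and try once more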
+	private async ValueTask<bool> TryWrite(DocumentationDocument document, Cancel ctx = default)
+	{
+		if (_channel.TryWrite(document))
+			return true;
+
+		if (await _channel.WaitToWriteAsync(ctx))
+			return _channel.TryWrite(document);
+		return false;
+	}
+
+	public async ValueTask<bool> ExportAsync(MarkdownExportContext context, Cancel ctx)
+	{
+		var file = context.File;
+		var document = context.Document;
+		if (file.FileName.EndsWith(".toml", StringComparison.OrdinalIgnoreCase))
+			return true;
+
+		var url = file.Url;
+		// integrations are too big, we need to sanitize the fieldsets and example docs out of these.
+		if (url.Contains("/reference/integrations"))
+			return true;
+
+		var body = context.LLMText ??= MarkdownFile.ToLLMText(document);
+		var doc = new DocumentationDocument
+		{
+			Title = file.Title,
+			//Body = body,
+			Abstract = !string.IsNullOrEmpty(body)
+				? body[..Math.Min(body.Length, 400)]
+				: string.Empty,
+			Url = url
+		};
+		return await TryWrite(doc, ctx);
+	}
+}
diff --git a/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs
new file mode 100644
index 000000000..f91011d98
--- /dev/null
+++ b/src/tooling/docs-assembler/Exporters/LLMTextExporter.cs
@@ -0,0 +1,21 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Elastic.Markdown.Exporters;
+using Elastic.Markdown.IO;
+
+namespace Documentation.Assembler.Exporters;
+
+public class LLMTextExporter : IMarkdownExporter
+{
+	public ValueTask StartAsync(CancellationToken ctx = default) => ValueTask.CompletedTask;
+
+	public ValueTask StopAsync(CancellationToken ctx = default) => ValueTask.CompletedTask;
+
+	public ValueTask<bool> ExportAsync(MarkdownExportContext context, CancellationToken ctx)
+	{
+		var llmText = context.LLMText ??= MarkdownFile.ToLLMText(context.Document);
+		return ValueTask.FromResult(true);
+	}
+}
diff --git a/src/tooling/docs-assembler/docs-assembler.csproj b/src/tooling/docs-assembler/docs-assembler.csproj
index cfed86523..91133e4d2 100644
--- a/src/tooling/docs-assembler/docs-assembler.csproj
+++ b/src/tooling/docs-assembler/docs-assembler.csproj
@@ -10,6 +10,7 @@
     true
     true
    true
+    false
     true
     true
@@ -19,6 +20,7 @@
+
diff --git a/src/tooling/docs-builder/Cli/Commands.cs b/src/tooling/docs-builder/Cli/Commands.cs
index 5dacaec9d..f2fcbad1a 100644
--- a/src/tooling/docs-builder/Cli/Commands.cs
+++ b/src/tooling/docs-builder/Cli/Commands.cs
@@ -152,7 +152,7 @@ public async Task Generate(
 		metadataOnly ??= metaValue;
 		var exporter = metadataOnly.HasValue && metadataOnly.Value ? new NoopDocumentationFileExporter() : null;
-		var generator = new DocumentationGenerator(set, logger, null, null, exporter);
+		var generator = new DocumentationGenerator(set, logger, null, null, null, exporter);
 		_ = await generator.GenerateAll(ctx);
 
 		if (runningOnCi)
diff --git a/tests/authoring/Framework/Setup.fs b/tests/authoring/Framework/Setup.fs
index 66df0e955..95390500c 100644
--- a/tests/authoring/Framework/Setup.fs
+++ b/tests/authoring/Framework/Setup.fs
@@ -112,7 +112,7 @@ type Setup =
        let conversionCollector = TestConversionCollector()
        let linkResolver = TestCrossLinkResolver(context.Configuration)
        let set = DocumentationSet(context, logger, linkResolver);
-        let generator = DocumentationGenerator(set, logger, null, null, null, conversionCollector)
+        let generator = DocumentationGenerator(set, logger, null, null, null, null, conversionCollector)
 
        let context = { Collector = collector