diff --git a/src/Elastic.Markdown/BuildContext.cs b/src/Elastic.Markdown/BuildContext.cs index 98eca2835..df225fe40 100644 --- a/src/Elastic.Markdown/BuildContext.cs +++ b/src/Elastic.Markdown/BuildContext.cs @@ -29,17 +29,16 @@ public record BuildContext public bool Force { get; init; } + // This property is used to determine if the site should be indexed by search engines + public bool AllowIndexing { get; init; } + + private readonly string? _urlPathPrefix; public string? UrlPathPrefix { get => string.IsNullOrWhiteSpace(_urlPathPrefix) ? "" : $"/{_urlPathPrefix.Trim('/')}"; init => _urlPathPrefix = value; } - // This property is used to determine if the site should be indexed by search engines - public bool AllowIndexing { get; init; } - - private readonly string? _urlPathPrefix; - public BuildContext(IFileSystem fileSystem) : this(new DiagnosticsCollector([]), fileSystem, fileSystem, null, null) { } diff --git a/src/docs-assembler/AssembleContext.cs b/src/docs-assembler/AssembleContext.cs index 0c4652d1b..d03bf81d0 100644 --- a/src/docs-assembler/AssembleContext.cs +++ b/src/docs-assembler/AssembleContext.cs @@ -21,9 +21,16 @@ public class AssembleContext public IFileInfo ConfigurationPath { get; } + public IDirectoryInfo CheckoutDirectory { get; set; } + public IDirectoryInfo OutputDirectory { get; set; } - public AssembleContext(DiagnosticsCollector collector, IFileSystem readFileSystem, IFileSystem writeFileSystem, string? output) + public bool Force { get; init; } + + // This property is used to determine if the site should be indexed by search engines + public bool AllowIndexing { get; init; } + + public AssembleContext(DiagnosticsCollector collector, IFileSystem readFileSystem, IFileSystem writeFileSystem, string? checkoutDirectory, string? output) { Collector = collector; ReadFileSystem = readFileSystem; @@ -37,6 +44,7 @@ public AssembleContext(DiagnosticsCollector collector, IFileSystem readFileSyste ConfigurationPath = ReadFileSystem.FileInfo.New(configPath); Configuration = AssemblyConfiguration.Deserialize(ReadFileSystem.File.ReadAllText(ConfigurationPath.FullName)); + CheckoutDirectory = ReadFileSystem.DirectoryInfo.New(checkoutDirectory ?? ".artifacts/checkouts"); OutputDirectory = ReadFileSystem.DirectoryInfo.New(output ?? ".artifacts/assembly"); } @@ -55,7 +63,5 @@ private void ExtractAssemblerConfiguration(string configPath) outputFile.Directory.Create(); using var stream = outputFile.OpenWrite(); resourceStream.CopyTo(stream); - - } } diff --git a/src/docs-assembler/Building/AssemblerBuilder.cs b/src/docs-assembler/Building/AssemblerBuilder.cs new file mode 100644 index 000000000..eb20f1a43 --- /dev/null +++ b/src/docs-assembler/Building/AssemblerBuilder.cs @@ -0,0 +1,53 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Documentation.Assembler.Sourcing; +using Elastic.Markdown; +using Elastic.Markdown.IO; +using Microsoft.Extensions.Logging; + +namespace Documentation.Assembler.Building; + +public class AssemblerBuilder(ILoggerFactory logger, AssembleContext context) +{ + private readonly ILogger _logger = logger.CreateLogger(); + + public async Task BuildAllAsync(IReadOnlyCollection checkouts, Cancel ctx) + { + foreach (var checkout in checkouts) + { + try + { + await BuildAsync(checkout, ctx); + } + catch (Exception e) when (e.Message.Contains("Can not locate docset.yml file in")) + { + // TODO: we should only ignore this temporarily while migration is ongoing + _logger.LogWarning("Skipping {Checkout} as its not yet been migrated to V3", checkout.Directory.FullName); + } + catch (Exception e) + { + Console.WriteLine(e); + throw; + } + } + } + + private async Task BuildAsync(Checkout checkout, Cancel ctx) + { + var path = checkout.Directory.FullName; + var pathPrefix = checkout.Repository.PathPrefix; + var output = pathPrefix != null ? Path.Combine(context.OutputDirectory.FullName, pathPrefix) : context.OutputDirectory.FullName; + + var buildContext = new BuildContext(context.Collector, context.ReadFileSystem, context.WriteFileSystem, path, output) + { + UrlPathPrefix = pathPrefix, + Force = true, + AllowIndexing = true + }; + var set = new DocumentationSet(buildContext, logger); + var generator = new DocumentationGenerator(set, logger); + await generator.GenerateAll(ctx); + } +} diff --git a/src/docs-assembler/Cli/RepositoryCommands.cs b/src/docs-assembler/Cli/RepositoryCommands.cs index 1eabcb029..6fec9d5b1 100644 --- a/src/docs-assembler/Cli/RepositoryCommands.cs +++ b/src/docs-assembler/Cli/RepositoryCommands.cs @@ -6,6 +6,7 @@ using System.IO.Abstractions; using Actions.Core.Services; using ConsoleAppFramework; +using Documentation.Assembler.Building; using Documentation.Assembler.Sourcing; using Elastic.Documentation.Tooling.Diagnostics.Console; using Microsoft.Extensions.Logging; @@ -25,18 +26,56 @@ private void AssignOutputLogger() // would love to use libgit2 so there is no git dependency but // libgit2 is magnitudes slower to clone repositories https://github.com/libgit2/libgit2/issues/4674 /// Clones all repositories + /// Treat warnings as errors and fail the build on warnings /// [Command("clone-all")] - public async Task CloneAll(Cancel ctx = default) + public async Task CloneAll(bool? strict = null, Cancel ctx = default) { AssignOutputLogger(); await using var collector = new ConsoleDiagnosticsCollector(logger, githubActionsService); - var assembleContext = new AssembleContext(collector, new FileSystem(), new FileSystem(), null); - var cloner = new RepositoryCloner(logger, assembleContext); - await cloner.CloneAll(ctx); + var assembleContext = new AssembleContext(collector, new FileSystem(), new FileSystem(), null, null); + var cloner = new RepositoryCheckoutProvider(logger, assembleContext); + _ = await cloner.AcquireAllLatest(ctx); + + if (strict ?? false) + return collector.Errors + collector.Warnings; + return collector.Errors; } + /// Builds all repositories + /// Force a full rebuild of the destination folder + /// Treat warnings as errors and fail the build on warnings + /// Allow indexing and following of html files + /// + [Command("build-all")] + public async Task BuildAll( + bool? force = null, + bool? strict = null, + bool? allowIndexing = null, + Cancel ctx = default) + { + AssignOutputLogger(); + await using var collector = new ConsoleDiagnosticsCollector(logger, githubActionsService); + _ = collector.StartAsync(ctx); + + var assembleContext = new AssembleContext(collector, new FileSystem(), new FileSystem(), null, null) + { + Force = force ?? false, + AllowIndexing = allowIndexing ?? false, + }; + var cloner = new RepositoryCheckoutProvider(logger, assembleContext); + var checkouts = cloner.GetAll().ToArray(); + if (checkouts.Length == 0) + throw new Exception("No checkouts found"); + + var builder = new AssemblerBuilder(logger, assembleContext); + await builder.BuildAllAsync(checkouts, ctx); + + if (strict ?? false) + return collector.Errors + collector.Warnings; + return collector.Errors; + } } diff --git a/src/docs-assembler/Configuration/AssemblyConfiguration.cs b/src/docs-assembler/Configuration/AssemblyConfiguration.cs index 42ad4b4fd..11a037eca 100644 --- a/src/docs-assembler/Configuration/AssemblyConfiguration.cs +++ b/src/docs-assembler/Configuration/AssemblyConfiguration.cs @@ -25,6 +25,12 @@ public static AssemblyConfiguration Deserialize(string yaml) try { var config = deserializer.Deserialize(input); + foreach (var (name, r) in config.ReferenceRepositories) + { + var repository = RepositoryDefaults(r, name); + config.ReferenceRepositories[name] = repository; + } + config.Narrative = RepositoryDefaults(config.Narrative, NarrativeRepository.RepositoryName); return config; } catch (Exception e) @@ -35,27 +41,34 @@ public static AssemblyConfiguration Deserialize(string yaml) } } + private static TRepository RepositoryDefaults(TRepository r, string name) + where TRepository : Repository, new() + { + // ReSharper disable NullCoalescingConditionIsAlwaysNotNullAccordingToAPIContract + var repository = r ?? new TRepository(); + // ReSharper restore NullCoalescingConditionIsAlwaysNotNullAccordingToAPIContract + repository.Name = name; + if (string.IsNullOrEmpty(repository.CurrentBranch)) + repository.CurrentBranch = "main"; + if (string.IsNullOrEmpty(repository.Origin)) + { + if (!string.IsNullOrEmpty(Environment.GetEnvironmentVariable("GITHUB_ACTIONS"))) + { + var token = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); + repository.Origin = !string.IsNullOrEmpty(token) + ? $"https://oath2:{token}@github.com/elastic/{name}.git" + : $"https://github.com/elastic/{name}.git"; + } + else + repository.Origin = $"git@github.com:elastic/{name}.git"; + } + + return repository; + } + [YamlMember(Alias = "narrative")] public NarrativeRepository Narrative { get; set; } = new(); [YamlMember(Alias = "references")] - public Dictionary ReferenceRepositories { get; set; } = []; -} - -public record NarrativeRepository : Repository -{ - public static string Name { get; } = "docs-content"; -} - -public record Repository -{ - [YamlMember(Alias = "repo")] - public string? Origin { get; set; } - - [YamlMember(Alias = "current")] - public string? CurrentBranch { get; set; } - - [YamlMember(Alias = "checkout_strategy")] - public string CheckoutStrategy { get; set; } = "partial"; - + public Dictionary ReferenceRepositories { get; set; } = []; } diff --git a/src/docs-assembler/Configuration/Repository.cs b/src/docs-assembler/Configuration/Repository.cs new file mode 100644 index 000000000..aaedae285 --- /dev/null +++ b/src/docs-assembler/Configuration/Repository.cs @@ -0,0 +1,37 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using YamlDotNet.Serialization; + +namespace Documentation.Assembler.Configuration; + +public record NarrativeRepository : Repository +{ + public static string RepositoryName { get; } = "docs-content"; + public override string Name { get; set; } = RepositoryName; + public override string? PathPrefix { get; set; } +} + +public record Repository +{ + [YamlIgnore] + public virtual string Name { get; set; } = string.Empty; + + [YamlMember(Alias = "repo")] + public string Origin { get; set; } = string.Empty; + + [YamlMember(Alias = "current")] + public string CurrentBranch { get; set; } = "main"; + + [YamlMember(Alias = "checkout_strategy")] + public string CheckoutStrategy { get; set; } = "partial"; + + private string? _pathPrefix; + [YamlMember(Alias = "path_prefix")] + public virtual string? PathPrefix + { + get => _pathPrefix ?? $"reference/{Name}"; + set => _pathPrefix = value; + } +} diff --git a/src/docs-assembler/Sourcing/Checkout.cs b/src/docs-assembler/Sourcing/Checkout.cs new file mode 100644 index 000000000..655b25302 --- /dev/null +++ b/src/docs-assembler/Sourcing/Checkout.cs @@ -0,0 +1,15 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.IO.Abstractions; +using Documentation.Assembler.Configuration; + +namespace Documentation.Assembler.Sourcing; + +public record Checkout +{ + public required Repository Repository { get; init; } + public required string HeadReference { get; init; } + public required IDirectoryInfo Directory { get; init; } +} diff --git a/src/docs-assembler/Sourcing/RepositoryCloner.cs b/src/docs-assembler/Sourcing/RepositoryCloner.cs deleted file mode 100644 index a22948471..000000000 --- a/src/docs-assembler/Sourcing/RepositoryCloner.cs +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - -using System.Collections.Concurrent; -using System.Diagnostics; -using Documentation.Assembler.Configuration; -using Elastic.Markdown.Diagnostics; -using Elastic.Markdown.IO; -using Microsoft.Extensions.Logging; -using ProcNet; -using ProcNet.Std; - -namespace Documentation.Assembler.Sourcing; - -public class RepositoryCloner(ILoggerFactory logger, AssembleContext context) -{ - private readonly ILogger _logger = logger.CreateLogger(); - - private AssemblyConfiguration Configuration => context.Configuration; - - public async Task CloneAll(Cancel ctx = default) - { - var dict = new ConcurrentDictionary(); - - _logger.LogInformation("Cloning narrative content: {Repository}", NarrativeRepository.Name); - CloneRepository(Configuration.Narrative, NarrativeRepository.Name, dict); - - _logger.LogInformation("Cloning {ReferenceRepositoryCount} repositories", Configuration.ReferenceRepositories.Count); - await Parallel.ForEachAsync(Configuration.ReferenceRepositories, - new ParallelOptions - { - CancellationToken = ctx, - MaxDegreeOfParallelism = Environment.ProcessorCount - }, async (kv, c) => - { - await Task.Run(() => - { - var name = kv.Key.Trim(); - CloneRepository(kv.Value, name, dict); - }, c); - }).ConfigureAwait(false); - - foreach (var kv in dict.OrderBy(kv => kv.Value.Elapsed)) - Console.WriteLine($"-> {kv.Key}\ttook: {kv.Value.Elapsed}"); - } - - private void CloneRepository(Repository? repository, string name, ConcurrentDictionary dict) - { - repository ??= new Repository(); - repository.CurrentBranch ??= "main"; - repository.Origin ??= !string.IsNullOrEmpty(Environment.GetEnvironmentVariable("GITHUB_ACTIONS")) - ? $"https://github.com/elastic/{name}.git" - : $"git@github.com:elastic/{name}.git"; - - var checkoutFolder = Path.Combine(context.OutputDirectory.FullName, name); - var relativePath = Path.GetRelativePath(Paths.Root.FullName, checkoutFolder); - var sw = Stopwatch.StartNew(); - _ = dict.AddOrUpdate(name, sw, (_, _) => sw); - if (context.ReadFileSystem.Directory.Exists(checkoutFolder)) - { - _logger.LogInformation("Pull: {Name}\t{Repository}\t{RelativePath}", name, repository, relativePath); - // --allow-unrelated-histories due to shallow clones not finding a common ancestor - ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff"); - } - else - { - _logger.LogInformation("Checkout: {Name}\t{Repository}\t{RelativePath}", name, repository, relativePath); - if (repository.CheckoutStrategy == "full") - { - Exec("git", "clone", repository.Origin, checkoutFolder, - "--depth", "1", "--single-branch", - "--branch", repository.CurrentBranch - ); - } - else if (repository.CheckoutStrategy == "partial") - { - Exec( - "git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder - ); - - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone"); - ExecIn(checkoutFolder, "git", "checkout", repository.CurrentBranch); - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); - } - } - - sw.Stop(); - - void Exec(string binary, params string[] args) => ExecIn(null, binary, args); - - void ExecIn(string? workingDirectory, string binary, params string[] args) - { - var arguments = new StartArguments(binary, args) - { - WorkingDirectory = workingDirectory - }; - var result = Proc.StartRedirected(arguments, new ConsoleLineHandler(_logger, name)); - if (result.ExitCode != 0) - context.Collector.EmitError("", $"Exit code: {result.ExitCode} while executing {binary} {string.Join(" ", arguments)}"); - } - } -} - -public class ConsoleLineHandler(ILogger logger, string prefix) : IConsoleLineHandler -{ - public void Handle(LineOut lineOut) => lineOut.CharsOrString( - r => Console.Write(prefix + ": " + r), - l => logger.LogInformation("{RepositoryName}: {Message}", prefix, l) - ); - - public void Handle(Exception e) { } -} diff --git a/src/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/docs-assembler/Sourcing/RepositorySourcesFetcher.cs new file mode 100644 index 000000000..7c3c975aa --- /dev/null +++ b/src/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -0,0 +1,176 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Collections.Concurrent; +using System.Diagnostics; +using System.IO.Abstractions; +using Documentation.Assembler.Configuration; +using Elastic.Markdown.IO; +using Microsoft.Extensions.Logging; +using ProcNet; +using ProcNet.Std; + +namespace Documentation.Assembler.Sourcing; + +public class RepositoryCheckoutProvider(ILoggerFactory logger, AssembleContext context) +{ + private readonly ILogger _logger = logger.CreateLogger(); + + private AssemblyConfiguration Configuration => context.Configuration; + + public IReadOnlyCollection GetAll() + { + var fs = context.ReadFileSystem; + var repositories = Configuration.ReferenceRepositories.Values.Concat([Configuration.Narrative]); + var checkouts = new List(); + foreach (var repo in repositories) + { + var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(context.CheckoutDirectory.FullName, repo.Name)); + var head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + var checkout = new Checkout + { + Repository = repo, + Directory = checkoutFolder, + HeadReference = head + }; + checkouts.Add(checkout); + } + return checkouts; + } + + public async Task> AcquireAllLatest(Cancel ctx = default) + { + var dict = new ConcurrentDictionary(); + var checkouts = new ConcurrentBag(); + + if (context.OutputDirectory.Exists) + { + _logger.LogInformation("Cleaning output directory: {OutputDirectory}", context.OutputDirectory.FullName); + context.OutputDirectory.Delete(true); + } + + + _logger.LogInformation("Cloning narrative content: {Repository}", NarrativeRepository.RepositoryName); + var checkout = CloneOrUpdateRepository(Configuration.Narrative, NarrativeRepository.RepositoryName, dict); + checkouts.Add(checkout); + + _logger.LogInformation("Cloning {ReferenceRepositoryCount} repositories", Configuration.ReferenceRepositories.Count); + await Parallel.ForEachAsync(Configuration.ReferenceRepositories, + new ParallelOptions + { + CancellationToken = ctx, + MaxDegreeOfParallelism = Environment.ProcessorCount + }, async (kv, c) => + { + await Task.Run(() => + { + var name = kv.Key.Trim(); + var clone = CloneOrUpdateRepository(kv.Value, name, dict); + checkouts.Add(clone); + }, c); + }).ConfigureAwait(false); + + foreach (var kv in dict.OrderBy(kv => kv.Value.Elapsed)) + _logger.LogInformation("-> {Repository}\ttook: {Elapsed}", kv.Key, kv.Value.Elapsed); + + return checkouts.ToList().AsReadOnly(); + } + + private Checkout CloneOrUpdateRepository(Repository repository, string name, ConcurrentDictionary dict) + { + var fs = context.ReadFileSystem; + var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(context.CheckoutDirectory.FullName, name)); + var relativePath = Path.GetRelativePath(Paths.Root.FullName, checkoutFolder.FullName); + var sw = Stopwatch.StartNew(); + _ = dict.AddOrUpdate(name, sw, (_, _) => sw); + var head = string.Empty; + if (checkoutFolder.Exists) + { + _logger.LogInformation("Pull: {Name}\t{Repository}\t{RelativePath}", name, repository, relativePath); + // --allow-unrelated-histories due to shallow clones not finding a common ancestor + ExecIn(name, checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff"); + head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + } + else + { + _logger.LogInformation("Checkout: {Name}\t{Repository}\t{RelativePath}", name, repository, relativePath); + if (repository.CheckoutStrategy == "full") + { + Exec("git", "clone", repository.Origin, checkoutFolder.FullName, + "--depth", "1", "--single-branch", + "--branch", repository.CurrentBranch + ); + } + else if (repository.CheckoutStrategy == "partial") + { + Exec( + "git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName + ); + + ExecIn(name, checkoutFolder, "git", "sparse-checkout", "set", "--cone"); + ExecIn(name, checkoutFolder, "git", "checkout", repository.CurrentBranch); + ExecIn(name, checkoutFolder, "git", "sparse-checkout", "set", "docs"); + head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + } + } + + sw.Stop(); + + return new Checkout + { + Repository = repository, + Directory = checkoutFolder, + HeadReference = head + }; + } + + private void Exec(string name, string binary, params string[] args) => ExecIn(name, null, binary, args); + + private void ExecIn(string name, IDirectoryInfo? workingDirectory, string binary, params string[] args) + { + var arguments = new StartArguments(binary, args) + { + WorkingDirectory = workingDirectory?.FullName + }; + var result = Proc.StartRedirected(arguments, new ConsoleLineHandler(_logger, name)); + if (result.ExitCode != 0) + context.Collector.EmitError("", $"Exit code: {result.ExitCode} while executing {binary} {string.Join(" ", args)} in {workingDirectory}"); + } + + private string Capture(IDirectoryInfo? workingDirectory, string binary, params string[] args) + { + var arguments = new StartArguments(binary, args) + { + WorkingDirectory = workingDirectory?.FullName, + WaitForStreamReadersTimeout = TimeSpan.FromSeconds(3), + Timeout = TimeSpan.FromSeconds(3), + WaitForExit = TimeSpan.FromSeconds(3), + ConsoleOutWriter = NoopConsoleWriter.Instance + }; + var result = Proc.Start(arguments); + if (result.ExitCode != 0) + context.Collector.EmitError("", $"Exit code: {result.ExitCode} while executing {binary} {string.Join(" ", args)} in {workingDirectory}"); + var line = result.ConsoleOut.FirstOrDefault()?.Line ?? throw new Exception($"No output captured for {binary}: {workingDirectory}"); + return line; + } +} + +public class ConsoleLineHandler(ILogger logger, string prefix) : IConsoleLineHandler +{ + public void Handle(LineOut lineOut) => lineOut.CharsOrString( + r => Console.Write(prefix + ": " + r), + l => logger.LogInformation("{RepositoryName}: {Message}", prefix, l) + ); + + public void Handle(Exception e) { } +} + +public class NoopConsoleWriter : IConsoleOutWriter +{ + public static readonly NoopConsoleWriter Instance = new(); + + public void Write(Exception e) { } + + public void Write(ConsoleOut consoleOut) { } +} diff --git a/src/docs-builder/Cli/Commands.cs b/src/docs-builder/Cli/Commands.cs index f0fe63da8..47bcb4f94 100644 --- a/src/docs-builder/Cli/Commands.cs +++ b/src/docs-builder/Cli/Commands.cs @@ -84,7 +84,7 @@ public async Task Generate( { UrlPathPrefix = pathPrefix, Force = force ?? false, - AllowIndexing = allowIndexing != null + AllowIndexing = allowIndexing ?? false }; } // On CI, we are running on merge commit which may have changes against an older