diff --git a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs index 93d0d5667..44f050af4 100644 --- a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs +++ b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs @@ -2,6 +2,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Runtime.Serialization; using YamlDotNet.Serialization; namespace Elastic.Documentation.Configuration.Assembler; @@ -12,6 +13,14 @@ public record NarrativeRepository : Repository public override string Name { get; set; } = RepositoryName; } +public enum CheckoutStrategy +{ + [EnumMember(Value = "partial")] + Partial, + [EnumMember(Value = "full")] + Full +} + public record Repository { [YamlIgnore] @@ -27,7 +36,7 @@ public record Repository public string GitReferenceNext { get; set; } = "main"; [YamlMember(Alias = "checkout_strategy")] - public string CheckoutStrategy { get; set; } = "partial"; + public CheckoutStrategy CheckoutStrategy { get; set; } = CheckoutStrategy.Partial; [YamlMember(Alias = "skip")] public bool Skip { get; set; } diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs index 99a9bdbf8..f23d2f71b 100644 --- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs +++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs @@ -3,8 +3,10 @@ // See the LICENSE file in the project root for more information using System.Collections.Concurrent; +using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Globalization; using System.IO.Abstractions; using System.Net.Mime; using Actions.Core.Services; @@ -39,11 +41,13 @@ private void AssignOutputLogger() /// Clones all repositories /// Treat warnings as errors and fail the build on warnings /// The environment to build + /// If true fetch the latest commit of the branch instead of the link registry entry ref /// [Command("clone-all")] public async Task CloneAll( bool? strict = null, string? environment = null, + bool? fetchLatest = null, Cancel ctx = default ) { @@ -55,7 +59,8 @@ public async Task CloneAll( var assembleContext = new AssembleContext(environment, collector, new FileSystem(), new FileSystem(), null, null); var cloner = new AssemblerRepositorySourcer(logger, assembleContext); - _ = await cloner.AcquireAllLatest(ctx); + + _ = await cloner.CloneAll(fetchLatest ?? false, ctx); await collector.StopAsync(ctx); @@ -138,7 +143,6 @@ public async Task UpdateLinkIndexAll(ContentSource contentSource, Cancel ct // It's only used to get the list of repositories. var assembleContext = new AssembleContext("prod", collector, new FileSystem(), new FileSystem(), null, null); var cloner = new RepositorySourcer(logger, assembleContext.CheckoutDirectory, new FileSystem(), collector); - var dict = new ConcurrentDictionary(); var repositories = new Dictionary(assembleContext.Configuration.ReferenceRepositories) { { NarrativeRepository.RepositoryName, assembleContext.Configuration.Narrative } @@ -152,8 +156,7 @@ await Parallel.ForEachAsync(repositories, { try { - var name = kv.Key.Trim(); - var checkout = cloner.CloneOrUpdateRepository(kv.Value, name, kv.Value.GetBranch(contentSource), dict); + var checkout = cloner.CloneRef(kv.Value, kv.Value.GetBranch(contentSource), true); var outputPath = Directory.CreateTempSubdirectory(checkout.Repository.Name).FullName; var context = new BuildContext( collector, diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs index 388306865..0ea5adc66 100644 --- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs +++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs @@ -8,6 +8,7 @@ using System.IO.Abstractions; using Elastic.Documentation.Configuration.Assembler; using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.LinkIndex; using Elastic.Markdown.IO; using Microsoft.Extensions.Logging; using ProcNet; @@ -46,129 +47,169 @@ public IReadOnlyCollection GetAll() return checkouts; } - public async Task> AcquireAllLatest(Cancel ctx = default) + public async Task> CloneAll(bool fetchLatest, Cancel ctx = default) { - _logger.LogInformation( - "Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy", + _logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy", PublishEnvironment.Name, PublishEnvironment.ContentSource.ToStringFast(true) ); + var checkouts = new ConcurrentBag(); + + ILinkIndexReader linkIndexReader = Aws3LinkIndexReader.CreateAnonymous(); + var linkRegistry = await linkIndexReader.GetRegistry(ctx); var repositories = new Dictionary(Configuration.ReferenceRepositories) { { NarrativeRepository.RepositoryName, Configuration.Narrative } }; - return await RepositorySourcer.AcquireAllLatest(repositories, PublishEnvironment.ContentSource, ctx); - } -} - -public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector) -{ - private readonly ILogger _logger = logger.CreateLogger(); - public async Task> AcquireAllLatest(Dictionary repositories, ContentSource source, Cancel ctx = default) - { - var dict = new ConcurrentDictionary(); - var checkouts = new ConcurrentBag(); await Parallel.ForEachAsync(repositories, new ParallelOptions { CancellationToken = ctx, MaxDegreeOfParallelism = Environment.ProcessorCount - }, async (kv, c) => + }, async (repo, c) => { await Task.Run(() => { - var name = kv.Key.Trim(); - var repo = kv.Value; - var clone = CloneOrUpdateRepository(kv.Value, name, repo.GetBranch(source), dict); - checkouts.Add(clone); + if (!linkRegistry.Repositories.TryGetValue(repo.Key, out var entry)) + { + context.Collector.EmitError("", $"'{repo.Key}' does not exist in link index"); + return; + } + var branch = repo.Value.GetBranch(PublishEnvironment.ContentSource); + var gitRef = branch; + if (!fetchLatest) + { + if (!entry.TryGetValue(branch, out var entryInfo)) + { + context.Collector.EmitError("", $"'{repo.Key}' does not have a '{branch}' entry in link index"); + return; + } + gitRef = entryInfo.GitReference; + } + checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest)); }, c); }).ConfigureAwait(false); - - return checkouts.ToList().AsReadOnly(); + return checkouts; } +} - public Checkout CloneOrUpdateRepository(Repository repository, string name, string branch, ConcurrentDictionary dict) - { - var fs = readFileSystem; - var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, name)); - var relativePath = Path.GetRelativePath(Paths.WorkingDirectoryRoot.FullName, checkoutFolder.FullName); - var sw = Stopwatch.StartNew(); - _ = dict.AddOrUpdate($"{name} ({branch})", sw, (_, _) => sw); +public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector) +{ + private readonly ILogger _logger = logger.CreateLogger(); - string? head; - if (checkoutFolder.Exists) + // + // Clones the repository to the checkout directory and checks out the specified git reference. + // + // The repository to clone. + // The git reference to check out. Branch, commit or tag + public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1) + { + var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name)); + if (attempt > 3) { - if (!TryUpdateSource(name, branch, relativePath, checkoutFolder, out head)) - head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder); + collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts"); + return new Checkout + { + Directory = checkoutFolder, + HeadReference = "", + Repository = repository, + }; } - else - head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder); - - sw.Stop(); - - return new Checkout + _logger.LogInformation("{RepositoryName}: Cloning repository {RepositoryName}@{Commit} to {CheckoutFolder}", repository.Name, repository.Name, gitRef, + checkoutFolder.FullName); + if (!checkoutFolder.Exists) { - Repository = repository, - Directory = checkoutFolder, - HeadReference = head - }; - } - - private bool TryUpdateSource(string name, string branch, string relativePath, IDirectoryInfo checkoutFolder, [NotNullWhen(true)] out string? head) - { - head = null; - try + checkoutFolder.Create(); + checkoutFolder.Refresh(); + } + var isGitInitialized = GitInit(repository, checkoutFolder); + string? head = null; + if (isGitInitialized) { - _logger.LogInformation("Pull: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath); - // --allow-unrelated-histories due to shallow clones not finding a common ancestor - ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff"); + try + { + head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + } + catch (Exception e) + { + _logger.LogError(e, "{RepositoryName}: Failed to acquire current commit, falling back to recreating from scratch", repository.Name); + checkoutFolder.Delete(true); + checkoutFolder.Refresh(); + return CloneRef(repository, gitRef, pull, attempt + 1); + } } - catch (Exception e) + // Repository already checked out the same commit + if (head != null && head == gitRef) + // nothing to do, already at the right commit + _logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef); + else { - _logger.LogError(e, "Failed to update {Name} from {RelativePath}, falling back to recreating from scratch", name, relativePath); - if (checkoutFolder.Exists) + FetchAndCheckout(repository, gitRef, checkoutFolder); + if (!pull) + { + return new Checkout + { + Directory = checkoutFolder, + HeadReference = gitRef, + Repository = repository, + }; + } + try + { + ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef); + } + catch (Exception e) { + _logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch", + repository.Name, gitRef, checkoutFolder.FullName); checkoutFolder.Delete(true); checkoutFolder.Refresh(); + return CloneRef(repository, gitRef, pull, attempt + 1); } - return false; } - head = Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + return new Checkout + { + Directory = checkoutFolder, + HeadReference = gitRef, + Repository = repository, + }; + } - return true; + /// + /// Initializes the git repository if it is not already initialized. + /// Returns true if the repository was already initialized. + /// + private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder) + { + var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git")); + if (isGitAlreadyInitialized) + return true; + ExecIn(checkoutFolder, "git", "init"); + ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin); + return false; } - private string CheckoutFromScratch(Repository repository, string name, string branch, string relativePath, IDirectoryInfo checkoutFolder) + private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder) { - _logger.LogInformation("Checkout: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath); + ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef); switch (repository.CheckoutStrategy) { - case "full": - Exec("git", "clone", repository.Origin, checkoutFolder.FullName, - "--depth", "1", "--single-branch", - "--branch", branch - ); + case CheckoutStrategy.Full: + ExecIn(checkoutFolder, "git", "sparse-checkout", "disable"); break; - case "partial": - Exec( - "git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName - ); - - ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone"); - ExecIn(checkoutFolder, "git", "checkout", branch); + case CheckoutStrategy.Partial: ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs"); break; + default: + throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null); } - - return Capture(checkoutFolder, "git", "rev-parse", "HEAD"); + ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef); } - private void Exec(string binary, params string[] args) => ExecIn(null, binary, args); - private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args) { var arguments = new ExecArguments(binary, args) @@ -221,7 +262,6 @@ string CaptureOutput() return line; } } - } public class NoopConsoleWriter : IConsoleOutWriter