diff --git a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs
index 93d0d5667..44f050af4 100644
--- a/src/Elastic.Documentation.Configuration/Assembler/Repository.cs
+++ b/src/Elastic.Documentation.Configuration/Assembler/Repository.cs
@@ -2,6 +2,7 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information
+using System.Runtime.Serialization;
using YamlDotNet.Serialization;
namespace Elastic.Documentation.Configuration.Assembler;
@@ -12,6 +13,14 @@ public record NarrativeRepository : Repository
public override string Name { get; set; } = RepositoryName;
}
+public enum CheckoutStrategy // selects how FetchAndCheckout prepares a repository's working tree; bound to Repository's "checkout_strategy" YAML member
+{
+ [EnumMember(Value = "partial")] // presumably the YAML text mapped to this member — confirm the deserializer honors EnumMember
+ Partial, // default value of Repository.CheckoutStrategy; FetchAndCheckout runs 'git sparse-checkout set docs' for it
+ [EnumMember(Value = "full")] // presumably the YAML text mapped to this member — confirm the deserializer honors EnumMember
+ Full // FetchAndCheckout runs 'git sparse-checkout disable' for it
+}
+
public record Repository
{
[YamlIgnore]
@@ -27,7 +36,7 @@ public record Repository
public string GitReferenceNext { get; set; } = "main";
[YamlMember(Alias = "checkout_strategy")]
- public string CheckoutStrategy { get; set; } = "partial";
+ public CheckoutStrategy CheckoutStrategy { get; set; } = CheckoutStrategy.Partial;
[YamlMember(Alias = "skip")]
public bool Skip { get; set; }
diff --git a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
index 99a9bdbf8..f23d2f71b 100644
--- a/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
+++ b/src/tooling/docs-assembler/Cli/RepositoryCommands.cs
@@ -3,8 +3,10 @@
// See the LICENSE file in the project root for more information
using System.Collections.Concurrent;
+using System.ComponentModel;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
using System.IO.Abstractions;
using System.Net.Mime;
using Actions.Core.Services;
@@ -39,11 +41,13 @@ private void AssignOutputLogger()
/// Clones all repositories
/// Treat warnings as errors and fail the build on warnings
/// The environment to build
+ /// If true, fetches the latest commit of the branch instead of the git reference recorded in the link registry entry
///
[Command("clone-all")]
public async Task CloneAll(
bool? strict = null,
string? environment = null,
+ bool? fetchLatest = null,
Cancel ctx = default
)
{
@@ -55,7 +59,8 @@ public async Task CloneAll(
var assembleContext = new AssembleContext(environment, collector, new FileSystem(), new FileSystem(), null, null);
var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
- _ = await cloner.AcquireAllLatest(ctx);
+
+ _ = await cloner.CloneAll(fetchLatest ?? false, ctx);
await collector.StopAsync(ctx);
@@ -138,7 +143,6 @@ public async Task UpdateLinkIndexAll(ContentSource contentSource, Cancel ct
// It's only used to get the list of repositories.
var assembleContext = new AssembleContext("prod", collector, new FileSystem(), new FileSystem(), null, null);
var cloner = new RepositorySourcer(logger, assembleContext.CheckoutDirectory, new FileSystem(), collector);
- var dict = new ConcurrentDictionary();
var repositories = new Dictionary(assembleContext.Configuration.ReferenceRepositories)
{
{ NarrativeRepository.RepositoryName, assembleContext.Configuration.Narrative }
@@ -152,8 +156,7 @@ await Parallel.ForEachAsync(repositories,
{
try
{
- var name = kv.Key.Trim();
- var checkout = cloner.CloneOrUpdateRepository(kv.Value, name, kv.Value.GetBranch(contentSource), dict);
+ var checkout = cloner.CloneRef(kv.Value, kv.Value.GetBranch(contentSource), true);
var outputPath = Directory.CreateTempSubdirectory(checkout.Repository.Name).FullName;
var context = new BuildContext(
collector,
diff --git a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs
index 388306865..0ea5adc66 100644
--- a/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs
+++ b/src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs
@@ -8,6 +8,7 @@
using System.IO.Abstractions;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Diagnostics;
+using Elastic.Documentation.LinkIndex;
using Elastic.Markdown.IO;
using Microsoft.Extensions.Logging;
using ProcNet;
@@ -46,129 +47,169 @@ public IReadOnlyCollection GetAll()
return checkouts;
}
- public async Task> AcquireAllLatest(Cancel ctx = default)
+ public async Task> CloneAll(bool fetchLatest, Cancel ctx = default) // clones every reference repository plus the narrative repository and returns the collected checkouts
{
- _logger.LogInformation(
- "Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
+ _logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
PublishEnvironment.Name,
PublishEnvironment.ContentSource.ToStringFast(true)
);
+ var checkouts = new ConcurrentBag(); // filled concurrently by the parallel loop below
+
+ ILinkIndexReader linkIndexReader = Aws3LinkIndexReader.CreateAnonymous();
+ var linkRegistry = await linkIndexReader.GetRegistry(ctx); // looked up below by repository key, then by branch
var repositories = new Dictionary(Configuration.ReferenceRepositories)
{
{ NarrativeRepository.RepositoryName, Configuration.Narrative }
};
- return await RepositorySourcer.AcquireAllLatest(repositories, PublishEnvironment.ContentSource, ctx);
- }
-}
-
-public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
-{
- private readonly ILogger _logger = logger.CreateLogger();
- public async Task> AcquireAllLatest(Dictionary repositories, ContentSource source, Cancel ctx = default)
- {
- var dict = new ConcurrentDictionary();
- var checkouts = new ConcurrentBag();
await Parallel.ForEachAsync(repositories,
new ParallelOptions
{
CancellationToken = ctx,
MaxDegreeOfParallelism = Environment.ProcessorCount
- }, async (kv, c) =>
+ }, async (repo, c) =>
{
await Task.Run(() =>
{
- var name = kv.Key.Trim();
- var repo = kv.Value;
- var clone = CloneOrUpdateRepository(kv.Value, name, repo.GetBranch(source), dict);
- checkouts.Add(clone);
+ if (!linkRegistry.Repositories.TryGetValue(repo.Key, out var entry)) // a repository missing from the link registry is reported and yields no checkout
+ {
+ context.Collector.EmitError("", $"'{repo.Key}' does not exist in link index");
+ return;
+ }
+ var branch = repo.Value.GetBranch(PublishEnvironment.ContentSource);
+ var gitRef = branch; // with fetchLatest the branch itself is what gets checked out
+ if (!fetchLatest) // otherwise pin to the git reference the link registry recorded for this branch
+ {
+ if (!entry.TryGetValue(branch, out var entryInfo))
+ {
+ context.Collector.EmitError("", $"'{repo.Key}' does not have a '{branch}' entry in link index");
+ return;
+ }
+ gitRef = entryInfo.GitReference;
+ }
+ checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest)); // fetchLatest is passed as CloneRef's 'pull' argument
}, c);
}).ConfigureAwait(false);
-
- return checkouts.ToList().AsReadOnly();
+ return checkouts; // repositories that hit an error above are absent from the result
}
+}
- public Checkout CloneOrUpdateRepository(Repository repository, string name, string branch, ConcurrentDictionary dict)
- {
- var fs = readFileSystem;
- var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, name));
- var relativePath = Path.GetRelativePath(Paths.WorkingDirectoryRoot.FullName, checkoutFolder.FullName);
- var sw = Stopwatch.StartNew();
- _ = dict.AddOrUpdate($"{name} ({branch})", sw, (_, _) => sw);
+public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
+{
+ private readonly ILogger _logger = logger.CreateLogger();
- string? head;
- if (checkoutFolder.Exists)
+ /// <summary>
+ /// Clones the repository to the checkout directory and checks out the specified git reference. When <paramref name="pull"/> is true a shallow 'git pull' of the reference follows the checkout; <paramref name="attempt"/> counts retries and the method gives up (emitting an error) once it exceeds 3.
+ /// </summary>
+ /// <param name="repository">The repository to clone.</param>
+ /// <param name="gitRef">The git reference to check out. Branch, commit or tag</param>
+ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
+ {
+ var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
+ if (attempt > 3) // recursion guard: every failure path below retries with attempt + 1
{
- if (!TryUpdateSource(name, branch, relativePath, checkoutFolder, out head))
- head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
+ collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
+ return new Checkout
+ {
+ Directory = checkoutFolder,
+ HeadReference = "", // no reference could be checked out; the error was emitted above
+ Repository = repository,
+ };
}
- else
- head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
-
- sw.Stop();
-
- return new Checkout
+ _logger.LogInformation("{RepositoryName}: Cloning repository {RepositoryName}@{Commit} to {CheckoutFolder}", repository.Name, repository.Name, gitRef,
+ checkoutFolder.FullName);
+ if (!checkoutFolder.Exists)
{
- Repository = repository,
- Directory = checkoutFolder,
- HeadReference = head
- };
- }
-
- private bool TryUpdateSource(string name, string branch, string relativePath, IDirectoryInfo checkoutFolder, [NotNullWhen(true)] out string? head)
- {
- head = null;
- try
+ checkoutFolder.Create();
+ checkoutFolder.Refresh();
+ }
+ var isGitInitialized = GitInit(repository, checkoutFolder); // true only when a .git directory already existed before this call
+ string? head = null;
+ if (isGitInitialized) // HEAD is only read for a pre-existing repository; a fresh 'git init' leaves head null
{
- _logger.LogInformation("Pull: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
- // --allow-unrelated-histories due to shallow clones not finding a common ancestor
- ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff");
+ try
+ {
+ head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
+ }
+ catch (Exception e)
+ {
+ _logger.LogError(e, "{RepositoryName}: Failed to acquire current commit, falling back to recreating from scratch", repository.Name);
+ checkoutFolder.Delete(true);
+ checkoutFolder.Refresh();
+ return CloneRef(repository, gitRef, pull, attempt + 1);
+ }
}
- catch (Exception e)
+ // Repository already checked out the same commit
+ if (head != null && head == gitRef) // NOTE(review): head is 'git rev-parse HEAD' output, so this presumably only matches when gitRef is a full commit hash — confirm
+ // nothing to do, already at the right commit
+ _logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
+ else
{
- _logger.LogError(e, "Failed to update {Name} from {RelativePath}, falling back to recreating from scratch", name, relativePath);
- if (checkoutFolder.Exists)
+ FetchAndCheckout(repository, gitRef, checkoutFolder);
+ if (!pull) // without pull the fetched reference is final; return straight away
+ {
+ return new Checkout
+ {
+ Directory = checkoutFolder,
+ HeadReference = gitRef,
+ Repository = repository,
+ };
+ }
+ try
+ {
+ ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
+ }
+ catch (Exception e)
{
+ _logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
+ repository.Name, gitRef, checkoutFolder.FullName);
checkoutFolder.Delete(true);
checkoutFolder.Refresh();
+ return CloneRef(repository, gitRef, pull, attempt + 1);
}
- return false;
}
- head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
+ return new Checkout
+ {
+ Directory = checkoutFolder,
+ HeadReference = gitRef, // NOTE(review): this is the requested reference (a branch name when one was passed), not a resolved commit hash as the removed code returned — confirm callers expect that
+ Repository = repository,
+ };
+ }
- return true;
+ /// <summary>
+ /// Initializes the git repository if it is not already initialized.
+ /// A folder counts as initialized when it contains a <c>.git</c> directory; otherwise
+ /// <c>git init</c> is run in it and <c>origin</c> is added pointing at the repository's origin.
+ /// </summary>
+ /// <param name="repository">The repository whose <c>Origin</c> becomes the <c>origin</c> remote.</param>
+ /// <param name="checkoutFolder">The folder to inspect and, if needed, initialize.</param>
+ /// <returns>
+ /// <c>true</c> if the repository was already initialized before this call;
+ /// <c>false</c> if this call had to initialize it.
+ /// </returns>
+ private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder)
+ {
+ // Probe through the injected file system (the one CloneRef builds checkoutFolder from)
+ // instead of the static System.IO.Directory, so the abstraction is honored consistently.
+ var gitFolder = Path.Combine(checkoutFolder.FullName, ".git");
+ if (readFileSystem.Directory.Exists(gitFolder))
+ return true;
+ ExecIn(checkoutFolder, "git", "init");
+ ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin);
+ return false;
+ }
- private string CheckoutFromScratch(Repository repository, string name, string branch, string relativePath, IDirectoryInfo checkoutFolder)
+ private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder) // fetches gitRef from origin at depth 1, applies the repository's checkout strategy, then force-checks out gitRef
{
- _logger.LogInformation("Checkout: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
+ ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
switch (repository.CheckoutStrategy)
{
- case "full":
- Exec("git", "clone", repository.Origin, checkoutFolder.FullName,
- "--depth", "1", "--single-branch",
- "--branch", branch
- );
+ case CheckoutStrategy.Full:
+ ExecIn(checkoutFolder, "git", "sparse-checkout", "disable"); // full strategy: turn sparse checkout off
break;
- case "partial":
- Exec(
- "git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName
- );
-
- ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone");
- ExecIn(checkoutFolder, "git", "checkout", branch);
+ case CheckoutStrategy.Partial:
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
break;
+ default: // guards against enum values added later without a matching case
+ throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null);
}
-
- return Capture(checkoutFolder, "git", "rev-parse", "HEAD");
+ ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef); // runs after the sparse-checkout setting above has been applied
}
- private void Exec(string binary, params string[] args) => ExecIn(null, binary, args);
-
private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
{
var arguments = new ExecArguments(binary, args)
@@ -221,7 +262,6 @@ string CaptureOutput()
return line;
}
}
-
}
public class NoopConsoleWriter : IConsoleOutWriter