Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Elastic.Documentation/Links/LinkRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Diagnostics.CodeAnalysis;
using System.Text.Json;
using System.Text.Json.Serialization;
using Elastic.Documentation.Serialization;
Expand Down
8 changes: 6 additions & 2 deletions src/tooling/docs-assembler/Cli/RepositoryCommands.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
using Documentation.Assembler.Navigation;
using Documentation.Assembler.Sourcing;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Links;
using Elastic.Documentation.Tooling.Diagnostics.Console;
using Elastic.Markdown;
using Elastic.Markdown.Exporters;
Expand Down Expand Up @@ -104,9 +105,10 @@ public async Task<int> BuildAll(
await assembleContext.Collector.StopAsync(ctx);
return 1;
}

var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
var checkouts = cloner.GetAll().ToArray();
var checkoutResult = cloner.GetAll();
var checkouts = checkoutResult.Checkouts.ToArray();

if (checkouts.Length == 0)
throw new Exception("No checkouts found");

Expand All @@ -123,6 +125,8 @@ public async Task<int> BuildAll(
var builder = new AssemblerBuilder(logger, assembleContext, navigation, htmlWriter, pathProvider, historyMapper);
await builder.BuildAllAsync(assembleSources.AssembleSets, ctx);

await cloner.WriteLinkRegistrySnapshot(checkoutResult.LinkRegistrySnapshot, ctx);

var sitemapBuilder = new SitemapBuilder(navigation.NavigationItems, assembleContext.WriteFileSystem, assembleContext.OutputDirectory);
sitemapBuilder.Generate();

Expand Down
96 changes: 96 additions & 0 deletions src/tooling/docs-assembler/Sourcing/GitFacade.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.IO.Abstractions;
using Elastic.Documentation.Diagnostics;
using ProcNet;

namespace Documentation.Assembler.Sourcing;


public interface IGitRepository
{
void Init();
string GetCurrentCommit();
void GitAddOrigin(string origin);
bool IsInitialized();
void Pull(string branch);
void Fetch(string reference);
void EnableSparseCheckout(string folder);
void DisableSparseCheckout();
void Checkout(string reference);
}

// This git repository implementation is optimized for pull and fetching single commits.
// It uses `git pull --depth 1` and `git fetch --depth 1` to minimize the amount of data transferred.
public class SingleCommitOptimizedGitRepository(DiagnosticsCollector collector, IDirectoryInfo workingDirectory) : IGitRepository
{
public string GetCurrentCommit() => Capture("git", "rev-parse", "HEAD");

public void Init() => ExecIn("git", "init");
public bool IsInitialized() => Directory.Exists(Path.Combine(workingDirectory.FullName, ".git"));
public void Pull(string branch) => ExecIn("git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", branch);
public void Fetch(string reference) => ExecIn("git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", reference);
public void EnableSparseCheckout(string folder) => ExecIn("git", "sparse-checkout", "set", folder);
public void DisableSparseCheckout() => ExecIn("git", "sparse-checkout", "disable");
public void Checkout(string reference) => ExecIn("git", "checkout", "--force", reference);

public void GitAddOrigin(string origin) => ExecIn("git", "remote", "add", "origin", origin);

private void ExecIn(string binary, params string[] args)
{
var arguments = new ExecArguments(binary, args)
{
WorkingDirectory = workingDirectory.FullName,
Environment = new Dictionary<string, string>
{
// Disable git editor prompts:
// There are cases where `git pull` would prompt for an editor to write a commit message.
// This env variable prevents that.
{ "GIT_EDITOR", "true" }
},
};
var result = Proc.Exec(arguments);
if (result != 0)
collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
}
private string Capture(string binary, params string[] args)
{
// Try 10 times to capture the output of the command, if it fails, we'll throw an exception on the last try
Exception? e = null;
for (var i = 0; i <= 9; i++)
{
try
{
return CaptureOutput();
}
catch (Exception ex)
{
if (ex is not null)
e = ex;
}
}

if (e is not null)
collector.EmitError("", "failure capturing stdout", e);

return string.Empty;

string CaptureOutput()
{
var arguments = new StartArguments(binary, args)
{
WorkingDirectory = workingDirectory.FullName,
Timeout = TimeSpan.FromSeconds(3),
WaitForExit = TimeSpan.FromSeconds(3),
ConsoleOutWriter = NoopConsoleWriter.Instance
};
var result = Proc.Start(arguments);
var line = result.ExitCode != 0
? throw new Exception($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}")
: result.ConsoleOut.FirstOrDefault()?.Line ?? throw new Exception($"No output captured for {binary}: {workingDirectory}");
return line;
}
}
}
137 changes: 59 additions & 78 deletions src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
// See the LICENSE file in the project root for more information

using System.Collections.Concurrent;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO.Abstractions;
using Elastic.Documentation.Configuration.Assembler;
using Elastic.Documentation.Diagnostics;
using Elastic.Documentation.LinkIndex;
using Elastic.Markdown.IO;
using Elastic.Documentation.Links;
using Microsoft.Extensions.Logging;
using ProcNet;
using ProcNet.Std;
Expand All @@ -25,29 +23,42 @@ public class AssemblerRepositorySourcer(ILoggerFactory logger, AssembleContext c

private RepositorySourcer RepositorySourcer => new(logger, context.CheckoutDirectory, context.ReadFileSystem, context.Collector);

public IReadOnlyCollection<Checkout> GetAll()
public CheckoutResult GetAll()
{
var fs = context.ReadFileSystem;
var repositories = Configuration.ReferenceRepositories.Values.Concat<Repository>([Configuration.Narrative]);
var checkouts = new List<Checkout>();
var linkRegistrySnapshotPath = Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName);
if (!fs.File.Exists(linkRegistrySnapshotPath))
throw new FileNotFoundException("Link-index snapshot not found. Run the clone-all command first.", linkRegistrySnapshotPath);
var linkRegistrySnapshotStr = File.ReadAllText(linkRegistrySnapshotPath);
var linkRegistry = LinkRegistry.Deserialize(linkRegistrySnapshotStr);
foreach (var repo in repositories)
{
var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(context.CheckoutDirectory.FullName, repo.Name));
IGitRepository gitFacade = new SingleCommitOptimizedGitRepository(context.Collector, checkoutFolder);
if (!checkoutFolder.Exists)
{
context.Collector.EmitError(checkoutFolder.FullName, $"'{repo.Name}' does not exist in link index checkout directory");
continue;
}
var head = gitFacade.GetCurrentCommit();
var checkout = new Checkout
{
Repository = repo,
Directory = checkoutFolder,
//TODO read from links.json and ensure we check out exactly that git reference
//+ validate that git reference belongs to the appropriate branch
HeadReference = Guid.NewGuid().ToString("N")
HeadReference = head
};
checkouts.Add(checkout);
}

return checkouts;
return new CheckoutResult
{
Checkouts = checkouts,
LinkRegistrySnapshot = linkRegistry
};
}

public async Task<IReadOnlyCollection<Checkout>> CloneAll(bool fetchLatest, Cancel ctx = default)
public async Task<CheckoutResult> CloneAll(bool fetchLatest, Cancel ctx = default)
{
_logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
PublishEnvironment.Name,
Expand Down Expand Up @@ -91,8 +102,23 @@ await Task.Run(() =>
checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest));
}, c);
}).ConfigureAwait(false);
return checkouts;
await context.WriteFileSystem.File.WriteAllTextAsync(
Path.Combine(context.CheckoutDirectory.FullName, CheckoutResult.LinkRegistrySnapshotFileName),
LinkRegistry.Serialize(linkRegistry),
ctx
);
return new CheckoutResult
{
Checkouts = checkouts,
LinkRegistrySnapshot = linkRegistry
};
}

public async Task WriteLinkRegistrySnapshot(LinkRegistry linkRegistrySnapshot, Cancel ctx = default) => await context.WriteFileSystem.File.WriteAllTextAsync(
Path.Combine(context.OutputDirectory.FullName, "docs", CheckoutResult.LinkRegistrySnapshotFileName),
LinkRegistry.Serialize(linkRegistrySnapshot),
ctx
);
}


Expand All @@ -108,6 +134,7 @@ public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDir
public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
{
var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
IGitRepository git = new SingleCommitOptimizedGitRepository(collector, checkoutFolder);
if (attempt > 3)
{
collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
Expand All @@ -125,13 +152,13 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
checkoutFolder.Create();
checkoutFolder.Refresh();
}
var isGitInitialized = GitInit(repository, checkoutFolder);
var isGitInitialized = GitInit(git, repository);
string? head = null;
if (isGitInitialized)
{
try
{
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
head = git.GetCurrentCommit();
}
catch (Exception e)
{
Expand All @@ -147,7 +174,7 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
_logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
else
{
FetchAndCheckout(repository, gitRef, checkoutFolder);
FetchAndCheckout(git, repository, gitRef);
if (!pull)
{
return new Checkout
Expand All @@ -159,11 +186,11 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
}
try
{
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
git.Pull(gitRef);
}
catch (Exception e)
{
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {Path}, falling back to recreating from scratch",
repository.Name, gitRef, checkoutFolder.FullName);
checkoutFolder.Delete(true);
checkoutFolder.Refresh();
Expand All @@ -183,84 +210,31 @@ public Checkout CloneRef(Repository repository, string gitRef, bool pull = false
/// Initializes the git repository if it is not already initialized.
/// Returns true if the repository was already initialized.
/// </summary>
private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder)
private static bool GitInit(IGitRepository git, Repository repository)
{
var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git"));
var isGitAlreadyInitialized = git.IsInitialized();
if (isGitAlreadyInitialized)
return true;
ExecIn(checkoutFolder, "git", "init");
ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin);
git.Init();
git.GitAddOrigin(repository.Origin);
return false;
}

private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder)
private static void FetchAndCheckout(IGitRepository git, Repository repository, string gitRef)
{
ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
git.Fetch(gitRef);
switch (repository.CheckoutStrategy)
{
case CheckoutStrategy.Full:
ExecIn(checkoutFolder, "git", "sparse-checkout", "disable");
git.DisableSparseCheckout();
break;
case CheckoutStrategy.Partial:
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
git.EnableSparseCheckout("docs");
break;
default:
throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null);
}
ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef);
}

private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
{
var arguments = new ExecArguments(binary, args)
{
WorkingDirectory = workingDirectory?.FullName
};
var result = Proc.Exec(arguments);
if (result != 0)
collector.EmitError("", $"Exit code: {result} while executing {binary} {string.Join(" ", args)} in {workingDirectory}");
}

// ReSharper disable once UnusedMember.Local
private string Capture(IDirectoryInfo? workingDirectory, string binary, params string[] args)
{
// Try 10 times to capture the output of the command, if it fails, we'll throw an exception on the last try
Exception? e = null;
for (var i = 0; i <= 9; i++)
{
try
{
return CaptureOutput();
}
catch (Exception ex)
{
if (ex is not null)
e = ex;
}
}

if (e is not null)
collector.EmitError("", "failure capturing stdout", e);


return string.Empty;

string CaptureOutput()
{
var arguments = new StartArguments(binary, args)
{
WorkingDirectory = workingDirectory?.FullName,
//WaitForStreamReadersTimeout = TimeSpan.FromSeconds(3),
Timeout = TimeSpan.FromSeconds(3),
WaitForExit = TimeSpan.FromSeconds(3),
ConsoleOutWriter = NoopConsoleWriter.Instance
};
var result = Proc.Start(arguments);
var line = result.ExitCode != 0
? throw new Exception($"Exit code is not 0. Received {result.ExitCode} from {binary}: {workingDirectory}")
: result.ConsoleOut.FirstOrDefault()?.Line ?? throw new Exception($"No output captured for {binary}: {workingDirectory}");
return line;
}
git.Checkout(gitRef);
}
}

Expand All @@ -272,3 +246,10 @@ public void Write(Exception e) { }

public void Write(ConsoleOut consoleOut) { }
}

public record CheckoutResult
{
public static string LinkRegistrySnapshotFileName => "link-index.snapshot.json";
public required LinkRegistry LinkRegistrySnapshot { get; init; }
public required IReadOnlyCollection<Checkout> Checkouts { get; init; }
}
Loading