From 6c1ee43eac4bb4fc9bd7c47e4cf789376301c429 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 22 Mar 2023 17:19:01 -0400 Subject: [PATCH 1/3] Test Elasticsearch's links The Elasticsearch links are in a funny, elasticsearch specific spot in a json file. This digs them out of the json file in the most perl way I could think of. But it's compatible with the link checker. And checks the links! --- build_docs.pl | 70 ++++++++++++++++++++++++++++++++++----------- lib/ES/Book.pm | 2 +- lib/ES/LinkCheck.pm | 1 - 3 files changed, 55 insertions(+), 18 deletions(-) diff --git a/build_docs.pl b/build_docs.pl index cc4abfca90116..c1f84ac412717 100755 --- a/build_docs.pl +++ b/build_docs.pl @@ -331,6 +331,7 @@ sub check_links { $link_checker->check; check_kibana_links( $build_dir, $link_checker ) if exists $Conf->{repos}{kibana}; + check_elasticsearch_links( $build_dir, $link_checker ) if exists $Conf->{repos}{elasticsearch}; if ( $link_checker->has_bad ) { say $link_checker->report; } @@ -350,22 +351,6 @@ sub check_kibana_links { say "Checking Kibana links"; -# ${baseUrl}guide/en/elasticsearch/reference/${urlVersion}/modules-scripting-expression.html -# ${ELASTIC_WEBSITE_URL}guide/en/beats/filebeat/${DOC_LINK_VERSION} -# ${ELASTIC_DOCS}search-aggregations-bucket-datehistogram-aggregation.html -# ${ELASTICSEARCH_DOCS}update-transform.html -# ${KIBANA_DOCS}canvas.html -# ${PLUGIN_DOCS}repository-s3.html -# ${FLEET_DOCS}fleet-overview.html -# ${APM_DOCS}overview.html -# ${STACK_DOCS}upgrading-elastic-stack.html -# ${SECURITY_SOLUTION_DOCS}sec-requirements.html -# ${STACK_GETTING_STARTED}get-started-elastic-stack.html -# ${APP_SEARCH_DOCS}authentication.html -# ${ENTERPRISE_SEARCH_DOCS}authentication.html -# ${WORKPLACE_SEARCH_DOCS}workplace-search-getting-started.html -# ${MACHINE_LEARNING_DOCS}machine-learning-intro.html - my $extractor = sub { my $contents = shift; return sub { @@ -453,6 +438,59 @@ sub check_kibana_links { } } +#=================================== +sub 
check_elasticsearch_links {
+#===================================
+    # Link-checks the doc paths that Elasticsearch lists in its
+    # reference-docs-links.json, once per built version under $build_dir.
+    my ( $build_dir, $link_checker ) = @_;
+    my $version;    # set on every loop pass below; $extractor closes over it
+
+    say "Checking Elasticsearch links";
+
+    my $extractor = sub {
+        my $contents = shift;
+        return sub {
+            while ( $contents =~ m!"([^"\#]+)(?:\#([^"]+))?"!g ) {
+                my $path = $1;
+                next unless $path =~ m!html!;
+                return "en/elasticsearch/reference/$version/$path";
+            }
+            return;
+        };
+    };
+
+    my $src_path = 'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json';
+    my $repo = ES::Repo->get_repo('elasticsearch');
+
+    my @versions = sort map { $_->basename }
+        grep { $_->is_dir } $build_dir->subdir('en/elasticsearch/reference')->children;
+
+    my $link_check_name = 'link-check-elasticsearch';
+
+    for (@versions) {
+        $version = $_;
+        # Skip `current` and pre-8.7; numeric ops because as strings "10" lt "7".
+        next if $version eq 'current' || ( $version =~ /^(\d+)\.(\d+)/ && ( $1 < 8 || ( $1 == 8 && $2 < 7 ) ) );
+        # @versions is looping through the directories in the output (which
+        # still contains `master`), but we need to look in the `main` branch of
+        # the ES repo for this file.
+        #
+        # TODO: remove as part of https://github.com/elastic/docs/issues/2264
+        my $branch = $version eq "master" ? "main" : $version;
+        say "  Branch: $branch, Version: $version";
+        my $source = $repo->show_file( $link_check_name, $branch, $src_path );
+
+        $link_checker->check_source( $source, $extractor,
+            "Elasticsearch [$version]: $src_path" );
+
+        # Mark the file that we need for the link check done so we can use
+        # --keep_hash with it during some other build.
+        $repo->mark_done( $link_check_name, $branch, $src_path, 0 );
+    }
+}
+
+
 #===================================
 sub build_entries {
 #===================================
diff --git a/lib/ES/Book.pm b/lib/ES/Book.pm
index 2369bb0c12d22..d95226cb43cc2 100644
--- a/lib/ES/Book.pm
+++ b/lib/ES/Book.pm
@@ -408,7 +408,7 @@ sub _update_title_and_version_drop_downs {
         # If a book uses a custom index page, it may not include the TOC.
The # substitution below will fail, so we abort early in this case. - next unless ($_ == 'index.html' && ($html =~ /ul class="toc"/)); + next unless ($_ eq 'index.html' && ($html =~ /ul class="toc"/)); my $success = ($html =~ s/