@@ -331,6 +331,7 @@ sub check_links {
     $link_checker->check;
 
     check_kibana_links( $build_dir, $link_checker ) if exists $Conf->{repos}{kibana};
+    check_elasticsearch_links( $build_dir, $link_checker ) if exists $Conf->{repos}{elasticsearch};
     if ( $link_checker->has_bad ) {
         say $link_checker->report;
     }
@@ -350,22 +351,6 @@ sub check_kibana_links {
 
     say "Checking Kibana links";
 
-    # ${baseUrl}guide/en/elasticsearch/reference/${urlVersion}/modules-scripting-expression.html
-    # ${ELASTIC_WEBSITE_URL}guide/en/beats/filebeat/${DOC_LINK_VERSION}
-    # ${ELASTIC_DOCS}search-aggregations-bucket-datehistogram-aggregation.html
-    # ${ELASTICSEARCH_DOCS}update-transform.html
-    # ${KIBANA_DOCS}canvas.html
-    # ${PLUGIN_DOCS}repository-s3.html
-    # ${FLEET_DOCS}fleet-overview.html
-    # ${APM_DOCS}overview.html
-    # ${STACK_DOCS}upgrading-elastic-stack.html
-    # ${SECURITY_SOLUTION_DOCS}sec-requirements.html
-    # ${STACK_GETTING_STARTED}get-started-elastic-stack.html
-    # ${APP_SEARCH_DOCS}authentication.html
-    # ${ENTERPRISE_SEARCH_DOCS}authentication.html
-    # ${WORKPLACE_SEARCH_DOCS}workplace-search-getting-started.html
-    # ${MACHINE_LEARNING_DOCS}machine-learning-intro.html
-
     my $extractor = sub {
         my $contents = shift;
         return sub {
@@ -453,6 +438,65 @@ sub check_kibana_links {
     }
 }
 
+#===================================
+sub check_elasticsearch_links {
+#===================================
+    my $build_dir    = shift;
+    my $link_checker = shift;
+    my $branch;
+    my $version;
+
+    say "Checking Elasticsearch links";
+
+    # Grab URLs from the JSON file. This is lame, but we sort of need to parse
+    # using regexes because that's what the rest of the infrastructure expects.
+    # So we grab all quoted strings that contain `html`. This *should* be fine
+    # for a while because the keys in the file are all in SHOUTING_SNAKE_CASE
+    # so even if one contains "html" it'll contain "HTML" which doesn't match.
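+    #
+    # As a purely hypothetical example (not taken from the real file), an
+    # entry such as
+    #   "UNASSIGNED_SHARDS": "cluster-allocation-explain.html"
+    # would have its SHOUTING_SNAKE_CASE key skipped and its value turned into
+    #   en/elasticsearch/reference/$version/cluster-allocation-explain.html
+    # by the extractor below.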
+    my $extractor = sub {
+        my $contents = shift;
+        return sub {
+            while ( $contents =~ m!"([^"\#]+)(?:\#([^"]+))?"!g ) {
+                my $path = $1;
+                next unless $path =~ m!html!;
+                return "en/elasticsearch/reference/$version/$path";
+            }
+            return;
+        };
+    };
+
+    my $src_path = 'server/src/main/resources/org/elasticsearch/common/reference-docs-links.json';
+    my $repo     = ES::Repo->get_repo('elasticsearch');
+
+    my @versions = sort map { $_->basename }
+        grep { $_->is_dir } $build_dir->subdir('en/elasticsearch/reference')->children;
+
+    my $link_check_name = 'link-check-elasticsearch';
+
+    for ( @versions ) {
+        $version = $_;
+        # Only check versions after 8.6: skip the `current` symlink and any release before 8.7.
+        next if $version eq 'current' || $version =~ /^(\d+)\.(\d+)/ && ( $1 < 8 || ( $1 == 8 && $2 < 7 ) );
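+        # (For illustration: with a hypothetical output tree containing 7.17,
+        # 8.6, 8.7, and master, only 8.7 and master would be checked here.)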
+        # @versions is looping through the directories in the output (which
+        # still contains `master`), but we need to look in the `main` branch of
+        # the ES repo for this file.
+        #
+        # TODO: remove as part of
+        # https://github.com/elastic/docs/issues/2264
+        $branch = $version eq "master" ? "main" : $version;
+        say "Branch: $branch, Version: $version";
+        my $source = $repo->show_file( $link_check_name, $branch, $src_path );
+
+        $link_checker->check_source( $source, $extractor,
+            "Elasticsearch [$version]: $src_path" );
+
+        # Mark the file that we need for the link check done so we can use
+        # --keep_hash with it during some other build.
+        $repo->mark_done( $link_check_name, $branch, $src_path, 0 );
+    }
+}
+
+
 #===================================
 sub build_entries {
 #===================================