diff --git a/lib/hexdocs/file_rewriter.ex b/lib/hexdocs/file_rewriter.ex index 5dd73e2..3e65054 100644 --- a/lib/hexdocs/file_rewriter.ex +++ b/lib/hexdocs/file_rewriter.ex @@ -16,9 +16,25 @@ defmodule Hexdocs.FileRewriter do |> add_elixir_org_link(path) |> add_analytics(path) |> remove_noindex(path) + |> rewrite_canonical_links(path) |> add_nofollow(path) end + @canonical_tag_re ~r{]*\brel=["']canonical["'][^>]*>}i + @hexdocs_link_re ~r{https?://hexdocs\.pm/([a-z][a-z0-9_]*)(?![a-zA-Z0-9_.-])} + + defp rewrite_canonical_links(content, path) do + if String.ends_with?(path, ".html") do + Regex.replace(@canonical_tag_re, content, fn tag -> + Regex.replace(@hexdocs_link_re, tag, fn _match, package -> + "https://#{Hexdocs.Utils.package_to_subdomain(package)}.hexdocs.pm" + end) + end) + else + content + end + end + defp add_elixir_org_link(content, path) do if String.ends_with?(path, ".html") and not String.contains?(content, @link_addition) do String.replace(content, @link_hooks, &(&1 <> " for the " <> @link_addition)) diff --git a/test/hexdocs/file_rewriter_test.exs b/test/hexdocs/file_rewriter_test.exs index 174a050..f31ae25 100644 --- a/test/hexdocs/file_rewriter_test.exs +++ b/test/hexdocs/file_rewriter_test.exs @@ -17,6 +17,108 @@ defmodule Hexdocs.FileRewriterTest do assert FileRewriter.run("index.html", ~s||) == "" end + describe "rewrite hexdocs.pm canonical links to subdomains" do + test "rewrites a canonical link" do + assert FileRewriter.run( + "index.html", + ~s|| + ) == + ~s|| + end + + test "preserves version, query and fragment in the tail" do + assert FileRewriter.run( + "index.html", + ~s|| + ) == + ~s|| + end + + test "maps underscores in the package name to hyphens" do + assert FileRewriter.run( + "index.html", + ~s|| + ) == + ~s|| + end + + test "rewrites the hex package" do + assert FileRewriter.run( + "index.html", + ~s|| + ) == + ~s|| + end + + test "rewrites http links to https subdomains" do + assert FileRewriter.run( + "index.html", + ~s|| + ) 
== + ~s|| + end + + test "handles href before rel in the canonical tag" do + assert FileRewriter.run( + "index.html", + ~s|| + ) == + ~s|| + end + + test "does not rewrite body links or text" do + for input <- [ + ~s|Jason|, + ~s|
visit https://hexdocs.pm/jason/readme.html|
+ ] do
+ assert FileRewriter.run("index.html", input) == input
+ end
+ end
+
+ test "does not rewrite other link tags" do
+   other_link = ~s|| # fixture doubles as the expected output
+   assert ^other_link = FileRewriter.run("index.html", other_link)
+ end
+
+ test "leaves the bare apex untouched" do
+   unchanged = [
+     ~s||,
+     ~s||
+   ]
+
+   Enum.each(unchanged, fn html -> assert FileRewriter.run("index.html", html) == html end)
+ end
+
+ test "leaves apex files untouched" do
+   apex_files = [
+     ~s||,
+     ~s||
+   ]
+
+   for html <- apex_files, do: assert(FileRewriter.run("index.html", html) == html)
+ end
+
+ test "does not touch canonical links that already use a subdomain" do
+   links = [
+     ~s||,
+     ~s||
+   ]
+
+   Enum.each(links, fn link -> assert FileRewriter.run("index.html", link) == link end)
+ end
+
+ test "is idempotent" do
+   rewrite = &FileRewriter.run("index.html", &1)
+   first_pass = rewrite.(~s||)
+   assert rewrite.(first_pass) == first_pass # a second pass must change nothing
+ end
+
+ test "does not modify non-html files" do
+   script = ~s||
+   assert script == FileRewriter.run("index.js", script) # path does not end in .html
+ end
+ end
+
describe "add_nofollow" do
test "adds rel=nofollow to external links" do
assert FileRewriter.run("index.html", ~s|example|) ==
@@ -42,7 +144,7 @@ defmodule Hexdocs.FileRewriterTest do
test "does not add nofollow to official ecosystem links" do
for url <- [
"https://hex.pm/packages/foo",
- "https://hexdocs.pm/foo",
+ "https://hexdocs.pm",
"https://elixir-lang.org",
"https://www.erlang.org",
"https://preview.hexdocs.pm/foo"