Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Fix links in docs and improve check_links.py (#1680)
Browse files Browse the repository at this point in the history
* fix links and improve check_links.py

* address comment by @matt-gardner
  • Loading branch information
epwalsh authored and matt-gardner committed Aug 28, 2018
1 parent cbeef92 commit e9710c8
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 21 deletions.
64 changes: 45 additions & 19 deletions scripts/check_links.py
@@ -1,57 +1,83 @@
#!/usr/bin/env python
# encoding: UTF-8

"""
Goes through all the inline-links in markdown files and reports the breakages.
"""

import re
import sys
import pathlib
import os
from multiprocessing.dummy import Pool
from typing import Tuple, NamedTuple

import requests


class MatchTuple(NamedTuple):
    """One inline markdown link: where it was found, its text, and its target."""
    source: str  # path of the markdown file the link was found in
    name: str  # the link's display text (first regex capture group)
    link: str  # the link target: an http(s) URL or a repo-relative path

def url_ok(match_tuple: MatchTuple) -> bool:
    """Check if a URL is reachable.

    Issues a GET with a 5-second timeout so a single dead host cannot
    hang the whole link check; any connection failure or timeout is
    reported as an unreachable link rather than raised.
    """
    try:
        result = requests.get(match_tuple.link, timeout=5)
        return result.ok
    except (requests.ConnectionError, requests.Timeout):
        return False


def path_ok(match_tuple: MatchTuple) -> bool:
    """Check if a file in this repository exists."""
    # Drop any "#section" anchor — only the file portion exists on disk.
    relative_path, _, _ = match_tuple.link.partition("#")
    base_dir = os.path.dirname(str(match_tuple.source))
    return os.path.exists(os.path.join(base_dir, relative_path))

def link_ok(match_tuple: MatchTuple) -> Tuple[MatchTuple, bool]:
    """Dispatch a link check to the right verifier.

    http(s) links are checked over the network via ``url_ok``; anything
    else is treated as a repository-relative file path via ``path_ok``.
    Returns the original tuple paired with the outcome so callers can
    report exactly which link failed.
    """
    if match_tuple.link.startswith("http"):
        result_ok = url_ok(match_tuple)
    else:
        result_ok = path_ok(match_tuple)
    print(f"  {'✓' if result_ok else '✗'} {match_tuple.link}")
    return match_tuple, result_ok


def main():
    """Collect every inline link from the repo's markdown files and verify each one.

    Exits with status 1 (after printing each offender) if any link is
    unreachable, so CI can fail on broken links.
    """
    print("Finding all markdown files in the current directory...")

    # scripts/ lives one level below the project root.
    project_root = (pathlib.Path(__file__).parent / "..").resolve()  # pylint: disable=no-member
    markdown_files = project_root.glob('**/*.md')

    all_matches = set()
    # [name](target); the leading [^!] skips image links of the form ![alt](src).
    url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)')
    for markdown_file in markdown_files:
        with open(markdown_file) as handle:
            for line in handle:  # stream line-by-line; no need to slurp the file
                for name, link in url_regex.findall(line):
                    if 'localhost' not in link:  # local dev URLs are expected to be down
                        all_matches.add(MatchTuple(source=str(markdown_file), name=name, link=link))

    # all_matches holds links, not files — the old message mislabeled this count.
    print(f"  {len(all_matches)} links found")
    print("Checking to make sure we can retrieve each link...")

    # multiprocessing.dummy uses threads, which is fine: the work is I/O-bound.
    with Pool(processes=10) as pool:
        results = pool.map(link_ok, list(all_matches))
    unreachable_results = [match_tuple for match_tuple, success in results if not success]

    if unreachable_results:
        print(f"Unreachable links ({len(unreachable_results)}):")
        for match_tuple in unreachable_results:
            print("  > Source: " + match_tuple.source)
            print("    Name: " + match_tuple.name)
            print("    Link: " + match_tuple.link)
        sys.exit(1)
    print("No unreachable links found.")


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion tutorials/getting_started/using_as_a_library_pt2.md
Expand Up @@ -7,7 +7,7 @@ for something. In this tutorial we'll cover both
* How to run a web demonstration of your model

Here we'll be working with the paper classification model
we developed in the ["Using AllenNLP in your Project"](using_in_your_repo.md)
we developed in [Part 1](./using_as_a_library_pt1.md) of this
tutorial. All the code for that model is [on GitHub](https://github.com/allenai/allennlp-as-a-library-example/tree/master).
You can either train it yourself or download a
[trained model](https://s3-us-west-2.amazonaws.com/allennlp/models/tutorial-s2-classification-model-2018-02-01.tar.gz),
Expand Down
2 changes: 1 addition & 1 deletion tutorials/how_to/elmo.md
Expand Up @@ -75,7 +75,7 @@ Note that this simple case only includes one layer of ELMo representation
in the final model.
In some case (e.g. SQuAD and SNLI) we found that including multiple layers improved performance. Multiple layers require code changes (see below).

We will use existing SRL model [configuration file](../../training_config/semantic_role_labeler.json) as an example to illustrate the changes. Without ELMo, it uses 100 dimensional pre-trained GloVe vectors.
We will use existing SRL model [configuration file](../../training_config/semantic_role_labeler.jsonnet) as an example to illustrate the changes. Without ELMo, it uses 100 dimensional pre-trained GloVe vectors.

To add ELMo, there are three relevant changes. First, modify the `text_field_embedder` section by adding an `elmo` section as follows:

Expand Down

0 comments on commit e9710c8

Please sign in to comment.