Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Fix links in docs and improve check_links.py (#1680)
Browse files Browse the repository at this point in the history
* fix links and improve check_links.py

* address comment by @matt-gardner
  • Loading branch information
epwalsh authored and matt-gardner committed Aug 28, 2018
1 parent cbeef92 commit e9710c8
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 21 deletions.
64 changes: 45 additions & 19 deletions scripts/check_links.py
@@ -1,57 +1,83 @@
#!/usr/bin/env python
# encoding: UTF-8

"""
Goes through all the inline-links in markdown files and reports the breakages.
"""

import re
import sys
import pathlib
import os
from multiprocessing.dummy import Pool
from typing import Tuple, NamedTuple

import requests


class MatchTuple(NamedTuple):
    """One inline markdown link: where it was found, its text, and its target."""
    source: str  # path of the markdown file the link was found in
    name: str  # the link's display text (first regex capture group)
    link: str  # the link target: an http(s) URL or a repo-relative path

def url_ok(match_tuple: MatchTuple) -> bool:
    """Check if a URL is reachable.

    Issues a GET with a 5-second timeout so a single dead host cannot
    hang the whole link check; any connection failure or timeout is
    reported as an unreachable link rather than raised.
    """
    try:
        result = requests.get(match_tuple.link, timeout=5)
        return result.ok
    except (requests.ConnectionError, requests.Timeout):
        return False


def path_ok(match_tuple: MatchTuple) -> bool:
    """Check if a file in this repository exists."""
    # Drop any "#section" anchor — only the file portion exists on disk.
    relative_path, _, _ = match_tuple.link.partition("#")
    base_dir = os.path.dirname(str(match_tuple.source))
    return os.path.exists(os.path.join(base_dir, relative_path))

def link_ok(match_tuple: MatchTuple) -> Tuple[MatchTuple, bool]:
    """Dispatch a link check to the right verifier.

    http(s) links are checked over the network via ``url_ok``; anything
    else is treated as a repository-relative file path via ``path_ok``.
    Returns the original tuple paired with the outcome so callers can
    report exactly which link failed.
    """
    if match_tuple.link.startswith("http"):
        result_ok = url_ok(match_tuple)
    else:
        result_ok = path_ok(match_tuple)
    print(f"  {'✓' if result_ok else '✗'} {match_tuple.link}")
    return match_tuple, result_ok


def main():
    """Collect every inline link from the repo's markdown files and verify each one.

    Exits with status 1 (after printing each offender) if any link is
    unreachable, so CI can fail on broken links.
    """
    print("Finding all markdown files in the current directory...")

    # scripts/ lives one level below the project root.
    project_root = (pathlib.Path(__file__).parent / "..").resolve()  # pylint: disable=no-member
    markdown_files = project_root.glob('**/*.md')

    all_matches = set()
    # [name](target); the leading [^!] skips image links of the form ![alt](src).
    url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)')
    for markdown_file in markdown_files:
        with open(markdown_file) as handle:
            for line in handle:  # stream line-by-line; no need to slurp the file
                for name, link in url_regex.findall(line):
                    if 'localhost' not in link:  # local dev URLs are expected to be down
                        all_matches.add(MatchTuple(source=str(markdown_file), name=name, link=link))

    # all_matches holds links, not files — the old message mislabeled this count.
    print(f"  {len(all_matches)} links found")
    print("Checking to make sure we can retrieve each link...")

    # multiprocessing.dummy uses threads, which is fine: the work is I/O-bound.
    with Pool(processes=10) as pool:
        results = pool.map(link_ok, list(all_matches))
    unreachable_results = [match_tuple for match_tuple, success in results if not success]

    if unreachable_results:
        print(f"Unreachable links ({len(unreachable_results)}):")
        for match_tuple in unreachable_results:
            print("  > Source: " + match_tuple.source)
            print("    Name: " + match_tuple.name)
            print("    Link: " + match_tuple.link)
        sys.exit(1)
    print("No unreachable links found.")


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion tutorials/getting_started/using_as_a_library_pt2.md
Expand Up @@ -7,7 +7,7 @@ for something. In this tutorial we'll cover both
* How to run a web demonstration of your model

Here we'll be working with the paper classification model
we developed in the ["Using AllenNLP in your Project"](using_in_your_repo.md)
we developed in [Part 1](./using_as_a_library_pt1.md) of this
tutorial. All the code for that model is [on GitHub](https://github.com/allenai/allennlp-as-a-library-example/tree/master).
You can either train it yourself or download a
[trained model](https://s3-us-west-2.amazonaws.com/allennlp/models/tutorial-s2-classification-model-2018-02-01.tar.gz),
Expand Down
2 changes: 1 addition & 1 deletion tutorials/how_to/elmo.md
Expand Up @@ -75,7 +75,7 @@ Note that this simple case only includes one layer of ELMo representation
in the final model.
In some case (e.g. SQuAD and SNLI) we found that including multiple layers improved performance. Multiple layers require code changes (see below).

We will use existing SRL model [configuration file](../../training_config/semantic_role_labeler.json) as an example to illustrate the changes. Without ELMo, it uses 100 dimensional pre-trained GloVe vectors.
We will use existing SRL model [configuration file](../../training_config/semantic_role_labeler.jsonnet) as an example to illustrate the changes. Without ELMo, it uses 100 dimensional pre-trained GloVe vectors.

To add ELMo, there are three relevant changes. First, modify the `text_field_embedder` section by adding an `elmo` section as follows:

Expand Down

0 comments on commit e9710c8

Please sign in to comment.