This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix links in docs and improve check_links.py (#1680)
* fix links and improve check_links.py * address comment by @matt-gardner
- Loading branch information
1 parent
cbeef92
commit e9710c8
Showing
3 changed files
with
47 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,83 @@ | ||
#! /usr/bin/env python | ||
#!/usr/bin/env python | ||
# encoding: UTF-8 | ||
|
||
""" | ||
Goes through all the inline-links in markdown files and reports the breakages. | ||
""" | ||
|
||
import re | ||
import sys | ||
import pathlib | ||
import os | ||
from multiprocessing.dummy import Pool | ||
from typing import Tuple, NamedTuple | ||
|
||
import requests | ||
|
||
|
||
class MatchTuple(NamedTuple): | ||
source: str | ||
name: str | ||
link: str | ||
|
||
def url_ok(match_tuple: MatchTuple) -> Tuple[MatchTuple, bool]: | ||
|
||
def url_ok(match_tuple: MatchTuple) -> bool: | ||
"""Check if a URL is reachable.""" | ||
try: | ||
result = requests.get(match_tuple.link) | ||
print(f" {result.status_code}: {match_tuple.source}") | ||
return (match_tuple, result.ok) | ||
except requests.ConnectionError: | ||
return (match_tuple, False) | ||
result = requests.get(match_tuple.link, timeout=5) | ||
return result.ok | ||
except (requests.ConnectionError, requests.Timeout): | ||
return False | ||
|
||
|
||
def path_ok(match_tuple: MatchTuple) -> bool: | ||
"""Check if a file in this repository exists.""" | ||
relative_path = match_tuple.link.split("#")[0] | ||
full_path = os.path.join(os.path.dirname(str(match_tuple.source)), relative_path) | ||
return os.path.exists(full_path) | ||
|
||
if __name__ == "__main__": | ||
|
||
def link_ok(match_tuple: MatchTuple) -> Tuple[MatchTuple, bool]: | ||
if match_tuple.link.startswith("http"): | ||
result_ok = url_ok(match_tuple) | ||
else: | ||
result_ok = path_ok(match_tuple) | ||
print(f" {'✓' if result_ok else '✗'} {match_tuple.link}") | ||
return match_tuple, result_ok | ||
|
||
|
||
def main(): | ||
print("Finding all markdown files in the current directory...") | ||
|
||
project_root = (pathlib.Path(__file__).parent / "..").resolve() # pylint: disable=no-member | ||
markdown_files = project_root.glob('**/*.md') | ||
|
||
all_matches = set() | ||
url_regex = re.compile(r'\[([^!][^\]]+)\]\((http[s]?[^)(]+)\)') | ||
url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)') | ||
for markdown_file in markdown_files: | ||
with open(markdown_file) as handle: | ||
for line in handle.readlines(): | ||
matches = url_regex.findall(line) | ||
for name, link in matches: | ||
if 'localhost' not in link: | ||
all_matches.add(MatchTuple(source=str(markdown_file), name=name, link=link)) | ||
print(f" {len(all_matches)} markdown files found") | ||
|
||
print("Checking to make sure we can retrieve each web URL...") | ||
print(f" {len(all_matches)} markdown files found") | ||
print("Checking to make sure we can retrieve each link...") | ||
|
||
with Pool(processes=10) as pool: | ||
results = pool.map(url_ok, [match for match in list(all_matches)]) | ||
unreachable_results = [result for result in results if not result[1]] | ||
results = pool.map(link_ok, [match for match in list(all_matches)]) | ||
unreachable_results = [match_tuple for match_tuple, success in results if not success] | ||
|
||
if unreachable_results: | ||
print(f"Unreachable Links ({len(unreachable_results)}):") | ||
for index, result in enumerate(unreachable_results): | ||
print(" " + str(index)) | ||
print(" Source: " + result[0].source) | ||
print(" Name: " + result[0].name) | ||
print(" Link: " + result[0].link) | ||
print(f"Unreachable links ({len(unreachable_results)}):") | ||
for match_tuple in unreachable_results: | ||
print(" > Source: " + match_tuple.source) | ||
print(" Name: " + match_tuple.name) | ||
print(" Link: " + match_tuple.link) | ||
sys.exit(1) | ||
print("No Unreachable link found.") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters