In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
!pip install retry joblib



In [3]:
import sys

# parse_s4ext
# Prepend the ExtensionsIndex scripts directory to ``sys.path`` (once) so
# that ``check_description_files`` can be imported below.
# NOTE(review): absolute, machine-specific path -- adjust to the local checkout.
extensions_index_script_dir = '/home/jcfr/Projects/ExtensionsIndex/scripts/'
if extensions_index_script_dir not in sys.path:
    sys.path.insert(0, extensions_index_script_dir)

from check_description_files import parse_s4ext

# ExtensionProject
# Prepend the SlicerWizard scripts directory to ``sys.path`` (once) so that
# the ``SlicerWizard`` package can be imported below.
# NOTE(review): absolute path under /tmp -- must exist before running this cell.
slicer_scripts_dir = "/tmp/SlicerWizard/Utilities/Scripts/"
if slicer_scripts_dir not in sys.path:
    sys.path.insert(0, slicer_scripts_dir)

from SlicerWizard.ExtensionProject import ExtensionProject

In [4]:
import errno
import os
    
# Copied from https://github.com/scikit-build/scikit-build/blob/master/skbuild/utils/__init__.py
def mkdir_p(path):
    """Create directory ``path`` along with any missing parents.

    Succeeds silently when ``path`` is already a directory; any other
    ``OSError`` (for example ``path`` exists but is a regular file) is
    re-raised to the caller.
    Adapted from http://stackoverflow.com/a/600612/1539918
    """
    try:
        os.makedirs(path)
    except OSError as error:  # Python >2.5
        directory_already_there = (
            error.errno == errno.EEXIST and os.path.isdir(path)
        )
        if not directory_already_there:  # pragma: no cover
            raise

In [5]:
import urllib.request

from socket import timeout as SocketTimeout
from http.client import HTTPException

from retry import retry

def check_url(url, timeout=1):
    """Open ``url`` and report the resulting HTTP status.

    The request is attempted up to three times when a ``TimeoutError`` is
    raised by the request itself.

    :param url: URL to open.
    :param timeout: Timeout in seconds passed to ``urllib.request.urlopen``.
    :return: A ``(code, error)`` tuple:

        * ``(<status code>, None)`` when the URL could be opened,
        * ``(<HTTP error code>, <message>)`` on an HTTP error response,
        * ``(-1, <message>)`` on a timeout or URL error,
        * ``(-2, <message>)`` on any other ``http.client.HTTPException``.

    Exceptions other than the ones listed above (for example the
    ``ValueError`` that ``urlopen`` raises for a URL without a scheme) are
    not handled and propagate to the caller.
    """

    @retry(TimeoutError, tries=3, delay=1, jitter=1, max_delay=3)
    def _check_url():
        # Close the response explicitly: this function is called for every
        # URL of every extension, so leaving responses open until garbage
        # collection keeps sockets around needlessly.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.getcode(), None

    try:
        return _check_url()
    except urllib.request.HTTPError as exc:
        result = exc.code, str(exc)
        # An HTTPError wraps the underlying response; release it too.
        exc.close()
        return result
    except (TimeoutError, urllib.request.URLError, SocketTimeout) as exc:
        return -1, str(exc)
    except HTTPException as exc:
        return -2, str(exc)


In [6]:
import shutil


def _parse_github_scmurl(scmurl):
    """Return the ``(user, repo)`` pair encoded in a GitHub repository URL."""
    path = scmurl.strip()
    for prefix in ("https://github.com/", "git://github.com/"):
        if path.startswith(prefix):
            path = path[len(prefix):]
            break
    path = path.rstrip("/")
    # Only strip a trailing ".git": removing it anywhere would corrupt
    # repository names such as "user.github.io".
    if path.endswith(".git"):
        path = path[:-len(".git")]
    user, repo = path.split("/")
    return user, repo


def download_cmakelists(scmurl, dest_filepath, revision):
    """Download the top-level ``CMakeLists.txt`` of a GitHub repository.

    The file is fetched from ``raw.githubusercontent.com`` for the given
    ``revision`` and written to ``dest_filepath``. Nothing is downloaded if
    ``dest_filepath`` already exists.

    :param scmurl: GitHub repository URL (``https://github.com/<user>/<repo>``
        or ``git://github.com/<user>/<repo>``, optionally ending with
        ``.git`` and/or a trailing slash).
    :param dest_filepath: Path of the file to write.
    :param revision: Branch, tag or commit to download the file from.
    :return: An empty list on success (or if the file already exists),
        otherwise a one-element list ``[(None, "<code>, <url>")]`` describing
        the HTTP error.
    :raises ValueError: if ``scmurl`` does not reduce to ``<user>/<repo>``.
    """
    user, repo = _parse_github_scmurl(scmurl)
    cmakelists_url = "https://raw.githubusercontent.com/{user}/{repo}/{revision}/CMakeLists.txt".format(
        user=user,
        repo=repo,
        revision=revision
    )
    if not os.path.exists(dest_filepath):
        try:
            # The destination is opened only after the request succeeded, so
            # an HTTP error does not leave an empty file behind.
            with urllib.request.urlopen(cmakelists_url) as response, open(dest_filepath, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
        except urllib.request.HTTPError as exc:
            return [(None, "%d, %s" % (exc.code, cmakelists_url),)]

    return []

In [7]:
import urllib.parse


def check_cmakelists(filepath):
    """Check the URLs declared in an extension ``CMakeLists.txt``.

    The ``EXTENSION_HOMEPAGE`` and ``EXTENSION_ICONURL`` variables are
    required; ``EXTENSION_SCREENSHOTURLS`` is optional and may hold several
    whitespace-separated URLs. Every well-formed URL is then opened with
    ``check_url``.

    :param filepath: Path to the ``CMakeLists.txt`` file to check.
    :return: List of ``(attribute, message)`` tuples, one per problem found;
        empty if every URL answered with status 200.
    """
    cmakelists_dir = os.path.dirname(filepath)
    cmakelists_filename = os.path.basename(filepath)

    project = ExtensionProject(cmakelists_dir, filename=cmakelists_filename)

    urls = []
    errors = []

    def _is_valid_url(candidate, expect_path):
        """Return True if ``candidate`` has a scheme, a netloc and (optionally) a path."""
        try:
            result = urllib.parse.urlparse(candidate)
        except ValueError:
            return False
        return all([result.scheme, result.netloc, result.path if expect_path else True])

    def _collect_urls(attribute, required=False, split=False, expect_path=True):
        cmake_variable_name = "EXTENSION_" + attribute.upper()
        try:
            value = project.getValue(cmake_variable_name, substitute=True).strip()
        except KeyError as exc:
            if required:
                errors.append((attribute, str(exc)))
            return

        if not value:
            if required:
                message = "script sets %r to an empty value" % cmake_variable_name
                errors.append((attribute, message))
            return

        # Validate each URL on its own: a multi-URL value parsed as a single
        # string only ever validates its first entry. ``str.split()`` also
        # collapses repeated whitespace and never yields empty items.
        candidates = value.split() if split else [value]
        for candidate in candidates:
            if _is_valid_url(candidate, expect_path):
                urls.append((attribute, candidate))
            else:
                message = "script sets %r to an invalid url [%s]" % (cmake_variable_name, candidate)
                errors.append((attribute, message))

    _collect_urls("homepage", required=True, expect_path=False)
    _collect_urls("iconurl", required=True)
    _collect_urls("screenshoturls", split=True)

    for attribute, url in urls:
        code, error = check_url(url)
        if code != 200:
            errors.append((attribute, "%d, %s, %s" % (code, url, error)))

    return errors

In [8]:
import urllib.parse
import re


def check_s4ext(metadata):
    """Check the URLs found in the metadata of an ``.s4ext`` file.

    The ``homepage`` and ``iconurl`` entries are required;
    ``screenshoturls`` is optional and may hold several whitespace-separated
    URLs. Every well-formed URL is then opened with ``check_url``.

    :param metadata: Mapping of s4ext attribute names to their values.
    :return: List of ``(attribute, message)`` tuples, one per problem found;
        empty if every URL answered with status 200.
    """
    urls = []
    errors = []

    def _is_valid_url(candidate, expect_path):
        """Return True if ``candidate`` has a scheme, a netloc and (optionally) a path."""
        try:
            result = urllib.parse.urlparse(candidate)
        except ValueError:
            return False
        return all([result.scheme, result.netloc, result.path if expect_path else True])

    def _collect_urls(attribute, required=False, split=False, expect_path=True):
        try:
            value = metadata[attribute]
        except KeyError as exc:
            if required:
                errors.append((attribute, str(exc)))
            return

        if not value:
            if required:
                message = "s4ext associates %r with an empty value" % attribute
                errors.append((attribute, message))
            return

        # Validate each URL on its own: a multi-URL value parsed as a single
        # string only ever validates its first entry. ``str.split()`` also
        # collapses repeated whitespace and never yields empty items.
        candidates = value.split() if split else [value]
        for candidate in candidates:
            if _is_valid_url(candidate, expect_path):
                urls.append((attribute, candidate))
            else:
                message = "s4ext associates %r with an invalid url [%s]" % (attribute, candidate)
                errors.append((attribute, message))

    _collect_urls("homepage", required=True, expect_path=False)
    _collect_urls("iconurl", required=True)
    _collect_urls("screenshoturls", split=True)

    for attribute, url in urls:
        code, error = check_url(url)
        if code != 200:
            errors.append((attribute, "%d, %s, %s" % (code, url, error)))

    return errors

In [9]:
import glob

# Directory holding the extension description (``*.s4ext``) files.
s4ext_dir = "/home/jcfr/Projects/ExtensionsIndex"

# ``glob.glob`` returns paths in arbitrary order; sort them so the index
# reported for each extension is the same from one run to the next.
s4ext_filepaths = sorted(glob.glob(os.path.join(s4ext_dir, "*.s4ext")))

print(f"Found {len(s4ext_filepaths)} extension files")

Found 169 extension files


In [10]:
import urllib.request


# Names of the extensions to check; leave empty to check every extension.
check_extension_names = []

# Kinds of metadata source that can be checked for each extension.
SUPPORTED_CONTEXTS = [
    "s4ext",
    "cmakelists",
]

def check_extension_metadata(index, filepath, contexts=None, verbose=True):
    """Check the URLs referenced by the metadata of one extension.

    :param index: Zero-based position of the extension, used only in the
        progress message.
    :param filepath: Path to the extension ``.s4ext`` file.
    :param contexts: Subset of ``SUPPORTED_CONTEXTS`` to check; all of them
        when empty or ``None``.
    :param verbose: If true, print progress and the errors found.
    :return: ``(extension_name, errors)`` where ``errors`` is a list of
        ``(context, attribute, message)`` tuples. ``attribute`` is ``None``
        for errors not tied to a specific attribute (download failures).

    The ``cmakelists`` context is skipped for repositories not hosted on
    GitHub; errors found in other contexts are still reported.
    """
    if not contexts:
        contexts = SUPPORTED_CONTEXTS

    for context in contexts:
        assert context in SUPPORTED_CONTEXTS

    extension_name = os.path.splitext(os.path.basename(filepath))[0]

    if check_extension_names and extension_name not in check_extension_names:
        return extension_name, []

    def _contextualize(context, values):
        return [(context,) + value for value in values]

    def _display(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    errors = []

    msg = "[%d/%d] Checking %s ... " % (index + 1, len(s4ext_filepaths), extension_name)
    _display(msg.ljust(50), end='')

    metadata = parse_s4ext(filepath)

    if "s4ext" in contexts:
        errors.extend(_contextualize("s4ext", check_s4ext(metadata)))

    skipped_cmakelists = False
    if "cmakelists" in contexts:
        scmurl = metadata['scmurl']
        if 'github' in scmurl:
            # Directory to store extension CMakeLists.txt files
            dest_dir = "Extensions-CMakeLists"
            mkdir_p(dest_dir)

            cmakelists_filepath = os.path.join(dest_dir, '%s-CMakeLists.txt' % extension_name)

            # Download CMakeLists.txt
            errors.extend(_contextualize(
                "download_cmakelists",
                download_cmakelists(scmurl, cmakelists_filepath, metadata['scmrevision'])))

            # Check CMakeLists.txt
            if os.path.exists(cmakelists_filepath):
                errors.extend(_contextualize("cmakelists", check_cmakelists(cmakelists_filepath)))
        else:
            # The CMakeLists.txt can only be downloaded from GitHub. Keep the
            # errors already collected instead of discarding them.
            skipped_cmakelists = True

    if skipped_cmakelists:
        _display("[skipping, not a GitHub repository]")

    if errors:
        _display("[errors]")
        for context, attribute, error in errors:
            if attribute is None:
                _display(f"  {context}, {error}")
            else:
                _display(f"  {context}, {attribute}, {error}")
    elif not skipped_cmakelists:
        _display("[ok]")

    _display("")
    return extension_name, errors
    

In [11]:
# for index, filepath in enumerate(s4ext_filepaths[:3]):
#     check_extension_metadata(index, filepath, contexts=["s4ext"])

In [12]:
from joblib import Parallel, delayed, parallel_backend

# Check every extension using a pool of six threads; ``jobs`` receives one
# ``(extension_name, errors)`` result per s4ext file.
with parallel_backend("threading", n_jobs=6):
    run_in_parallel = Parallel(verbose=True)
    jobs = run_in_parallel(
        delayed(check_extension_metadata)(position, s4ext_filepath, verbose=False)
        for position, s4ext_filepath in enumerate(s4ext_filepaths)
    )

[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:   19.0s
[Parallel(n_jobs=6)]: Done 169 out of 169 | elapsed:  1.5min finished


### Issues with URLs found in the extensions' metadata

In [15]:
# Print the errors of each extension, in alphabetical order of extension
# name. Sorting on the name only avoids comparing the error lists.
for extension_name, errors in sorted(jobs, key=lambda job: job[0]):
    if not errors:
        continue
    print(f"{extension_name}")

    for context, attribute, error in errors:
        # Errors not tied to a specific attribute (e.g. a failed download)
        # carry ``None`` as attribute; the context itself is never ``None``.
        if attribute is None:
            print(f"  {context}, {error}")
        else:
            print(f"  {context}, {attribute}, {error}")
    print("")

AblationPlanner
  s4ext, homepage, -1, https://github.com/naterex23/SlicerAblationPlanner, <urlopen error timed out>
  cmakelists, homepage, 404, https://www.slicer.org/wiki/Documentation/Nightly/Extensions/AblationPlanner, HTTP Error 404: Not Found
  cmakelists, screenshoturls, 404, https://github.com/naterex23/SlicerAblationPlanner/raw/main/Screenshots/minimum_required_inputs.png,, HTTP Error 404: Not Found
  cmakelists, screenshoturls, 404, https://github.com/naterex23/SlicerAblationPlanner/raw/main/Screenshots/minimum_required_placement.png,, HTTP Error 404: Not Found
  cmakelists, screenshoturls, 404, https://github.com/naterex23/SlicerAblationPlanner/raw/main/Screenshots/ablation_steps.png,, HTTP Error 404: Not Found
  cmakelists, screenshoturls, 404, https://github.com/naterex23/SlicerAblationPlanner/raw/main/Screenshots/ablation_outputs.png,, HTTP Error 404: Not Found

AnomalousFiltersExtension
  s4ext, screenshoturls, 404, https://www.slicer.org/w/images/4/40/DTI_FA_raw.png, H