Skip to content

Commit

Permalink
💫 New system for error messages and warnings (#2163)
Browse files Browse the repository at this point in the history
* Add spacy.errors module

* Update deprecation and user warnings

* Replace errors and asserts with new error message system

* Remove redundant asserts

* Fix whitespace

* Add messages for print/util.prints statements

* Fix typo

* Fix typos

* Move CLI messages to spacy.cli._messages

* Add decorator to display error code with message

An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc.

* Remove unused link in spacy.about

* Update errors for invalid pipeline components

* Improve error for unknown factories

* Add displaCy warnings

* Update formatting consistency

* Move error message to spacy.errors

* Update errors and check if doc returned by component is None
  • Loading branch information
ines committed Apr 3, 2018
1 parent abf8b16 commit 3141e04
Show file tree
Hide file tree
Showing 41 changed files with 652 additions and 443 deletions.
8 changes: 2 additions & 6 deletions spacy/__init__.py
Expand Up @@ -4,18 +4,14 @@
from .cli.info import info as cli_info
from .glossary import explain
from .about import __version__
from .errors import Warnings, deprecation_warning
from . import util


def load(name, **overrides):
depr_path = overrides.get('path')
if depr_path not in (True, False, None):
util.deprecated(
"As of spaCy v2.0, the keyword argument `path=` is deprecated. "
"You can now call spacy.load with the path as its first argument, "
"and the model's meta.json will be used to determine the language "
"to load. For example:\nnlp = spacy.load('{}')".format(depr_path),
'error')
deprecation_warning(Warnings.W001.format(path=depr_path))
return util.load_model(name, **overrides)


Expand Down
8 changes: 4 additions & 4 deletions spacy/_ml.py
Expand Up @@ -23,6 +23,7 @@
import thinc.extra.load_nlp

from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
from .errors import Errors
from . import util


Expand Down Expand Up @@ -174,7 +175,7 @@ def backward(dY_ids, sgd=None):
sgd(self._mem.weights, self._mem.gradient, key=self.id)
return dXf.reshape((dXf.shape[0], self.nF, self.nI))
return Yf, backward

def _add_padding(self, Yf):
Yf_padded = self.ops.xp.vstack((self.pad, Yf))
return Yf_padded
Expand Down Expand Up @@ -340,18 +341,17 @@ def _divide_array(X, size):


def get_col(idx):
assert idx >= 0, idx
if idx < 0:
raise IndexError(Errors.E066.format(value=idx))

def forward(X, drop=0.):
assert idx >= 0, idx
if isinstance(X, numpy.ndarray):
ops = NumpyOps()
else:
ops = CupyOps()
output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

def backward(y, sgd=None):
assert idx >= 0, idx
dX = ops.allocate(X.shape)
dX[:, idx] += y
return dX
Expand Down
1 change: 0 additions & 1 deletion spacy/about.py
Expand Up @@ -11,7 +11,6 @@
__license__ = 'MIT'
__release__ = True

__docs_models__ = 'https://spacy.io/usage/models'
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
__shortcuts__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/shortcuts-v2.json'
73 changes: 73 additions & 0 deletions spacy/cli/_messages.py
@@ -0,0 +1,73 @@
# coding: utf8
from __future__ import unicode_literals

class Messages(object):
    """Central registry of the user-facing strings printed by the spaCy CLI.

    Each attribute is a plain string template identified by a stable code
    (M001, M002, ...). Templates containing named fields such as ``{name}``
    or ``{version}`` must be filled in with ``str.format`` using *keyword*
    arguments, e.g. ``Messages.M003.format(code=404, desc='Not Found')``.
    """
    # download / link
    M001 = ("Download successful but linking failed")
    M002 = ("Creating a shortcut link for 'en' didn't work (maybe you "
            "don't have admin permissions?), but you can still load the "
            "model via its full package name: nlp = spacy.load('{name}')")
    M003 = ("Server error ({code}: {desc})")
    M004 = ("Couldn't fetch {desc}. Please find a model for your spaCy "
            "installation (v{version}), and download it manually. For more "
            "details, see the documentation: https://spacy.io/usage/models")
    M005 = ("Compatibility error")
    M006 = ("No compatible models found for v{version} of spaCy.")
    M007 = ("No compatible model found for '{name}' (spaCy v{version}).")
    M008 = ("Can't locate model data")
    M009 = ("The data should be located in {path}")
    M010 = ("Can't find the spaCy data path to create model symlink")
    M011 = ("Make sure a directory `/data` exists within your spaCy "
            "installation and try again. The data directory should be "
            "located here:")
    M012 = ("Link '{name}' already exists")
    M013 = ("To overwrite an existing link, use the --force flag.")
    M014 = ("Can't overwrite symlink '{name}'")
    M015 = ("This can happen if your data directory contains a directory or "
            "file of the same name.")
    M016 = ("Error: Couldn't link model to '{name}'")
    M017 = ("Creating a symlink in spacy/data failed. Make sure you have the "
            "required permissions and try re-running the command as admin, or "
            "use a virtualenv. You can still import the model as a module and "
            "call its load() method, or create the symlink manually.")
    M018 = ("Linking successful")
    M019 = ("You can now load the model via spacy.load('{name}')")
    # info / validate
    M020 = ("Can't find model meta.json")
    M021 = ("Couldn't fetch compatibility table.")
    M022 = ("Can't find spaCy v{version} in compatibility table")
    M023 = ("Installed models (spaCy v{version})")
    M024 = ("No models found in your current environment.")
    M025 = ("Use the following commands to update the model packages:")
    M026 = ("The following models are not available for spaCy "
            "v{version}: {models}")
    M027 = ("You may also want to overwrite the incompatible links using the "
            "`python -m spacy link` command with `--force`, or remove them "
            "from the data directory. Data path: {path}")
    # convert
    M028 = ("Input file not found")
    M029 = ("Output directory not found")
    M030 = ("Unknown format")
    M031 = ("Can't find converter for {converter}")
    M032 = ("Generated output file {name}")
    M033 = ("Created {n_docs} documents")
    # evaluate
    M034 = ("Evaluation data not found")
    M035 = ("Visualization output directory not found")
    M036 = ("Generated {n} parses as HTML")
    # vocab / package / train
    M037 = ("Can't find words frequencies file")
    M038 = ("Successfully compiled vocab")
    M039 = ("{entries} entries, {vectors} vectors")
    M040 = ("Output directory not found")
    M041 = ("Loaded meta.json from file")
    M042 = ("Successfully created package '{name}'")
    M043 = ("To build the package, run `python setup.py sdist` in this "
            "directory.")
    M044 = ("Package directory already exists")
    M045 = ("Please delete the directory and try again, or use the `--force` "
            "flag to overwrite existing directories.")
    M046 = ("Generating meta.json")
    M047 = ("Enter the package settings for your model. The following "
            "information will be read from your model data: pipeline, vectors.")
    M048 = ("No '{key}' setting found in meta.json")
    M049 = ("This setting is required to build your package.")
    M050 = ("Training data not found")
    M051 = ("Development data not found")
    M052 = ("Not a valid meta.json format")
    M053 = ("Expected dict but got: {meta_type}")
9 changes: 5 additions & 4 deletions spacy/cli/convert.py
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path

from .converters import conllu2json, iob2json, conll_ner2json
from ._messages import Messages
from ..util import prints

# Converters are matched by file extension. To add a converter, add a new
Expand Down Expand Up @@ -32,14 +33,14 @@ def convert(input_file, output_dir, n_sents=1, morphology=False, converter='auto
input_path = Path(input_file)
output_path = Path(output_dir)
if not input_path.exists():
prints(input_path, title="Input file not found", exits=1)
prints(input_path, title=Messages.M028, exits=1)
if not output_path.exists():
prints(output_path, title="Output directory not found", exits=1)
prints(output_path, title=Messages.M029, exits=1)
if converter == 'auto':
converter = input_path.suffix[1:]
if converter not in CONVERTERS:
prints("Can't find converter for %s" % converter,
title="Unknown format", exits=1)
prints(Messages.M031.format(converter=converter),
title=Messages.M030, exits=1)
func = CONVERTERS[converter]
func(input_path, output_path,
n_sents=n_sents, use_morphology=morphology)
5 changes: 3 additions & 2 deletions spacy/cli/converters/conll_ner2json.py
@@ -1,6 +1,7 @@
# coding: utf8
from __future__ import unicode_literals

from .._messages import Messages
from ...compat import json_dumps, path2str
from ...util import prints
from ...gold import iob_to_biluo
Expand All @@ -18,8 +19,8 @@ def conll_ner2json(input_path, output_path, n_sents=10, use_morphology=False):
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs))
prints("Created %d documents" % len(docs),
title="Generated output file %s" % path2str(output_file))
prints(Messages.M033.format(n_docs=len(docs)),
title=Messages.M032.format(name=path2str(output_file)))


def read_conll_ner(input_path):
Expand Down
5 changes: 3 additions & 2 deletions spacy/cli/converters/conllu2json.py
@@ -1,6 +1,7 @@
# coding: utf8
from __future__ import unicode_literals

from .._messages import Messages
from ...compat import json_dumps, path2str
from ...util import prints

Expand Down Expand Up @@ -32,8 +33,8 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs))
prints("Created %d documents" % len(docs),
title="Generated output file %s" % path2str(output_file))
prints(Messages.M033.format(n_docs=len(docs)),
title=Messages.M032.format(name=path2str(output_file)))


def read_conllx(input_path, use_morphology=False, n=0):
Expand Down
5 changes: 3 additions & 2 deletions spacy/cli/converters/iob2json.py
Expand Up @@ -2,6 +2,7 @@
from __future__ import unicode_literals
from cytoolz import partition_all, concat

from .._messages import Messages
from ...compat import json_dumps, path2str
from ...util import prints
from ...gold import iob_to_biluo
Expand All @@ -18,8 +19,8 @@ def iob2json(input_path, output_path, n_sents=10, *a, **k):
output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs))
prints("Created %d documents" % len(docs),
title="Generated output file %s" % path2str(output_file))
prints(Messages.M033.format(n_docs=len(docs)),
title=Messages.M032.format(name=path2str(output_file)))


def read_iob(raw_sents):
Expand Down
27 changes: 9 additions & 18 deletions spacy/cli/download.py
Expand Up @@ -8,6 +8,7 @@
import ujson

from .link import link
from ._messages import Messages
from ..util import prints, get_package_path
from ..compat import url_read, HTTPError
from .. import about
Expand All @@ -32,9 +33,7 @@ def download(model, direct=False):
version = get_version(model_name, compatibility)
dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name,
v=version))
if dl != 0:
# if download subprocess doesn't return 0, exit with the respective
# exit code before doing anything else
if dl != 0: # if download subprocess doesn't return 0, exit
sys.exit(dl)
try:
# Get package path here because link uses
Expand All @@ -48,22 +47,15 @@ def download(model, direct=False):
# Dirty, but since spacy.download and the auto-linking is
# mostly a convenience wrapper, it's best to show a success
# message and loading instructions, even if linking fails.
prints(
"Creating a shortcut link for 'en' didn't work (maybe "
"you don't have admin permissions?), but you can still "
"load the model via its full package name:",
"nlp = spacy.load('%s')" % model_name,
title="Download successful but linking failed")
# M002 is the message body (it carries the {name} field); M001 is the
# short title. Passing them the other way round prints the raw,
# unformatted template as the title.
prints(Messages.M002.format(name=model_name), title=Messages.M001)


def get_json(url, desc):
    """Fetch `url` and return its content parsed as JSON.

    url (unicode): The URL to read via `url_read`.
    desc (unicode): Human-readable description of the resource, inserted
        into the error message if the request fails.
    RETURNS: The data decoded by `ujson.loads`.

    On an HTTPError, an explanatory message is printed via `prints` with
    `exits=1`.
    """
    try:
        data = url_read(url)
    except HTTPError as e:
        # M003/M004 use named fields ({code}, {desc}, {version}), so the
        # values must be passed as keyword arguments; positional arguments
        # would raise a KeyError here and mask the real HTTP error.
        prints(Messages.M004.format(desc=desc, version=about.__version__),
               title=Messages.M003.format(code=e.code, desc=e.reason),
               exits=1)
    return ujson.loads(data)


Expand All @@ -73,17 +65,16 @@ def get_compatibility():
comp_table = get_json(about.__compatibility__, "compatibility table")
comp = comp_table['spacy']
if version not in comp:
prints("No compatible models found for v%s of spaCy." % version,
title="Compatibility error", exits=1)
prints(Messages.M006.format(version=version), title=Messages.M005,
exits=1)
return comp[version]


def get_version(model, comp):
model = model.rsplit('.dev', 1)[0]
if model not in comp:
version = about.__version__
msg = "No compatible model found for '%s' (spaCy v%s)."
prints(msg % (model, version), title="Compatibility error", exits=1)
prints(Messages.M007.format(name=model, version=about.__version__),
title=Messages.M005, exits=1)
return comp[model][0]


Expand Down
9 changes: 4 additions & 5 deletions spacy/cli/evaluate.py
Expand Up @@ -4,6 +4,7 @@
import plac
from timeit import default_timer as timer

from ._messages import Messages
from ..gold import GoldCorpus
from ..util import prints
from .. import util
Expand Down Expand Up @@ -33,10 +34,9 @@ def evaluate(model, data_path, gpu_id=-1, gold_preproc=False, displacy_path=None
data_path = util.ensure_path(data_path)
displacy_path = util.ensure_path(displacy_path)
if not data_path.exists():
prints(data_path, title="Evaluation data not found", exits=1)
prints(data_path, title=Messages.M034, exits=1)
if displacy_path and not displacy_path.exists():
prints(displacy_path, title="Visualization output directory not found",
exits=1)
prints(displacy_path, title=Messages.M035, exits=1)
corpus = GoldCorpus(data_path, data_path)
nlp = util.load_model(model)
dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
Expand All @@ -52,8 +52,7 @@ def evaluate(model, data_path, gpu_id=-1, gold_preproc=False, displacy_path=None
render_ents = 'ner' in nlp.meta.get('pipeline', [])
render_parses(docs, displacy_path, model_name=model,
limit=displacy_limit, deps=render_deps, ents=render_ents)
msg = "Generated %s parses as HTML" % displacy_limit
prints(displacy_path, title=msg)
prints(displacy_path, title=Messages.M036.format(n=displacy_limit))


def render_parses(docs, output_path, model_name='', limit=250, deps=True,
Expand Down
5 changes: 3 additions & 2 deletions spacy/cli/info.py
Expand Up @@ -5,9 +5,10 @@
import platform
from pathlib import Path

from ._messages import Messages
from ..compat import path2str
from .. import about
from .. import util
from .. import about


@plac.annotations(
Expand All @@ -25,7 +26,7 @@ def info(model=None, markdown=False):
model_path = util.get_data_path() / model
meta_path = model_path / 'meta.json'
if not meta_path.is_file():
util.prints(meta_path, title="Can't find model meta.json", exits=1)
util.prints(meta_path, title=Messages.M020, exits=1)
meta = util.read_json(meta_path)
if model_path.resolve() != model_path:
meta['link'] = path2str(model_path)
Expand Down

0 comments on commit 3141e04

Please sign in to comment.