-
Notifications
You must be signed in to change notification settings - Fork 20
/
score.py
166 lines (138 loc) · 5.75 KB
/
score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
This module includes functions for scoring models applied to a spaCy corpus.
"""
from __future__ import annotations
from time import time
from copy import copy
from functools import partial
from typing import Callable, Dict, Iterable, List, Optional, Union
import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.training import Corpus, Example, dont_augment
from spacy.tokens import Doc, Span
from ..utils import flatten_dict
def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
    """Yield the spans stored under ``attr`` on ``doc``, skipping MISC spans.

    Intended as a ``getter`` for span scoring, so that entities can be
    evaluated without the "MISC" label.

    Args:
        doc (Doc): a SpaCy Doc
        attr (str): name of the attribute on ``doc`` that holds the spans

    Returns:
        Iterable[Span]: every span whose ``label_`` is not "MISC", in the
            order they appear in the attribute.
    """
    yield from (
        candidate for candidate in getattr(doc, attr) if candidate.label_ != "MISC"
    )
def dep_getter(token, attr):
    """Return the lower-cased string form of a token attribute.

    Args:
        token: object exposing the attribute named ``attr`` and a
            ``vocab.strings.as_string`` method (e.g. a SpaCy Token).
        attr (str): name of the attribute to read from ``token``.

    Returns:
        str: the attribute value passed through the vocab's string store
            (``as_string``) and lower-cased.
    """
    return token.vocab.strings.as_string(getattr(token, attr)).lower()
def _apply_pipeline(pipeline, examples):
    """Run ``pipeline`` on the text of each example's ``x`` doc and pair the output with ``y``."""
    text_and_gold = ((example.x.text, example.y) for example in examples)
    return [
        Example(predicted, gold)
        for predicted, gold in pipeline.pipe(text_and_gold, as_tuples=True)
    ]


def _score_ents(examples):
    """Score entity spans and add precision/recall/F1 computed without MISC spans."""
    scores = Scorer.score_spans(examples, attr="ents")
    without_misc = Scorer.score_spans(examples, attr="ents", getter=no_misc_getter)
    scores["ents_excl_MISC"] = {
        metric: without_misc[metric] for metric in ("ents_p", "ents_r", "ents_f")
    }
    return scores


def _score_pos(examples):
    """Score the ``pos`` and ``tag`` token attributes and merge both results."""
    scores = Scorer.score_token_attr(examples, attr="pos")
    scores.update(Scorer.score_token_attr(examples, attr="tag"))
    return scores


def _collapse_runs(runs):
    """Turn a list of per-run score dicts into one dict of per-metric lists."""
    # Use the union of keys over *all* runs (first-seen order) so a metric that
    # is missing from the last run is kept, and an empty list of runs is valid.
    keys = dict.fromkeys(key for run in runs for key in run)
    return {key: [run.get(key) for run in runs] for key in keys}


def score(
    corpus: Corpus,
    apply_fn: Union[Callable[[Iterable[Example]], List[Example]], Language],
    # NOTE: the list defaults below are never mutated inside this function.
    score_fn: List[Union[Callable[[Iterable[Example]], dict], str]] = [
        "token",
        "pos",
        "ents",
        "dep",
    ],
    augmenters: List[Callable[[Language, Example], Iterable[Example]]] = [],
    k: int = 1,
    nlp: Optional[Language] = None,
    **kwargs,
) -> pd.DataFrame:
    """Score a model's performance on a corpus, optionally with augmentations applied.

    For every augmenter the corpus is loaded with that augmenter attached, passed
    through ``apply_fn`` and scored; this is repeated ``k`` times and every run
    becomes one row of the returned dataframe.

    Args:
        corpus (Corpus): A spacy Corpus
        apply_fn (Union[Callable, Language]): A wrapper function for the model you
            wish to score. It should take in spacy Examples (Iterable[Example]) and
            output a tagged version of them (Iterable[Example]). A SpaCy pipeline
            (Language) can be provided as is.
        score_fn (List[Union[Callable[[Iterable[Example]], dict], str]], optional):
            Scoring functions, each taking a list of examples and returning a
            dictionary of performance scores. A single function or string is also
            accepted. Five strings are valid: "ents" for the performance of entity
            spans (including scores excluding MISC), "pos" for fine-grained (tag)
            and coarse-grained (pos) tags, "token" for tokenization, "dep" for
            dependency parsing and "nlp" for ``Scorer(nlp).score``, i.e. the
            components of the ``nlp`` pipeline given below.
            Defaults to ["token", "pos", "ents", "dep"].
        augmenters (List[Callable[[Language, Example], Iterable[Example]]], optional):
            A spaCy style augmenter, or a list thereof, to apply to the corpus.
            Defaults to [], indicating no augmentation.
        k (int, optional): Number of times to run the augmentation and test the
            performance on the corpus. Defaults to 1.
        nlp (Optional[Language], optional): A spacy processing pipeline used when
            calling the corpus and for the "nlp" scorer. If None an empty Danish
            pipeline is used. Defaults to None.
        **kwargs: Accepted but not used by this function.

    Returns:
        pandas.DataFrame: one row per run. Contains ``wall_time`` (seconds spent
            loading the corpus and applying the model), ``k`` (the run index) and
            the metrics returned by the scorers after ``flatten_dict``. Frames of
            the individual augmenters are concatenated in order.

    Raises:
        KeyError: if a string in ``score_fn`` is not one of the valid names.

    Example:
        >>> import dacy
        >>> from spacy.training.augment import create_lower_casing_augmenter
        >>> from dacy.datasets import dane
        >>> test = dane(splits=["test"])
        >>> nlp = dacy.load("da_dacy_small_tft-0.0.0")
        >>> scores = score(test, augmenters=[create_lower_casing_augmenter(0.5)], apply_fn=nlp)
    """
    # A single augmenter / scorer may be passed bare; normalise both to lists.
    if callable(augmenters):
        augmenters = [augmenters]
    if not augmenters:
        augmenters = [dont_augment]
    if isinstance(score_fn, str) or callable(score_fn):
        score_fn = [score_fn]

    if isinstance(apply_fn, Language):
        apply_fn = partial(_apply_pipeline, apply_fn)

    if nlp is None:
        from spacy.lang.da import Danish

        nlp = Danish()

    scorer = Scorer(nlp)
    def_scorers = {
        "ents": _score_ents,
        "pos": _score_pos,
        "token": Scorer.score_tokenization,
        "nlp": scorer.score,
        "dep": partial(
            Scorer.score_deps,
            attr="dep",
            getter=dep_getter,
            ignore_labels=("p", "punct"),
        ),
    }
    # Resolve names once, up front: an unknown name fails before any (slow)
    # pipeline run instead of after the first one.
    scorers = [def_scorers[fn] if isinstance(fn, str) else fn for fn in score_fn]

    def _score_augmenter(augmenter):
        # Shallow copy so the caller's corpus keeps its own augmenter.
        corpus_ = copy(corpus)
        corpus_.augmenter = augmenter

        runs = []
        for _ in range(k):
            start = time()
            # Materialise inside the timed section: a lazy apply_fn would
            # otherwise do its work outside the timing and be exhausted by the
            # first scorer that iterates it.
            examples = list(apply_fn(corpus_(nlp)))
            run_scores = {"wall_time": time() - start}
            for scorer_fn in scorers:
                run_scores.update(scorer_fn(examples))
            runs.append(flatten_dict(run_scores))

        collapsed = _collapse_runs(runs)
        collapsed["k"] = list(range(k))
        return pd.DataFrame(collapsed)

    return pd.concat([_score_augmenter(aug) for aug in augmenters])