diff --git a/docs/models/pos.md b/docs/models/pos.md index 7158f83..a848f55 100644 --- a/docs/models/pos.md +++ b/docs/models/pos.md @@ -1,6 +1,6 @@ Part of Speech Tagging ====================== -This section is concerned with public available Part of Speech taggers in Danish. +This section is concerned with publicly available Part of Speech (POS) taggers in Danish. | Model | Train Data | License | Trained by | Tags | DaNLP | |-------|-------|-------|-------|-------|-------| @@ -10,7 +10,7 @@ This section is concerned with public available Part of Speech taggers in Danish The Danish UD treebank uses 17 [universal part of speech tags](): -`ADJ`: Adjective, `ADP`: Adposition , `ADV`: Adverb, `AUX`: Auxiliary verb, `CONJ`: Coordinating conjunction, `DET`: Determiner, `INTJ`: Interjection, `NOUN`: Noun, `NUM`: Numeral, `PART`: Particle `PRON`: Pronoun `PROPN`: Proper noun `PUNCT`: Punctuation `SCONJ`: Subordinating conjunction `SYM`: Symbol `VERB`: Verb `X`: Other +`ADJ`: Adjective, `ADP`: Adposition, `ADV`: Adverb, `AUX`: Auxiliary verb, `CCONJ`: Coordinating conjunction, `DET`: Determiner, `INTJ`: Interjection, `NOUN`: Noun, `NUM`: Numeral, `PART`: Particle, `PRON`: Pronoun, `PROPN`: Proper noun, `PUNCT`: Punctuation, `SCONJ`: Subordinating conjunction, `SYM`: Symbol, `VERB`: Verb, `X`: Other A medium blog using Part of Speech tagging on Danish, can be found [here](). @@ -23,7 +23,7 @@ This project provides a trained part of speech tagging model for Danish using th The code for training can be found on Flairs GitHub, and the following parameters are set: `learning_rate=1`, `mini_batch_size=32`, `max_epochs=150`, `hidden_size=256`. -The flair pos tagger can be used by loading it with the `load_flair_pos_model` method. Please note the the text should be tokenized before hand, this can for example be done using spacy. +The flair pos tagger can be used by loading it with the `load_flair_pos_model` method. 
Please note that the text should be tokenized beforehand; this can, for example, be done using spaCy. ```python from danlp.models import load_flair_pos_model @@ -47,7 +47,7 @@ print(sentence.to_tagged_string()) ##### :wrench:SpaCy -Read more about the spaCy model in the dedicated [spaCy docs]() , it has also been trained using the the data [Danish Dependency Treebank]() . +Read more about the spaCy model in the dedicated [spaCy docs](). It has also been trained using the [Danish Dependency Treebank]() data. Below is a small getting started snippet for using the Spacy pos tagger: @@ -75,13 +75,28 @@ Read more about the polyglot model [here]() folder, where the details score from each class is calculated. +Accuracy scores are reported below and can be reproduced using `pos_benchmarks.py` in the [example]() folder, where the detailed scores for each class are calculated. + +#### DaNLP + +| Model | Accuracy | +|-----------------------------|------------| +| Flair | **97.97** | +| SpaCy | 96.15 | + +#### Polyglot + +The tags predicted with the polyglot model differ slightly from the universal PoS-tags. The model predicts: +* `CONJ` instead of `CCONJ` +* `VERB` instead of `AUX` for the auxiliary and modal verbs (i.e. `være`, `have`, `kunne`, `ville`, `skulle`, `måtte`, `burde`) + +We calculated the scores for the original predictions and for the corrected version. 
+ +| Model | Accuracy | +| -------- | ---------- | +| Polyglot | 76.76 | +| Polyglot (corrected output) | 83.4 | -| Model | Micro-F1 | -| -------- | ---------- | -| Polyglot | 0.7380 | -| Flair | **0.9667** | -| SpaCy | 0.9550 | diff --git a/examples/benchmarks/pos_benchmarks.py b/examples/benchmarks/pos_benchmarks.py index 953de3e..50912b7 100644 --- a/examples/benchmarks/pos_benchmarks.py +++ b/examples/benchmarks/pos_benchmarks.py @@ -6,9 +6,9 @@ from danlp.datasets import DDT from danlp.models import load_spacy_model, load_flair_pos_model -from seqeval.metrics import classification_report +from tabulate import tabulate -# bechmarking polyglotmodel requires +# benchmarking polyglotmodel requires from polyglot.tag import POSTagger from polyglot.text import WordList @@ -27,8 +27,38 @@ sentences_tokens = [] for sent in ccorpus_conll[2]: sentences_tokens.append([token.form for token in sent._tokens]) - - + + + +def print_accuracy_scores(tags_true, tags_pred): + + # flatening tags lists + tags_true = [tag for sent in tags_true for tag in sent] + tags_pred = [tag for sent in tags_pred for tag in sent] + + # list of all tags + labels = sorted(list(set(tags_true))) + + headers = ["label", "accuracy", "support"] + tab = [] + correct_tags = {l:[] for l in labels} + # counting correct predictions per tag + for label in labels: + correct_tags[label] = [t == p for t, p in zip(tags_true, tags_pred) if t == label] + acc = round(sum(correct_tags[label])/len(correct_tags[label])*100, 2) if len(correct_tags[label])>0 else 0 + tab.append([label, acc, len(correct_tags[label])]) + tab.append(['', '', '']) + total_examples = sum(len(correct_tags[l]) for l in correct_tags) + + micro_acc = round( sum(sum(correct_tags[l]) for l in correct_tags) / total_examples *100, 2) + tab.append(["micro average", micro_acc, total_examples]) + + macro_acc = round( sum([sum(correct_tags[l])/len(correct_tags[l]) for l in labels if len(correct_tags[l])>0]) / len(labels) *100, 2) + tab.append(["macro 
average", macro_acc, total_examples]) + + print("\n", tabulate(tab, headers=headers, colalign=["right", "decimal", "right"]), "\n") + + def benchmark_flair_mdl(): tagger = load_flair_pos_model() @@ -43,11 +73,7 @@ def benchmark_flair_mdl(): assert len(tags_pred)==num_sentences assert sum([len(s) for s in tags_pred])==num_tokens - - print(classification_report(tags_true, tags_pred, - digits=4)) - - + print_accuracy_scores(tags_true, tags_pred) def benchmark_spacy_mdl(): @@ -74,48 +100,56 @@ def benchmark_spacy_mdl(): assert len(tags_pred)==num_sentences assert sum([len(s) for s in tags_pred])==num_tokens - - print(classification_report(tags_true, tags_pred, - digits=4)) - + print_accuracy_scores(tags_true, tags_pred) - -def benchmark_polyglot_mdl(): + +auxiliary_verbs = ["være", "er", "var", "været"] +auxiliary_verbs += ["have", "har", "havde", "haft"] +auxiliary_verbs += ["kunne", "kan", "kunnet"] +auxiliary_verbs += ["ville", "vil", "villet"] +auxiliary_verbs += ["skulle", "skal", "skullet"] +auxiliary_verbs += ["måtte", "må", "måttet"] +auxiliary_verbs += ["burde", "bør", "burdet"] + +def benchmark_polyglot_mdl(corrected_output=False): """ - Running ployglot requires these packages: + Running polyglot requires these packages: # Morfessor==2.0.6 # PyICU==2.4.2 # pycld2==0.41 # polyglot """ + + def udify_tag(tag, word): + if tag == "CONJ": + return "CCONJ" + if tag == "VERB" and word in auxiliary_verbs: + return "AUX" + return tag start = time.time() tags_pred = [] for tokens in sentences_tokens: word_list = WordList(tokens, language='da') - ne_chunker = POSTagger(lang='da') - word_ent_tuples = list(ne_chunker.annotate(word_list)) - - tags_pred.append([entity for word, entity in word_ent_tuples]) - print('**Polyglot model**') + tagger = POSTagger(lang='da') + word_tag_tuples = list(tagger.annotate(word_list)) + tags_pred.append([udify_tag(tag, word) if corrected_output else tag for word, tag in word_tag_tuples]) + print('**Polyglot model'+(' (corrected output) ' if 
corrected_output else '')+'**') print("Made predictions on {} sentences and {} tokens in {}s".format( num_sentences, num_tokens, time.time() - start)) - + assert len(tags_pred)==num_sentences assert sum([len(s) for s in tags_pred])==num_tokens - - - print(classification_report(tags_true, tags_pred, - digits=4)) - - - - - - + + print_accuracy_scores(tags_true, tags_pred) + + + + if __name__ == '__main__': benchmark_polyglot_mdl() + benchmark_polyglot_mdl(corrected_output=True) benchmark_spacy_mdl() benchmark_flair_mdl() \ No newline at end of file