Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new metas Co-authored-by: Explosion Bot <contact@explosion.ai>
- Loading branch information
Showing
16 changed files
with
3,998 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
{ | ||
"lang": "de", | ||
"name": "core_news_md", | ||
"license": "MIT", | ||
"author": "Explosion", | ||
"url": "https://explosion.ai", | ||
"email": "contact@explosion.ai", | ||
"sources": [ | ||
{ | ||
"name": "TIGER Corpus", | ||
"url": "https://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/tiger.html", | ||
"license": "commercial (licensed by Explosion)" | ||
}, | ||
{ | ||
"name": "WikiNER", | ||
"url": "https://figshare.com/articles/Learning_multilingual_named_entity_recognition_from_Wikipedia/5462500", | ||
"license": "CC BY 4.0" | ||
} | ||
], | ||
"description": "German multi-task CNN trained on the TIGER and WikiNER corpora. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.", | |
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than on correct human annotations.", | |
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"version": "2.2.0", | ||
"spacy_version": ">=2.2.0", | ||
"parent_package": "spacy", | ||
"accuracy": { | ||
"las": 89.363889358, | ||
"uas": 91.2568747384, | ||
"token_acc": 95.8813352983, | ||
"tags_acc": 96.4395314305, | ||
"ents_f": 83.4238864149, | ||
"ents_p": 83.7778142256, | ||
"ents_r": 83.0729364314 | ||
}, | ||
"speed": { | ||
"cpu": 7066.2062392444, | ||
"gpu": null, | ||
"nwords": 696811 | ||
}, | ||
"labels": { | ||
"tagger": [ | ||
"$(", | ||
"$,", | ||
"$.", | ||
"ADJA", | ||
"ADJD", | ||
"ADV", | ||
"APPO", | ||
"APPR", | ||
"APPRART", | ||
"APZR", | ||
"ART", | ||
"CARD", | ||
"FM", | ||
"ITJ", | ||
"KOKOM", | ||
"KON", | ||
"KOUI", | ||
"KOUS", | ||
"NE", | ||
"NN", | ||
"NNE", | ||
"PDAT", | ||
"PDS", | ||
"PIAT", | ||
"PIS", | ||
"PPER", | ||
"PPOSAT", | ||
"PPOSS", | ||
"PRELAT", | ||
"PRELS", | ||
"PRF", | ||
"PROAV", | ||
"PTKA", | ||
"PTKANT", | ||
"PTKNEG", | ||
"PTKVZ", | ||
"PTKZU", | ||
"PWAT", | ||
"PWAV", | ||
"PWS", | ||
"TRUNC", | ||
"VAFIN", | ||
"VAIMP", | ||
"VAINF", | ||
"VAPP", | ||
"VMFIN", | ||
"VMINF", | ||
"VMPP", | ||
"VVFIN", | ||
"VVIMP", | ||
"VVINF", | ||
"VVIZU", | ||
"VVPP", | ||
"XY", | ||
"_SP" | ||
], | ||
"parser": [ | ||
"ROOT", | ||
"ac", | ||
"adc", | ||
"ag", | ||
"ams", | ||
"app", | ||
"avc", | ||
"cc", | ||
"cd", | ||
"cj", | ||
"cm", | ||
"cp", | ||
"cvc", | ||
"da", | ||
"dep", | ||
"dm", | ||
"ep", | ||
"ju", | ||
"mnr", | ||
"mo", | ||
"ng", | ||
"nk", | ||
"nmc", | ||
"oa", | ||
"oc", | ||
"og", | ||
"op", | ||
"par", | ||
"pd", | ||
"pg", | ||
"ph", | ||
"pm", | ||
"pnc", | ||
"punct", | ||
"rc", | ||
"re", | ||
"rs", | ||
"sb", | ||
"sbp", | ||
"svp", | ||
"uc", | ||
"vo" | ||
], | ||
"ner": [ | ||
"LOC", | ||
"MISC", | ||
"ORG", | ||
"PER" | ||
] | ||
}, | ||
"vectors": { | ||
"width": 300, | ||
"vectors": 20000, | ||
"keys": 276087, | ||
"name": "de_core_news_md.vectors" | ||
}, | ||
"size": "214 MB", | ||
"checksum": "09f79aa1fb1fe2e36ca5172c4b17d824e8d6454b98b2e62fae85de5ec193c1d8" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
{ | ||
"lang": "de", | ||
"name": "core_news_sm", | ||
"license": "MIT", | ||
"author": "Explosion", | ||
"url": "https://explosion.ai", | ||
"email": "contact@explosion.ai", | ||
"sources": [ | ||
{ | ||
"name": "TIGER Corpus", | ||
"url": "https://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/tiger.html", | ||
"license": "commercial (licensed by Explosion)" | ||
}, | ||
{ | ||
"name": "WikiNER", | ||
"url": "https://figshare.com/articles/Learning_multilingual_named_entity_recognition_from_Wikipedia/5462500", | ||
"license": "CC BY 4.0" | ||
} | ||
], | ||
"description": "German multi-task CNN trained on the TIGER and WikiNER corpora. Assigns context-specific token vectors, POS tags, dependency parse and named entities. Supports identification of PER, LOC, ORG and MISC entities.", | |
"notes": "Because the model is trained on Wikipedia, it may perform inconsistently on many genres, such as social media text. The NER accuracy refers to the \"silver standard\" annotations in the WikiNER corpus. Accuracy on these annotations tends to be higher than on correct human annotations.", | |
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner" | ||
], | ||
"version": "2.2.0", | ||
"spacy_version": ">=2.2.0", | ||
"parent_package": "spacy", | ||
"accuracy": { | ||
"las": 88.6286373015, | ||
"uas": 90.7476889414, | ||
"token_acc": 95.8813352983, | ||
"tags_acc": 96.2851014684, | ||
"ents_f": 83.1103988837, | ||
"ents_p": 83.5658931723, | ||
"ents_r": 82.6598432197 | ||
}, | ||
"speed": { | ||
"cpu": 7419.0226285104, | ||
"gpu": null, | ||
"nwords": 696811 | ||
}, | ||
"labels": { | ||
"tagger": [ | ||
"$(", | ||
"$,", | ||
"$.", | ||
"ADJA", | ||
"ADJD", | ||
"ADV", | ||
"APPO", | ||
"APPR", | ||
"APPRART", | ||
"APZR", | ||
"ART", | ||
"CARD", | ||
"FM", | ||
"ITJ", | ||
"KOKOM", | ||
"KON", | ||
"KOUI", | ||
"KOUS", | ||
"NE", | ||
"NN", | ||
"NNE", | ||
"PDAT", | ||
"PDS", | ||
"PIAT", | ||
"PIS", | ||
"PPER", | ||
"PPOSAT", | ||
"PPOSS", | ||
"PRELAT", | ||
"PRELS", | ||
"PRF", | ||
"PROAV", | ||
"PTKA", | ||
"PTKANT", | ||
"PTKNEG", | ||
"PTKVZ", | ||
"PTKZU", | ||
"PWAT", | ||
"PWAV", | ||
"PWS", | ||
"TRUNC", | ||
"VAFIN", | ||
"VAIMP", | ||
"VAINF", | ||
"VAPP", | ||
"VMFIN", | ||
"VMINF", | ||
"VMPP", | ||
"VVFIN", | ||
"VVIMP", | ||
"VVINF", | ||
"VVIZU", | ||
"VVPP", | ||
"XY", | ||
"_SP" | ||
], | ||
"parser": [ | ||
"ROOT", | ||
"ac", | ||
"adc", | ||
"ag", | ||
"ams", | ||
"app", | ||
"avc", | ||
"cc", | ||
"cd", | ||
"cj", | ||
"cm", | ||
"cp", | ||
"cvc", | ||
"da", | ||
"dep", | ||
"dm", | ||
"ep", | ||
"ju", | ||
"mnr", | ||
"mo", | ||
"ng", | ||
"nk", | ||
"nmc", | ||
"oa", | ||
"oc", | ||
"og", | ||
"op", | ||
"par", | ||
"pd", | ||
"pg", | ||
"ph", | ||
"pm", | ||
"pnc", | ||
"punct", | ||
"rc", | ||
"re", | ||
"rs", | ||
"sb", | ||
"sbp", | ||
"svp", | ||
"uc", | ||
"vo" | ||
], | ||
"ner": [ | ||
"LOC", | ||
"MISC", | ||
"ORG", | ||
"PER" | ||
] | ||
}, | ||
"size": "14 MB", | ||
"checksum": "8b79574382b1e06b24f67e76d652d60d00750405c23e7a078dc4cc53aad5e219" | ||
} |
Oops, something went wrong.