CodE AlltagS is part of CodE Alltag, a German-language email corpus. It has been collected via voluntary email donation without restricting the type or topic of the emails and contains 800 pseudonymized emails from 460 different donors (mostly German native speakers). For pseudonymization, privacy-sensitive text spans were annotated manually before substituting them with realistic surrogates automatically.
If you use CodE Alltag, please cite:
% CodE Alltag corpus paper (LREC'16).
% The braces around CodE, Alltag and German protect those words from being
% lowercased by bibliography styles that apply sentence casing to titles.
@inproceedings{krieg-holz-etal-2016,
  title     = {{CodE} {Alltag}: A {German}-Language {E}-Mail Corpus},
  author    = {Krieg-Holz, Ulrike and
               Schuschnig, Christian and
               Matthies, Franz and
               Redling, Benjamin and
               Hahn, Udo},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  month     = may,
  year      = {2016},
  address   = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/L16-1404},
  pages     = {2543--2550},
}
% CodE Alltag 2.0 corpus paper (Twelfth LREC, 2020).
% The braces around CodE, Alltag and German protect those words from being
% lowercased by bibliography styles that apply sentence casing to titles.
@inproceedings{eder-etal-2020,
  title     = {{CodE} {Alltag} 2.0 {---} A Pseudonymized {German}-Language Email Corpus},
  author    = {Eder, Elisabeth and
               Krieg-Holz, Ulrike and
               Hahn, Udo},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.550},
  pages     = {4466--4477},
}