From 1d8ad1862ca01c1e1393815fb7084d7c1208bd9c Mon Sep 17 00:00:00 2001
From: "Panagiotis H.M. Issaris"
Date: Mon, 20 Mar 2023 17:48:58 +0100
Subject: [PATCH] Add lorem provider for `nl_BE` (#1820)

---
 faker/providers/lorem/nl_BE/__init__.py | 1015 +++++++++++++++++++++++
 faker/providers/lorem/nl_NL/__init__.py |   14 +
 tests/providers/test_lorem.py           |   73 ++
 3 files changed, 1102 insertions(+)
 create mode 100644 faker/providers/lorem/nl_BE/__init__.py
 create mode 100644 faker/providers/lorem/nl_NL/__init__.py

diff --git a/faker/providers/lorem/nl_BE/__init__.py b/faker/providers/lorem/nl_BE/__init__.py
new file mode 100644
index 0000000000..efc33ef961
--- /dev/null
+++ b/faker/providers/lorem/nl_BE/__init__.py
@@ -0,0 +1,1015 @@
+from typing import Dict
+
+from .. import Provider as LoremProvider
+
+
+class Provider(LoremProvider):
+    """Implement lorem provider for ``nl_BE`` locale.
+
+    Source: https://nl.wiktionary.org/wiki/WikiWoordenboek:Lijst_met_1000_basiswoorden
+    """
+
+    word_list = (
+        "aan",
+        "aanbod",
+        "aanraken",
+        "aanval",
+        "aap",
+        "aardappel",
+        "aarde",
+        "aardig",
+        "acht",
+        "achter",
+        "actief",
+        "activiteit",
+        "ademen",
+        "af",
+        "afgelopen",
+        "afhangen",
+        "afmaken",
+        "afname",
+        "afspraak",
+        "afval",
+        "al",
+        "algemeen",
+        "alleen",
+        "alles",
+        "als",
+        "alsjeblieft",
+        "altijd",
+        "ander",
+        "andere",
+        "anders",
+        "angst",
+        "antwoord",
+        "antwoorden",
+        "appel",
+        "arm",
+        "auto",
+        "avond",
+        "avondeten",
+        "baan",
+        "baby",
+        "bad",
+        "bal",
+        "bang",
+        "bank",
+        "basis",
+        "bed",
+        "bedekken",
+        "bedreiging",
+        "bedreven",
+        "been",
+        "beer",
+        "beest",
+        "beetje",
+        "begin",
+        "begrijpen",
+        "begrip",
+        "behalve",
+        "beide",
+        "beker",
+        "bel",
+        "belangrijk",
+        "bellen",
+        "belofte",
+        "beneden",
+        "benzine",
+        "berg",
+        "beroemd",
+        "beroep",
+        "bescherm",
+        "beslissen",
+        "best",
+        "betalen",
+        "beter",
+        "bevatten",
+        "bewegen",
+        "bewolkt",
+        "bezoek",
+        "bibliotheek",
+        "bieden",
+        "bij",
+        "bijna",
+        "bijten",
+        "bijvoorbeeld",
+        "bijzonder",
+        "binnen",
+        "binnenkort",
+        "blad",
+        "blauw",
+        "blazen",
+        "blij",
+        "blijven",
+        "bloed",
+        "bloem",
+        "bodem",
+        "boek",
+        "boerderij",
+        "boete",
+        "boom",
+        "boon",
+        "boord",
+        "boos",
+        "bord",
+        "borstelen",
+        "bos",
+        "bot",
+        "bouwen",
+        "boven",
+        "branden",
+        "brandstof",
+        "breed",
+        "breken",
+        "brengen",
+        "brief",
+        "broer",
+        "broek",
+        "brood",
+        "brug",
+        "bruikbaar",
+        "bruiloft",
+        "bruin",
+        "bui",
+        "buiten",
+        "bureau",
+        "buren",
+        "bus",
+        "buurman",
+        "buurvrouw",
+        "cadeau",
+        "chocolade",
+        "cirkel",
+        "comfortabel",
+        "compleet",
+        "computer",
+        "conditie",
+        "controle",
+        "cool",
+        "correct",
+        "daar",
+        "daarom",
+        "dag",
+        "dak",
+        "dan",
+        "dansen",
+        "dapper",
+        "dat",
+        "de",
+        "deel",
+        "deken",
+        "deksel",
+        "delen",
+        "derde",
+        "deze",
+        "dichtbij",
+        "dienen",
+        "diep",
+        "dier",
+        "dik",
+        "ding",
+        "dit",
+        "dochter",
+        "doen",
+        "dom",
+        "donker",
+        "dood",
+        "door",
+        "doorzichtig",
+        "doos",
+        "dorp",
+        "draad",
+        "draaien",
+        "dragen",
+        "drie",
+        "drijven",
+        "drinken",
+        "drogen",
+        "dromen",
+        "droog",
+        "druk",
+        "dubbel",
+        "dun",
+        "dus",
+        "duur",
+        "duwen",
+        "echt",
+        "een",
+        "één",
+        "eend",
+        "eenheid",
+        "eenzaam",
+        "eerste",
+        "eeuw",
+        "effect",
+        "ei",
+        "eigen",
+        "eiland",
+        "einde",
+        "eis",
+        "elektrisch",
+        "elk",
+        "en",
+        "enkele",
+        "enthousiast",
+        "erg",
+        "eten",
+        "even",
+        "examen",
+        "extreem",
+        "falen",
+        "familie",
+        "feest",
+        "feit",
+        "fel",
+        "fijn",
+        "film",
+        "fit",
+        "fles",
+        "foto",
+        "fout",
+        "fris",
+        "fruit",
+        "gaan",
+        "gat",
+        "gebeuren",
+        "gebeurtenis",
+        "gebied",
+        "geboorte",
+        "geboren",
+        "gebruik",
+        "gebruikelijk",
+        "gebruiken",
+        "gedrag",
+        "gedragen",
+        "geel",
+        "geen",
+        "gehoorzamen",
+        "geit",
+        "geld",
+        "geliefde",
+        "gelijk",
+        "geloof",
+        "geluid",
+        "geluk",
+        "gemak",
+        "gemakkelijk",
+        "gemeen",
+        "genieten",
+        "genoeg",
+        "genot",
+        "gerecht",
+        "gereedschap",
+        "geschikt",
+        "gespannen",
+        "geur",
+        "gevaar",
+        "gevaarlijk",
+        "gevangenis",
+        "geven",
+        "gevolg",
+        "gewicht",
+        "gewoon",
+        "gezicht",
+        "gezond",
+        "gif",
+        "gisteren",
+        "glad",
+        "glas",
+        "glimlach",
+        "god",
+        "goed",
+        "goedkoop",
+        "goud",
+        "graf",
+        "grap",
+        "grappig",
+        "gras",
+        "grens",
+        "grijs",
+        "groeien",
+        "groen",
+        "groente",
+        "groep",
+        "grof",
+        "grond",
+        "groot",
+        "grootmoeder",
+        "grootvader",
+        "haan",
+        "haar",
+        "haast",
+        "hal",
+        "halen",
+        "half",
+        "hallo",
+        "hamer",
+        "hand",
+        "hard",
+        "hart",
+        "haten",
+        "hebben",
+        "heel",
+        "heet",
+        "helder",
+        "helft",
+        "help",
+        "hem",
+        "hemel",
+        "hen",
+        "herfst",
+        "herinneren",
+        "hert",
+        "het",
+        "heuvel",
+        "hier",
+        "hij",
+        "hobby",
+        "hoe",
+        "hoed",
+        "hoek",
+        "hoeveel",
+        "hoeveelheid",
+        "hoewel",
+        "hond",
+        "honderd",
+        "honger",
+        "hoofd",
+        "hoog",
+        "hoogte",
+        "hoop",
+        "horen",
+        "hotel",
+        "houden",
+        "huilen",
+        "huis",
+        "hun",
+        "huren",
+        "hut",
+        "huur",
+        "idee",
+        "ieder",
+        "iedereen",
+        "iemand",
+        "iets",
+        "ijs",
+        "ijzer",
+        "ik",
+        "in",
+        "instrument",
+        "ja",
+        "jaar",
+        "jagen",
+        "jas",
+        "jij",
+        "jong",
+        "jongen",
+        "jouw",
+        "jullie",
+        "kaars",
+        "kaart",
+        "kaas",
+        "kamer",
+        "kans",
+        "kant",
+        "kantoor",
+        "kap",
+        "kast",
+        "kasteel",
+        "kat",
+        "kennen",
+        "kennis",
+        "keuken",
+        "keus",
+        "kiezen",
+        "kijken",
+        "kind",
+        "kip",
+        "kist",
+        "klaar",
+        "klas",
+        "klasse",
+        "kleden",
+        "klein",
+        "kleren",
+        "kleur",
+        "klimmen",
+        "klok",
+        "kloppen",
+        "klopt",
+        "knie",
+        "knippen",
+        "koers",
+        "koffer",
+        "koffie",
+        "kok",
+        "koken",
+        "kom",
+        "komen",
+        "koning",
+        "koningin",
+        "koorts",
+        "kop",
+        "kopen",
+        "kort",
+        "kost",
+        "kosten",
+        "koud",
+        "kraam",
+        "kracht",
+        "krant",
+        "krijgen",
+        "kruis",
+        "kuil",
+        "kunnen",
+        "kunst",
+        "laag",
+        "laat",
+        "laatst",
+        "lach",
+        "lachen",
+        "ladder",
+        "laken",
+        "lamp",
+        "land",
+        "lang",
+        "langs",
+        "langzaam",
+        "laten",
+        "leeftijd",
+        "leeg",
+        "leerling",
+        "leeuw",
+        "leger",
+        "leiden",
+        "lenen",
+        "lengte",
+        "lepel",
+        "leren",
+        "les",
+        "leuk",
+        "leven",
+        "lezen",
+        "lichaam",
+        "licht",
+        "liefde",
+        "liegen",
+        "liggen",
+        "lijk",
+        "lijken",
+        "liniaal",
+        "links",
+        "lip",
+        "list",
+        "lomp",
+        "lood",
+        "lopen",
+        "los",
+        "lot",
+        "lucht",
+        "lui",
+        "luisteren",
+        "lunch",
+        "maag",
+        "maal",
+        "maaltijd",
+        "maan",
+        "maand",
+        "maar",
+        "maat",
+        "machine",
+        "maken",
+        "makkelijk",
+        "mama",
+        "man",
+        "mand",
+        "manier",
+        "map",
+        "markeren",
+        "markt",
+        "me",
+        "medicijn",
+        "meel",
+        "meer",
+        "meerdere",
+        "meest",
+        "meisje",
+        "melk",
+        "meneer",
+        "mengsel",
+        "mensen",
+        "mes",
+        "met",
+        "meubel",
+        "mevrouw",
+        "middel",
+        "midden",
+        "mij",
+        "mijn",
+        "miljoen",
+        "min",
+        "minder",
+        "minuut",
+        "mis",
+        "missen",
+        "mits",
+        "model",
+        "modern",
+        "moeder",
+        "moeilijk",
+        "moeten",
+        "mogelijk",
+        "mogen",
+        "moment",
+        "mond",
+        "mooi",
+        "moord",
+        "moorden",
+        "morgen",
+        "munt",
+        "muziek",
+        "na",
+        "naald",
+        "naam",
+        "naar",
+        "naast",
+        "nacht",
+        "nat",
+        "natuur",
+        "natuurlijk",
+        "nee",
+        "neer",
+        "negen",
+        "nek",
+        "nemen",
+        "net",
+        "netjes",
+        "neus",
+        "niet",
+        "niets",
+        "nieuw",
+        "nieuws",
+        "nobel",
+        "noch",
+        "nodig",
+        "noemen",
+        "nog",
+        "nood",
+        "nooit",
+        "noord",
+        "noot",
+        "normaal",
+        "nu",
+        "nul",
+        "nummer",
+        "object",
+        "oceaan",
+        "ochtend",
+        "oefening",
+        "of",
+        "offer",
+        "olie",
+        "olifant",
+        "om",
+        "oma",
+        "onder",
+        "onderwerp",
+        "onderzoek",
+        "oneven",
+        "ongeluk",
+        "ons",
+        "ontsnappen",
+        "ontbijt",
+        "ontdekken",
+        "ontmoeten",
+        "ontvangen",
+        "ontwikkelen",
+        "onze",
+        "oog",
+        "ooit",
+        "ook",
+        "oom",
+        "oor",
+        "oorlog",
+        "oorzaak",
+        "oost",
+        "op",
+        "opa",
+        "opeens",
+        "open",
+        "openlijk",
+        "opleiding",
+        "opnemen",
+        "oranje",
+        "orde",
+        "oud",
+        "ouder",
+        "over",
+        "overal",
+        "overeenkomen",
+        "overleden",
+        "overvallen",
+        "paar",
+        "paard",
+        "pad",
+        "pagina",
+        "pan",
+        "papa",
+        "papier",
+        "park",
+        "partner",
+        "pas",
+        "passeren",
+        "pen",
+        "peper",
+        "per",
+        "perfect",
+        "periode",
+        "persoon",
+        "piano",
+        "pijn",
+        "pistool",
+        "plaat",
+        "plaatje",
+        "plaats",
+        "plafond",
+        "plank",
+        "plant",
+        "plastic",
+        "plat",
+        "plattegrond",
+        "plein",
+        "plus",
+        "poes",
+        "politie",
+        "poort",
+        "populair",
+        "positie",
+        "postzegel",
+        "potlood",
+        "praten",
+        "presenteren",
+        "prijs",
+        "prins",
+        "prinses",
+        "privé",
+        "proberen",
+        "probleem",
+        "product",
+        "provincie",
+        "publiek",
+        "punt",
+        "raak",
+        "raam",
+        "radio",
+        "raken",
+        "rapport",
+        "recht",
+        "rechtdoor",
+        "rechts",
+        "rechtvaardig",
+        "redden",
+        "reeds",
+        "regen",
+        "reiken",
+        "reizen",
+        "rekenmachine",
+        "rennen",
+        "repareren",
+        "rest",
+        "restaurant",
+        "resultaat",
+        "richting",
+        "rijk",
+        "rijst",
+        "rijzen",
+        "ring",
+        "rok",
+        "rond",
+        "rood",
+        "rook",
+        "rots",
+        "roze",
+        "rubber",
+        "ruiken",
+        "ruimte",
+        "samen",
+        "sap",
+        "schaap",
+        "schaar",
+        "schaduw",
+        "scheiden",
+        "scherp",
+        "schetsen",
+        "schieten",
+        "schijnen",
+        "schip",
+        "school",
+        "schoon",
+        "schouder",
+        "schreeuw",
+        "schreeuwen",
+        "schrijven",
+        "schudden",
+        "seconde",
+        "sex",
+        "signaal",
+        "simpel",
+        "sinds",
+        "slaapkamer",
+        "slapen",
+        "slecht",
+        "sleutel",
+        "slim",
+        "slot",
+        "sluiten",
+        "smaak",
+        "smal",
+        "sneeuw",
+        "snel",
+        "snelheid",
+        "snijden",
+        "soep",
+        "sok",
+        "soms",
+        "soort",
+        "sorry",
+        "speciaal",
+        "spel",
+        "spelen",
+        "sport",
+        "spreken",
+        "springen",
+        "staal",
+        "stad",
+        "stap",
+        "start",
+        "station",
+        "steen",
+        "stelen",
+        "stem",
+        "stempel",
+        "ster",
+        "sterk",
+        "steun",
+        "stil",
+        "stilte",
+        "stoel",
+        "stof",
+        "stoffig",
+        "stom",
+        "stop",
+        "storm",
+        "straat",
+        "straffen",
+        "structuur",
+        "student",
+        "studie",
+        "stuk",
+        "succes",
+        "suiker",
+        "taal",
+        "taart",
+        "tafel",
+        "tak",
+        "tamelijk",
+        "tand",
+        "tante",
+        "tas",
+        "taxi",
+        "te",
+        "team",
+        "teen",
+        "tegen",
+        "teken",
+        "tekenen",
+        "telefoon",
+        "televisie",
+        "tellen",
+        "tennis",
+        "terug",
+        "terugkomst",
+        "terwijl",
+        "test",
+        "tevreden",
+        "thee",
+        "thuis",
+        "tien",
+        "tijd",
+        "titel",
+        "toekomst",
+        "toen",
+        "toename",
+        "totaal",
+        "traan",
+        "tram",
+        "trein",
+        "trekken",
+        "trouwen",
+        "trui",
+        "tuin",
+        "tussen",
+        "tweede",
+        "u",
+        "uit",
+        "uitleggen",
+        "uitnodigen",
+        "uitvinden",
+        "uitzoeken",
+        "uur",
+        "vaak",
+        "vaarwel",
+        "vader",
+        "vak",
+        "vakantie",
+        "vallen",
+        "vals",
+        "van",
+        "vandaag",
+        "vangen",
+        "vanmorgen",
+        "vannacht",
+        "varken",
+        "vast",
+        "vechten",
+        "veel",
+        "veer",
+        "veilig",
+        "ver",
+        "veranderen",
+        "verandering",
+        "verder",
+        "verdienen",
+        "verdrietig",
+        "verenigen",
+        "verf",
+        "vergelijkbaar",
+        "vergelijken",
+        "vergelijking",
+        "vergeten",
+        "vergeven",
+        "vergissen",
+        "verhaal",
+        "verhoging",
+        "verjaardag",
+        "verkeerd",
+        "verkopen",
+        "verlaten",
+        "verleden",
+        "verliezen",
+        "vernietigen",
+        "veroveren",
+        "verrassen",
+        "vers",
+        "verschil",
+        "verschrikkelijk",
+        "verspreiden",
+        "verstand",
+        "verstoppen",
+        "versturen",
+        "vertellen",
+        "vertrekken",
+        "vertrouwen",
+        "verwachten",
+        "verwijderen",
+        "verzamelen",
+        "verzameling",
+        "vet",
+        "vier",
+        "vierkant",
+        "vies",
+        "vijand",
+        "vijf",
+        "vijver",
+        "vinden",
+        "vinger",
+        "vis",
+        "vlag",
+        "vlees",
+        "vlieg",
+        "vliegtuig",
+        "vloer",
+        "voeden",
+        "voedsel",
+        "voelen",
+        "voet",
+        "voetbal",
+        "vogel",
+        "vol",
+        "volgende",
+        "volgorde",
+        "voor",
+        "voorbeeld",
+        "voorkomen",
+        "voorzichtig",
+        "voorzien",
+        "vork",
+        "vorm",
+        "vos",
+        "vouwen",
+        "vraag",
+        "vragen",
+        "vrede",
+        "vreemd",
+        "vreemde",
+        "vriend",
+        "vriendelijk",
+        "vriezen",
+        "vrij",
+        "vrijheid",
+        "vroeg",
+        "vroeger",
+        "vrouw",
+        "vullen",
+        "vuur",
+        "waar",
+        "waarom",
+        "waarschijnlijk",
+        "wachten",
+        "wakker",
+        "wanneer",
+        "want",
+        "wapen",
+        "warm",
+        "wassen",
+        "wat",
+        "water",
+        "we",
+        "week",
+        "weer",
+        "weg",
+        "welke",
+        "welkom",
+        "wens",
+        "wereld",
+        "werelddeel",
+        "werk",
+        "west",
+        "wetenschap",
+        "wie",
+        "wiel",
+        "wij",
+        "wijn",
+        "wijs",
+        "wild",
+        "willen",
+        "wind",
+        "winkel",
+        "winnen",
+        "winter",
+        "wissen",
+        "wit",
+        "wolf",
+        "wolk",
+        "wonder",
+        "woord",
+        "woud",
+        "wreed",
+        "zaak",
+        "zacht",
+        "zak",
+        "zand",
+        "zee",
+        "zeep",
+        "zeer",
+        "zeggen",
+        "zeil",
+        "zeker",
+        "zelfde",
+        "zes",
+        "zetten",
+        "zeven",
+        "ziek",
+        "ziekenhuis",
+        "ziel",
+        "zien",
+        "zij",
+        "zijn",
+        "zilver",
+        "zingen",
+        "zinken",
+        "zitten",
+        "zo",
+        "zoals",
+        "zoeken",
+        "zoet",
+        "zomer",
+        "zon",
+        "zonder",
+        "zonnig",
+        "zoon",
+        "zorg",
+        "zorgen",
+        "zou",
+        "zout",
+        "zuid",
+        "zulke",
+        "zullen",
+        "zus",
+        "zwaar",
+        "zwak",
+        "zwembad",
+        "zwemmen",
+    )
+
+    parts_of_speech: Dict[str, tuple] = {}  # empty: no part-of-speech groups are defined here
diff --git a/faker/providers/lorem/nl_NL/__init__.py b/faker/providers/lorem/nl_NL/__init__.py
new file mode 100644
index 0000000000..0c8e88e9c6
--- /dev/null
+++ b/faker/providers/lorem/nl_NL/__init__.py
@@ -0,0 +1,14 @@
+from typing import Dict
+
+from .. import Provider as LoremProvider
+from ..nl_BE import Provider as LoremProviderNL_BE
+
+
+class Provider(LoremProvider):
+    """Implement lorem provider for ``nl_NL`` locale.
+
+    Source: https://nl.wiktionary.org/wiki/WikiWoordenboek:Lijst_met_1000_basiswoorden
+    """
+
+    word_list = LoremProviderNL_BE.word_list  # reuse the nl_BE word tuple unchanged
+    parts_of_speech: Dict[str, tuple] = {}  # empty: no part-of-speech groups are defined here
diff --git a/tests/providers/test_lorem.py b/tests/providers/test_lorem.py
index 0ea3793350..40761a27f0 100644
--- a/tests/providers/test_lorem.py
+++ b/tests/providers/test_lorem.py
@@ -9,6 +9,7 @@
 from faker.providers.lorem.de_DE import Provider as DeDeLoremProvider
 from faker.providers.lorem.en_US import Provider as EnUsLoremProvider
 from faker.providers.lorem.fa_IR import Provider as FaIrLoremProvider
+from faker.providers.lorem.nl_BE import Provider as NlBeLoremProvider
 
 
 class TestLoremProvider:
@@ -572,3 +573,75 @@ def test_words(self, faker, num_samples):
         for _ in range(num_samples):
             words = faker.words(num_words)
             assert all(isinstance(word, str) and word in DeAtLoremProvider.word_list for word in words)
+
+
+class TestNlBe:
+    """Test ``nl_BE`` lorem provider
+
+    Copied from the TestDeDe class, but with the word_list from the NlBeLoremProvider.
+    """
+
+    word_list = {word.lower() for word in NlBeLoremProvider.word_list}  # set: hashed membership checks
+
+    def test_paragraph(self, faker, num_samples):
+        num_sentences = 10
+        for _ in range(num_samples):
+            paragraph = faker.paragraph(nb_sentences=num_sentences)
+            assert isinstance(paragraph, str)
+            words = paragraph.replace(".", "").split()
+            assert all(word.lower() in self.word_list for word in words)
+
+    def test_paragraphs(self, faker, num_samples):
+        num_paragraphs = 5
+        for _ in range(num_samples):
+            paragraphs = faker.paragraphs(nb=num_paragraphs)
+            for paragraph in paragraphs:
+                assert isinstance(paragraph, str)
+                words = paragraph.replace(".", "").split()
+                assert all(word.lower() in self.word_list for word in words)
+
+    def test_sentence(self, faker, num_samples):
+        num_words = 10
+        for _ in range(num_samples):
+            sentence = faker.sentence(nb_words=num_words)
+            assert isinstance(sentence, str)
+            words = sentence.replace(".", "").split()
+            assert all(word.lower() in self.word_list for word in words)
+
+    def test_sentences(self, faker, num_samples):
+        num_sentences = 5
+        for _ in range(num_samples):
+            sentences = faker.sentences(nb=num_sentences)
+            for sentence in sentences:
+                assert isinstance(sentence, str)
+                words = sentence.replace(".", "").split()
+                assert all(word.lower() in self.word_list for word in words)
+
+    def test_text(self, faker, num_samples):
+        num_chars = 25
+        for _ in range(num_samples):
+            text = faker.text(max_nb_chars=num_chars)
+            assert isinstance(text, str)
+            words = re.sub(r"[.\n]+", " ", text).split()
+            assert all(word.lower() in self.word_list for word in words)
+
+    def test_texts(self, faker, num_samples):
+        num_texts = 5
+        num_chars = 25
+        for _ in range(num_samples):
+            texts = faker.texts(max_nb_chars=num_chars, nb_texts=num_texts)
+            for text in texts:
+                assert isinstance(text, str)
+                words = re.sub(r"[.\n]+", " ", text).split()
+                assert all(word.lower() in self.word_list for word in words)
+
+    def test_word(self, faker, num_samples):
+        for _ in range(num_samples):
+            word = faker.word()
+            assert isinstance(word, str) and word in NlBeLoremProvider.word_list
+
+    def test_words(self, faker, num_samples):
+        num_words = 5
+        for _ in range(num_samples):
+            words = faker.words(num_words)
+            assert all(isinstance(word, str) and word in NlBeLoremProvider.word_list for word in words)