Skip to content

Commit

Permalink
Added English part-of-speech functionality. (#1648)
Browse files Browse the repository at this point in the history
Specific English parts of speech (verbs, nouns, adjectives and adverbs) can now be generated.
  • Loading branch information
e-panourgia committed May 9, 2022
1 parent f210b3b commit 8b807eb
Show file tree
Hide file tree
Showing 23 changed files with 2,323 additions and 44 deletions.
44 changes: 23 additions & 21 deletions faker/providers/lorem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Provider(BaseProvider):
def words(
self,
nb: int = 3,
part_of_speech: str = None,
ext_word_list: Optional[Sequence[str]] = None,
unique: bool = False,
) -> List[str]:
Expand All @@ -40,6 +41,12 @@ def words(
without replacement. If ``unique`` is ``False``, |random_choices| is
used instead, and the list returned may contain duplicates.
``part_of_speech`` selects the part of speech that the returned words
belong to. If ``ext_word_list`` is not ``None``, then
``part_of_speech`` is ignored. If the value of ``part_of_speech`` does
not correspond to an existing part of speech for the current locale,
then a ``ValueError`` is raised.
.. warning::
Depending on the length of a locale provider's built-in word list or
on the length of ``ext_word_list`` if provided, a large ``nb`` can
Expand All @@ -50,14 +57,23 @@ def words(
:sample: nb=5, ext_word_list=['abc', 'def', 'ghi', 'jkl']
:sample: nb=4, ext_word_list=['abc', 'def', 'ghi', 'jkl'], unique=True
"""
word_list = ext_word_list if ext_word_list else self.word_list # type: ignore[attr-defined]
if ext_word_list is not None:
word_list = ext_word_list
elif part_of_speech:
if part_of_speech not in self.parts_of_speech: # type: ignore[attr-defined]
raise ValueError(f"{part_of_speech} is not recognized as a part of speech.")
else:
word_list = self.parts_of_speech[part_of_speech] # type: ignore[attr-defined]
else:
word_list = self.word_list # type: ignore[attr-defined]

if unique:
unique_samples = cast(List[str], self.random_sample(word_list, length=nb))
return unique_samples
samples = cast(List[str], self.random_choices(word_list, length=nb))
return samples

def word(self, ext_word_list: Optional[Sequence[str]] = None) -> str:
def word(self, part_of_speech: str = None, ext_word_list: Optional[Sequence[str]] = None) -> str:
"""Generate a word.
This method uses |words| under the hood with the ``nb`` argument set to
Expand All @@ -66,13 +82,10 @@ def word(self, ext_word_list: Optional[Sequence[str]] = None) -> str:
:sample:
:sample: ext_word_list=['abc', 'def', 'ghi', 'jkl']
"""
return self.words(1, ext_word_list)[0]
return self.words(1, part_of_speech, ext_word_list)[0]

def sentence(
self,
nb_words: int = 6,
variable_nb_words: bool = True,
ext_word_list: Optional[Sequence[str]] = None,
self, nb_words: int = 6, variable_nb_words: bool = True, ext_word_list: Optional[Sequence[str]] = None
) -> str:
"""Generate a sentence.
Expand Down Expand Up @@ -116,10 +129,7 @@ def sentences(self, nb: int = 3, ext_word_list: Optional[Sequence[str]] = None)
return [self.sentence(ext_word_list=ext_word_list) for _ in range(0, nb)]

def paragraph(
self,
nb_sentences: int = 3,
variable_nb_sentences: bool = True,
ext_word_list: Optional[Sequence[str]] = None,
self, nb_sentences: int = 3, variable_nb_sentences: bool = True, ext_word_list: Optional[Sequence[str]] = None
) -> str:
"""Generate a paragraph.
Expand All @@ -145,12 +155,7 @@ def paragraph(
if variable_nb_sentences:
nb_sentences = self.randomize_nb_elements(nb_sentences, min=1)

para = self.word_connector.join(
self.sentences(
nb_sentences,
ext_word_list=ext_word_list,
)
)
para = self.word_connector.join(self.sentences(nb_sentences, ext_word_list=ext_word_list))

return para

Expand Down Expand Up @@ -225,10 +230,7 @@ def text(self, max_nb_chars: int = 200, ext_word_list: Optional[Sequence[str]] =
return "".join(text)

def texts(
self,
nb_texts: int = 3,
max_nb_chars: int = 200,
ext_word_list: Optional[Sequence[str]] = None,
self, nb_texts: int = 3, max_nb_chars: int = 200, ext_word_list: Optional[Sequence[str]] = None
) -> List[str]:
"""Generate a list of text strings.
Expand Down
4 changes: 4 additions & 0 deletions faker/providers/lorem/ar_AA/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -699,3 +701,5 @@ class Provider(LoremProvider):
"٢٠٠٤",
"٣٠",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/az_AZ/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -100,3 +102,5 @@ class Provider(LoremProvider):
"cümlə",
"böyük",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/bn_BD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -231,3 +233,5 @@ class Provider(LoremProvider):
"কোনও",
"কিছু",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/cs_CZ/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -1965,3 +1967,5 @@ class Provider(LoremProvider):
"žák",
"žít",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/da_DK/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -1006,3 +1008,5 @@ class Provider(LoremProvider):
"programmmere",
"brygge",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/de_DE/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -521,3 +523,5 @@ class Provider(LoremProvider):
"über",
"überall",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 4 additions & 0 deletions faker/providers/lorem/el_GR/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from .. import Provider as LoremProvider


Expand Down Expand Up @@ -389,3 +391,5 @@ class Provider(LoremProvider):
"ύψος",
"ώρα",
)

parts_of_speech: Dict[str, tuple] = {}
4 changes: 2 additions & 2 deletions faker/providers/lorem/en_PH/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ class Provider(LoremProvider):

def english_word(self) -> str:
"""Generate an English word."""
return self.word(self.english_word_list)
return self.word(ext_word_list=self.english_word_list)

def english_words(self, nb: int = 3, unique: bool = False) -> List[str]:
"""Generate a list of English words.
:sample: nb=5
:sample: nb=5, unique=True
"""
return self.words(nb, self.english_word_list, unique)
return self.words(nb=nb, ext_word_list=self.english_word_list, unique=unique)

def english_sentence(self, nb_words: int = 6, variable_nb_words: bool = True) -> str:
"""Generate a sentence in English.
Expand Down
Loading

0 comments on commit 8b807eb

Please sign in to comment.