From 72af029aa773856b77c6acf168e1758a6f4efb46 Mon Sep 17 00:00:00 2001 From: vgautam Date: Sun, 17 May 2020 15:13:52 -0700 Subject: [PATCH] Dump pandas requirement --- README.md | 8 +++--- requirements.txt | 1 - src/syllabifier/syllabifyARPA.py | 18 +++---------- tests/test_syllabifier.py | 46 ++++++++++++++++---------------- 4 files changed, 30 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 13e5230..5d2cf07 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,12 @@ Syllabifier is a Python module to syllabify your English pronunciations. Currently only ARPABET syllabification is supported. -It will take an ARPABET transcription in array or string form and return a Pandas Series or Python -list with the syllables chunked. +It will take an ARPABET transcription in array or string form and return a list with the syllables +chunked. ## Dependencies * python>=3.5 -* pandas>=0.20.0 * jupyter>=1.0.0 (only if you want to run the test notebook locally) ## How to Use @@ -17,7 +16,6 @@ list with the syllables chunked. * Import the function `from syllabifier import syllabifyARPA`. * Function parameters * A 2-letter ARPABET transcription in string form (with phones delimited by spaces) or as a Python list (stress markers on the vowels are optional) - * (Optional) bool return_list to return in Python list form instead of the default Pandas Series * (Optional) bool silence_warnings to suppress ValueErrors thrown because of unsyllabifiable input * Sample calls are in the Jupyter Notebook test.ipynb, using CMU Pronouncing Dictionary data. @@ -26,7 +24,7 @@ list with the syllables chunked. 
* **tests/test.ipynb**: Jupyter Notebook demonstrating sample calls to syllabifyARPA using CMUDict data * **tests/cmudict.txt**: Very large text file containing over 100,000 ARPABET-syllabified English words * **tests/cmusubset.txt**: Subset of ~60 words and transcriptions from the CMU Dictionary text file for testing convenience -* **tests/test_syllabifyARPA.py**: Unit and integration tests for the package +* **tests/test_syllabifier.py**: Unit and integration tests for the package ## ARPABET ARPABET is a method of transcribing General American English phonetically with only ASCII characters. Refer [here](https://en.wikipedia.org/wiki/ARPABET) for a table of mappings between IPA and ARPABET. This syllabifier accepts only the 2-letter ARPABET codes but case does not matter. diff --git a/requirements.txt b/requirements.txt index 3ec47b3..e69de29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +0,0 @@ -pandas>=0.20.0 diff --git a/src/syllabifier/syllabifyARPA.py b/src/syllabifier/syllabifyARPA.py index 92a1064..2d9ed49 100644 --- a/src/syllabifier/syllabifyARPA.py +++ b/src/syllabifier/syllabifyARPA.py @@ -8,7 +8,6 @@ # Vasundhara Gautam # October 3rd, 2017 -import pandas as pd import re import sys @@ -38,7 +37,7 @@ # Optional stress markers (0,1,2) after the vowel for flexibility VOWELS_REGEX = re.compile(r'(?:AA|AE|AH|AO|AW|AY|EH|ER|EY|IH|IY|OW|OY|UW|UH)[012]?') -def syllabifyARPA(arpa_arr, return_list=False, silence_warnings=False): +def syllabifyARPA(arpa_arr, silence_warnings=False): """ Syllabifies ARPABET transcriptions according to General American English syllabification rules. @@ -46,15 +45,11 @@ def syllabifyARPA(arpa_arr, return_list=False, silence_warnings=False): Args: arpa_arr: A string or array of ARPABET phones with optional stress markers on the vowels. 
- return_list: Boolean (default False) to return list of syllable strings silence_warnings: Boolean (default False) to suppress ValueErrors Returns: - Pandas Series of dtype 'Object' with syllables in each row. - If return_list set to True, returns a Python list of strings containing - the syllables. - In case the input is unsyllabifiable, an empty Series or list is - returned. + List of strings, one syllable per element. + In case the input is unsyllabifiable, an empty list is returned. Raises: ValueError if input contains non-ARPABET phonemes, no vowels or if it @@ -65,9 +60,7 @@ def handleError(string): if not silence_warnings: raise ValueError(string) - ret = [] #pd.Series(None) - if return_list: - ret = [] + ret = [] try: arpa_arr = arpa_arr.split() # Allows for phoneme array and string input @@ -116,10 +109,7 @@ def handleError(string): 'syllabification rules.' % word) return ret - #ret = pd.Series([' '.join(syllable) for syllable in final_arr]) ret = [' '.join(syllable) for syllable in final_arr] - if return_list: - ret = list(ret) return ret diff --git a/tests/test_syllabifier.py b/tests/test_syllabifier.py index d3f055a..8ddc088 100644 --- a/tests/test_syllabifier.py +++ b/tests/test_syllabifier.py @@ -11,57 +11,57 @@ def test_syllabifyARPA(): test_string = 'HH AE NG M AE N' - assert syllabifyARPA(test_string, return_list=True) == ['HH AE NG', 'M AE N'] + assert syllabifyARPA(test_string) == ['HH AE NG', 'M AE N'] def test_unsyllabifiable(): test_string = 'M G L AA' - assert not syllabifyARPA(test_string, return_list=True, silence_warnings=True) + assert not syllabifyARPA(test_string, silence_warnings=True) def test_empty(): test_string = '' - assert not syllabifyARPA(test_string, return_list=True, silence_warnings=True) + assert not syllabifyARPA(test_string, silence_warnings=True) def test_no_vowel(): test_string = 'K S' - assert not syllabifyARPA(test_string, return_list=True, silence_warnings=True) + assert not syllabifyARPA(test_string, 
silence_warnings=True) def test_sixths(): test_string = 'S IH K S TH S' - assert syllabifyARPA(test_string, return_list=True) == ['S IH K S TH S'] + assert syllabifyARPA(test_string) == ['S IH K S TH S'] def test_lowercase(): test_string = 'ow' - assert syllabifyARPA(test_string, return_list=True) == ['OW'] + assert syllabifyARPA(test_string) == ['OW'] def test_mixedcase(): test_string = 'oW' - assert syllabifyARPA(test_string, return_list=True) == ['OW'] + assert syllabifyARPA(test_string) == ['OW'] def test_non_arpabet(): test_string = 'GH IY' - assert not syllabifyARPA(test_string, return_list=True, silence_warnings=True) + assert not syllabifyARPA(test_string, silence_warnings=True) def test_array(): test_array = ['K', 'AE', 'T'] - assert syllabifyARPA(test_array, return_list=True) == ['K AE T'] + assert syllabifyARPA(test_array) == ['K AE T'] def test_weird_array(): test_array = ['K AE', 'T'] - assert not syllabifyARPA(test_array, return_list=True, silence_warnings=True) + assert not syllabifyARPA(test_array, silence_warnings=True) def test_CVC_syllables(): for syllable in itertools.product(legal_onsets, VOWELS, legal_codas): - assert syllabifyARPA(list(syllable), return_list=True) + assert syllabifyARPA(list(syllable)) def test_SZ_extension(): @@ -70,8 +70,8 @@ def test_SZ_extension(): for syllable in itertools.product(legal_onsets, VOWELS, sz_extendable_codas): s_extended = list(syllable) + ['S'] z_extended = list(syllable) + ['Z'] - assert (syllabifyARPA(s_extended, return_list=True, silence_warnings=True) != - syllabifyARPA(z_extended, return_list=True, silence_warnings=True)) + assert (syllabifyARPA(s_extended, silence_warnings=True) != + syllabifyARPA(z_extended, silence_warnings=True)) def test_TD_extension(): @@ -80,45 +80,45 @@ def test_TD_extension(): for syllable in itertools.product(legal_onsets, VOWELS, td_extendable_codas): t_extended = list(syllable) + ['T'] d_extended = list(syllable) + ['D'] - assert (syllabifyARPA(t_extended, 
return_list=True, silence_warnings=True) != - syllabifyARPA(d_extended, return_list=True, silence_warnings=True)) + assert (syllabifyARPA(t_extended, silence_warnings=True) != + syllabifyARPA(d_extended, silence_warnings=True)) def test_CCVC_syllables(): syllables = ['P L EY', 'D R IY M', 'HH Y UW JH', 'S L IY P', 'G W AA M', 'T W IH N', 'K R AW D'] for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] def test_CVCC_syllables(): syllables = ['HH EH L P', 'W EH L SH', 'F IH F TH', 'L AH NG Z', 'B EH L T', 'T AE K T'] # TODO:FIX for mensch for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] def test_V_syllables(): syllables = ['OW', 'AY', 'UW', 'AA', 'EY'] for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] def test_CVCCC_syllables(): syllables = ['Y EH L P S', 'K AE L K S', 'W AO R M TH', 'W IH L S T', 'JH IH NG K S'] for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] def test_CCVCC_syllables(): syllables = ['TH W AO R T', 'K L AH T S', 'B L AH N T', 'F Y OW R D', 'G R IH N CH'] for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] def test_s_clusters(): syllables = ['S P OW R T', 'S M AY L', 'S F IH NG K S', 'S P Y UW', 'S T AH D', 'S K W EY R'] for syllable in syllables: - assert syllabifyARPA(syllable, return_list=True) == [syllable] + assert syllabifyARPA(syllable) == [syllable] prons_syllabified = { 'S F R AH JH IH S T IH K S': ['S F R AH', 'JH IH', 'S T IH K S'], 'B L AE S T IH D': ['B L AE', 'S T IH D'], @@ -126,7 +126,7 @@ def test_s_clusters(): 'S T EH TH AH S K OW P': ['S T EH', 'TH AH', 'S K OW P'] } for pron, syllabification in 
prons_syllabified.items(): - assert syllabifyARPA(pron, return_list=True) == syllabification + assert syllabifyARPA(pron) == syllabification def test_long_words(): @@ -137,7 +137,7 @@ def test_long_words(): 'F L AE JH AH L EY SH AH N Z': ['F L AE', 'JH AH', 'L EY', 'SH AH N Z'] } for pron, syllabification in prons_syllabified.items(): - assert syllabifyARPA(pron, return_list=True) == syllabification + assert syllabifyARPA(pron) == syllabification # def test_word_boundaries():