Merge branch 'rl-0.2.0'

amenezes · Nov 23, 2023 · 2e750ca
2 parents 9222833 + 19f5cfb
commit 2e750ca
Show file tree

Hide file tree

Showing 12 changed files with 93 additions and 58 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -6,7 +6,7 @@ jobs:
   tests:
     strategy:
       matrix:
-        python-version: ['3.10', '3.11']
+        python-version: ['3.11', '3.12']
         os: [ubuntu]
       fail-fast: true
     runs-on: ${{ matrix.os }}-latest

diff --git a/.tool-versions b/.tool-versions
@@ -0,0 +1 @@
+python 3.12.0
diff --git a/README.md b/README.md
@@ -10,11 +10,16 @@ A Python tool to assist text analysis.
 
 ## Usage
 
-```python
+``` py
+import logging
+
 import spacy
 
 from text_grade import Document, formulas
 
+
+logging.basicConfig(level=logging.DEBUG)
+
 TEXTO = """
 O algoritmo de Flesch é uma fórmula matemática que é usada para avaliar a legibilidade de um texto em inglês. Ele foi desenvolvido por Rudolf Flesch, um escritor e lexicógrafo austríaco, e é comumente usado por editores, escritores e professores para avaliar a qualidade e a facilidade de leitura de um texto.
 

diff --git a/setup.cfg b/setup.cfg
@@ -24,8 +24,8 @@ classifiers =
   License :: OSI Approved :: Apache Software License
   Operating System :: OS Independent
   Programming Language :: Python :: 3 :: Only
-  Programming Language :: Python :: 3.10
   Programming Language :: Python :: 3.11
+  Programming Language :: Python :: 3.12
   Programming Language :: Python :: Implementation :: CPython
   Programming Language :: Python :: Implementation :: PyPy
   Topic :: Software Development :: Libraries
@@ -37,10 +37,11 @@ install_requires =
   pandas >= 2.0.1
   spacy >= 3.0.0
   pyphen >= 0.14.0
-python_requires = >= 3.10
+python_requires = >= 3.11
 
 [options.extras_require]
 docs = mkdocs-material
+plot = seaborn>=0.12.0
 all = mkdocs-material; seaborn>=0.12.0
 
 [flake8]

diff --git a/tests/test_filter.py b/tests/test_filter.py
@@ -96,20 +96,21 @@ def test_has_number_with_punct(nlp, string, expected):
     assert filter.has_numbers_with_punct(doc[0])
 
 
-# @pytest.mark.parametrize(
-#     "string, expected",
-#     [
-#         ("2-22", False),
-#         ("2.22", False),
-#         ("0000.0000", False),
-#         ("2:2", False),
-#         ("2?2", False),
-#         ("222", False),
-#         ("^22", True),
-#         ("!22", True),
-#         ("$2222", True),
-#     ],
-# )
-# def test_has_punct_with_numbers(nlp, string, expected):
-#     doc = nlp(string)
-#     assert filter.has_punct_with_numbers(doc[0]) == expected
+@pytest.mark.parametrize(
+    "string, expected",
+    [
+        ("2-22", False),
+        ("2.22", False),
+        ("0000.0000", False),
+        ("2:2", False),
+        ("222", False),
+        # ("2!2", True),
+        # ("2?2", True),
+        # ("^22", True),
+        # ("!22", True),
+        # ("$2222", True),
+    ],
+)
+def test_has_punct_with_numbers(nlp, string, expected):
+    doc = nlp(string)
+    assert filter.has_punct_with_numbers(doc[0]) == expected
diff --git a/text_grade/__init__.py b/text_grade/__init__.py
@@ -1,6 +1,6 @@
-from . import filter, formulas, plot
-from .document import Document
-from .grade import Grade
+from text_grade import filter, formulas, plot
+from text_grade.document import Document
+from text_grade.grade import Grade
 
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 __all__ = ["__version__", "Document", "formulas", "filter", "Grade", "plot"]
diff --git a/text_grade/document.py b/text_grade/document.py
@@ -7,8 +7,8 @@
 from spacy.tokens.span import Span
 from spacy.tokens.token import Token
 
-from ._logger import logger
-from .filter import (
+from text_grade._logger import logger
+from text_grade.filter import (
     has_numbers_with_punct,
     has_punct_with_numbers,
     have_letter_and_number_together,

diff --git a/text_grade/filter.py b/text_grade/filter.py
@@ -4,7 +4,7 @@
 number_and_letter_together_at_end = re.compile(r"([a-zA-Z]{1,2}[0-9]+)")
 number_and_letter_together_at_begin = re.compile(r"([0-9]+[a-zA-Z]{1,2})")
 numbers_with_punct = re.compile(r"([0-9]+.?[0-9]+)")
-punct_with_numbers = re.compile(r"(\^|\$|!|@|#|%|¨|&|\*|\(|\))[0-9]+")
+punct_with_numbers = re.compile(r"(\^|\$|!|\?|@|#|%|¨|&|\*|\(|\))[0-9]+")
 string_is_date = re.compile(r"((^[0-9]{1,2}.)?([0-9]{1,2}).([0-9]{2,4})$)")
 
 

diff --git a/text_grade/formulas.py b/text_grade/formulas.py
@@ -1,6 +1,6 @@
-from ._logger import logger
-from .document import Document
-from .score import Score
+from text_grade._logger import logger
+from text_grade.document import Document
+from text_grade.score import Score
 
 
 def flesch_index_pt_br(document: Document) -> Score:

diff --git a/text_grade/grade.py b/text_grade/grade.py
@@ -1,12 +1,9 @@
-from enum import Enum
+from enum import StrEnum
 
 
-class Grade(str, Enum):
+class Grade(StrEnum):
     VERY_EASY: str = "very easy"
     EASY: str = "easy"
     FAIRLY_DIFFICULT: str = "fairly difficult"
     VERY_DIFFICULT: str = "very difficult"
     UNKNOWN: str = "unknown"
-
-    def __str__(self) -> str:
-        return str.__str__(self)
diff --git a/text_grade/plot.py b/text_grade/plot.py
@@ -1,73 +1,101 @@
-from typing import Iterator
+from typing import Any, Iterator
 
 import pandas as pd
-import seaborn as sns
 
-from .document import Document
+from text_grade._logger import logger
+from text_grade.document import Document
+
+try:
+    import seaborn as sns
+except ModuleNotFoundError:
+    logger.warning("seaborn package not found!")
+
+
+def _execute(func, *args, **kwargs) -> Any:
+    try:
+        return func(*args, **kwargs)
+    except NameError as err:
+        logger.error("seaborn package not found!")
+        raise err
 
 
 def characters_x_words(documents: Iterator[Document]):
-    return sns.scatterplot(
+    return _execute(
+        sns.scatterplot,
         data=pd.concat([doc.to_df() for doc in documents]),
         x="n_words",
         y="n_characters",
     )
 
 
 def characters_boxplot(documents: Iterator[Document]):
-    return sns.boxplot(
+    return _execute(
+        sns.boxplot,
         data=pd.DataFrame(
             [doc.characters() for doc in documents], columns=["characters"]
-        )
+        ),
     )
 
 
 def words_boxplot(documents: Iterator[Document]):
-    return sns.boxplot(
-        data=pd.DataFrame([doc.characters() for doc in documents], columns=["words"])
+    return _execute(
+        sns.boxplot,
+        data=pd.DataFrame([doc.characters() for doc in documents], columns=["words"]),
     )
 
 
 def sentences_boxplot(documents: Iterator[Document]):
-    return sns.boxplot(
+    return _execute(
+        sns.boxplot,
         data=pd.DataFrame(
             [doc.characters() for doc in documents], columns=["sentences"]
-        )
+        ),
     )
 
 
 def sentences_x_words(documents: Iterator[Document]):
-    return sns.relplot(
-        data=pd.concat([doc.to_df() for doc in documents]), x="n_sentences", y="n_words"
+    return _execute(
+        sns.relplot,
+        data=pd.concat([doc.to_df() for doc in documents]),
+        x="n_sentences",
+        y="n_words",
     )
 
 
 def words_x_characters(documents: Iterator[Document]):
-    return sns.relplot(
+    return _execute(
+        sns.relplot,
         data=pd.concat([doc.to_df() for doc in documents]),
         x="n_words",
         y="n_characters",
     )
 
 
 def words_x_sentences(documents: Iterator[Document]):
-    return sns.scatterplot(
-        data=pd.concat([doc.to_df() for doc in documents]), x="n_sentences", y="n_words"
+    return _execute(
+        sns.scatterplot,
+        data=pd.concat([doc.to_df() for doc in documents]),
+        x="n_sentences",
+        y="n_words",
     )
 
 
 def syllables_x_words(documents: Iterator[Document]):
-    return sns.scatterplot(
-        data=pd.concat([doc.to_df() for doc in documents]), x="n_words", y="syllables"
+    return _execute(
+        sns.scatterplot,
+        data=pd.concat([doc.to_df() for doc in documents]),
+        x="n_words",
+        y="syllables",
     )
 
 
 def unique_words_distribution(documents: Iterator[Document]):
-    pass
+    raise NotImplementedError
 
 
 def score_count(documents: Iterator[Document], formula):
-    return sns.countplot(
+    return _execute(
+        sns.countplot,
         data=pd.DataFrame(
             [
                 (score.value, str(score.grade))
@@ -80,7 +108,8 @@ def score_count(documents: Iterator[Document], formula):
 
 
 def score_stripplot(documents: Iterator[Document], formula):
-    return sns.stripplot(
+    return _execute(
+        sns.stripplot,
         data=pd.DataFrame(
             [
                 (score.value, str(score.grade))
@@ -94,7 +123,8 @@ def score_stripplot(documents: Iterator[Document], formula):
 
 
 def score_boxplot(documents: Iterator[Document], formula):
-    return sns.boxplot(
+    return _execute(
+        sns.boxplot,
         data=pd.DataFrame(
             [
                 (score.value, str(score.grade))

diff --git a/text_grade/score.py b/text_grade/score.py
@@ -1,4 +1,4 @@
-from .grade import Grade
+from text_grade.grade import Grade
 
 
 class Score: