Skip to content

Commit

Permalink
Merge branch 'rl-0.2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
amenezes committed Nov 23, 2023
2 parents 9222833 + 19f5cfb commit 2e750ca
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
tests:
strategy:
matrix:
python-version: ['3.10', '3.11']
python-version: ['3.11', '3.12']
os: [ubuntu]
fail-fast: true
runs-on: ${{ matrix.os }}-latest
Expand Down
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python 3.12.0
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ A Python tool to assist text analysis.

## Usage

```python
``` py
import logging

import spacy

from text_grade import Document, formulas


logging.basicConfig(level=logging.DEBUG)

TEXTO = """
O algoritmo de Flesch é uma fórmula matemática que é usada para avaliar a legibilidade de um texto em inglês. Ele foi desenvolvido por Rudolf Flesch, um escritor e lexicógrafo austríaco, e é comumente usado por editores, escritores e professores para avaliar a qualidade e a facilidade de leitura de um texto.
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ classifiers =
License :: OSI Approved :: Apache Software License
Operating System :: OS Independent
Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
Topic :: Software Development :: Libraries
Expand All @@ -37,10 +37,11 @@ install_requires =
pandas >= 2.0.1
spacy >= 3.0.0
pyphen >= 0.14.0
python_requires = >= 3.10
python_requires = >= 3.11

[options.extras_require]
docs = mkdocs-material
plot = seaborn>=0.12.0
all = mkdocs-material; seaborn>=0.12.0

[flake8]
Expand Down
35 changes: 18 additions & 17 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,20 +96,21 @@ def test_has_number_with_punct(nlp, string, expected):
assert filter.has_numbers_with_punct(doc[0])


# @pytest.mark.parametrize(
# "string, expected",
# [
# ("2-22", False),
# ("2.22", False),
# ("0000.0000", False),
# ("2:2", False),
# ("2?2", False),
# ("222", False),
# ("^22", True),
# ("!22", True),
# ("$2222", True),
# ],
# )
# def test_has_punct_with_numbers(nlp, string, expected):
# doc = nlp(string)
# assert filter.has_punct_with_numbers(doc[0]) == expected
# Check `filter.has_punct_with_numbers` against the first token produced by
# the `nlp` fixture for each sample string.
# Only the cases expected to return False are enabled; the cases expected to
# return True are left commented out in the parameter list below.
# NOTE(review): presumably the disabled cases fail because of how the pipeline
# tokenizes punctuation next to digits - confirm before re-enabling them.
@pytest.mark.parametrize(
"string, expected",
[
("2-22", False),
("2.22", False),
("0000.0000", False),
("2:2", False),
("222", False),
# ("2!2", True),
# ("2?2", True),
# ("^22", True),
# ("!22", True),
# ("$2222", True),
],
)
def test_has_punct_with_numbers(nlp, string, expected):
doc = nlp(string)
assert filter.has_punct_with_numbers(doc[0]) == expected
8 changes: 4 additions & 4 deletions text_grade/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from . import filter, formulas, plot
from .document import Document
from .grade import Grade
from text_grade import filter, formulas, plot
from text_grade.document import Document
from text_grade.grade import Grade

__version__ = "0.1.0"
__version__ = "0.2.0"
__all__ = ["__version__", "Document", "formulas", "filter", "Grade", "plot"]
4 changes: 2 additions & 2 deletions text_grade/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from spacy.tokens.span import Span
from spacy.tokens.token import Token

from ._logger import logger
from .filter import (
from text_grade._logger import logger
from text_grade.filter import (
has_numbers_with_punct,
has_punct_with_numbers,
have_letter_and_number_together,
Expand Down
2 changes: 1 addition & 1 deletion text_grade/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
number_and_letter_together_at_end = re.compile(r"([a-zA-Z]{1,2}[0-9]+)")
number_and_letter_together_at_begin = re.compile(r"([0-9]+[a-zA-Z]{1,2})")
numbers_with_punct = re.compile(r"([0-9]+.?[0-9]+)")
punct_with_numbers = re.compile(r"(\^|\$|!|@|#|%|¨|&|\*|\(|\))[0-9]+")
punct_with_numbers = re.compile(r"(\^|\$|!|\?|@|#|%|¨|&|\*|\(|\))[0-9]+")
string_is_date = re.compile(r"((^[0-9]{1,2}.)?([0-9]{1,2}).([0-9]{2,4})$)")


Expand Down
6 changes: 3 additions & 3 deletions text_grade/formulas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ._logger import logger
from .document import Document
from .score import Score
from text_grade._logger import logger
from text_grade.document import Document
from text_grade.score import Score


def flesch_index_pt_br(document: Document) -> Score:
Expand Down
7 changes: 2 additions & 5 deletions text_grade/grade.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from enum import Enum
from enum import StrEnum


class Grade(str, Enum):
class Grade(StrEnum):
VERY_EASY: str = "very easy"
EASY: str = "easy"
FAIRLY_DIFFICULT: str = "fairly difficult"
VERY_DIFFICULT: str = "very difficult"
UNKNOWN: str = "unknown"

def __str__(self) -> str:
return str.__str__(self)
72 changes: 51 additions & 21 deletions text_grade/plot.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,101 @@
from typing import Iterator
from typing import Any, Iterator

import pandas as pd
import seaborn as sns

from .document import Document
from text_grade._logger import logger
from text_grade.document import Document

try:
import seaborn as sns
except ModuleNotFoundError:
logger.warning("seaborn package not found!")


def _execute(func, *args, **kwargs) -> Any:
try:
return func(*args, **kwargs)
except NameError as err:
logger.error("seaborn package not found!")
raise err


def characters_x_words(documents: Iterator[Document]):
return sns.scatterplot(
return _execute(
sns.scatterplot,
data=pd.concat([doc.to_df() for doc in documents]),
x="n_words",
y="n_characters",
)


def characters_boxplot(documents: Iterator[Document]):
return sns.boxplot(
return _execute(
sns.boxplot,
data=pd.DataFrame(
[doc.characters() for doc in documents], columns=["characters"]
)
),
)


def words_boxplot(documents: Iterator[Document]):
return sns.boxplot(
data=pd.DataFrame([doc.characters() for doc in documents], columns=["words"])
return _execute(
sns.boxplot,
data=pd.DataFrame([doc.characters() for doc in documents], columns=["words"]),
)


def sentences_boxplot(documents: Iterator[Document]):
return sns.boxplot(
return _execute(
sns.boxplot,
data=pd.DataFrame(
[doc.characters() for doc in documents], columns=["sentences"]
)
),
)


def sentences_x_words(documents: Iterator[Document]):
return sns.relplot(
data=pd.concat([doc.to_df() for doc in documents]), x="n_sentences", y="n_words"
return _execute(
sns.relplot,
data=pd.concat([doc.to_df() for doc in documents]),
x="n_sentences",
y="n_words",
)


def words_x_characters(documents: Iterator[Document]):
return sns.relplot(
return _execute(
sns.relplot,
data=pd.concat([doc.to_df() for doc in documents]),
x="n_words",
y="n_characters",
)


def words_x_sentences(documents: Iterator[Document]):
return sns.scatterplot(
data=pd.concat([doc.to_df() for doc in documents]), x="n_sentences", y="n_words"
return _execute(
sns.scatterplot,
data=pd.concat([doc.to_df() for doc in documents]),
x="n_sentences",
y="n_words",
)


def syllables_x_words(documents: Iterator[Document]):
return sns.scatterplot(
data=pd.concat([doc.to_df() for doc in documents]), x="n_words", y="syllables"
return _execute(
sns.scatterplot,
data=pd.concat([doc.to_df() for doc in documents]),
x="n_words",
y="syllables",
)


def unique_words_distribution(documents: Iterator[Document]):
pass
raise NotImplementedError


def score_count(documents: Iterator[Document], formula):
return sns.countplot(
return _execute(
sns.countplot,
data=pd.DataFrame(
[
(score.value, str(score.grade))
Expand All @@ -80,7 +108,8 @@ def score_count(documents: Iterator[Document], formula):


def score_stripplot(documents: Iterator[Document], formula):
return sns.stripplot(
return _execute(
sns.stripplot,
data=pd.DataFrame(
[
(score.value, str(score.grade))
Expand All @@ -94,7 +123,8 @@ def score_stripplot(documents: Iterator[Document], formula):


def score_boxplot(documents: Iterator[Document], formula):
return sns.boxplot(
return _execute(
sns.boxplot,
data=pd.DataFrame(
[
(score.value, str(score.grade))
Expand Down
2 changes: 1 addition & 1 deletion text_grade/score.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .grade import Grade
from text_grade.grade import Grade


class Score:
Expand Down

0 comments on commit 2e750ca

Please sign in to comment.