Skip to content

Commit

Permalink
Improve calling standard functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ryangawei committed Aug 1, 2020
1 parent a85200a commit 8c710c2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 54 deletions.
15 changes: 0 additions & 15 deletions texthero/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,3 @@ def wrapper(*args, **kwargs):
return wrapper

return decorator


def root_caller(target_module):
    """
    Build a decorator that redirects a function to its namesake in `target_module`.

    The body of the decorated function is never executed. Every call looks up
    the attribute of `target_module` whose name equals the decorated
    function's `__name__` and invokes it with the caller's positional and
    keyword arguments. This lets a language-specific module expose a function
    that is implemented once elsewhere, which is how multilingual support can
    reuse shared functions.
    """

    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # `instance` belongs to the wrapt wrapper signature and is not used
        # here; only the name of `wrapped` matters for the lookup.
        return getattr(target_module, wrapped.__name__)(*args, **kwargs)

    return wrapper
73 changes: 34 additions & 39 deletions texthero/lang/hero_zh/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@

from spacy.lang.zh import Chinese
import texthero as hero
from texthero._helper import root_caller

# Standard functions that also support Chinese
from texthero.preprocessing import (
fillna,
has_content,
drop_no_content,
remove_whitespace,
remove_html_tags,
replace_urls,
remove_urls,
phrases
)

from typing import List, Callable

Expand All @@ -23,6 +34,25 @@
warnings.filterwarnings(action="ignore", category=UserWarning, module="gensim")


# Public API of this module, as exposed by `from ... import *`.
__all__ = [
    # Names imported unchanged from `texthero.preprocessing`.
    "fillna",
    "has_content",
    "drop_no_content",
    "remove_whitespace",
    "remove_html_tags",
    "replace_urls",
    "remove_urls",
    "phrases",
    # Remaining names; presumably all defined in this module (only some of
    # their definitions are visible in this view).
    "clean",
    "get_default_pipeline",
    "remove_hashtags",
    "remove_tags",
    "replace_hashtags",
    "replace_tags",
    "tokenize",
]


def get_default_pipeline() -> List[Callable[[pd.Series], pd.Series]]:
"""
Return a list containing all the methods used in the default cleaning pipeline.
Expand Down Expand Up @@ -60,11 +90,11 @@ def clean(s: pd.Series, pipeline=None) -> pd.Series:
--------
For the default pipeline:
>>> import texthero as hero
>>> import texthero.lang.hero_zh as hero
>>> import pandas as pd
>>> s = pd.Series("Uper 9dig. he her ÄÖÜ")
>>> s = pd.Series("我昨天吃烤鸭去了。 挺好吃的。")
>>> hero.clean(s)
0 uper 9dig aou
0 [我, 昨天, 吃, 烤鸭, 去, 了, 。, 挺好吃, 的, 。]
dtype: object
"""
if not pipeline:
Expand All @@ -73,41 +103,6 @@ def clean(s: pd.Series, pipeline=None) -> pd.Series:
return hero.preprocessing.clean(s, pipeline)


@root_caller(hero.preprocessing)
def fillna(s: pd.Series) -> pd.Series:
    """
    Placeholder that is redirected to `hero.preprocessing.fillna`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


@root_caller(hero.preprocessing)
def has_content(s: pd.Series):
    """
    Placeholder that is redirected to `hero.preprocessing.has_content`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


@root_caller(hero.preprocessing)
def drop_no_content(s: pd.Series):
    """
    Placeholder that is redirected to `hero.preprocessing.drop_no_content`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


@root_caller(hero.preprocessing)
def remove_html_tags(s: pd.Series) -> pd.Series:
    """
    Placeholder that is redirected to `hero.preprocessing.remove_html_tags`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


@root_caller(hero.preprocessing)
def remove_whitespace(s: pd.Series) -> pd.Series:
    """
    Placeholder that is redirected to `hero.preprocessing.remove_whitespace`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


@root_caller(hero.preprocessing)
def replace_urls(s: pd.Series, symbol: str) -> pd.Series:
    """
    Placeholder that is redirected to `hero.preprocessing.replace_urls`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments (here the
    Series and `symbol`), so this body is never executed.
    """


@root_caller(hero.preprocessing)
def remove_urls(s: pd.Series) -> pd.Series:
    """
    Placeholder that is redirected to `hero.preprocessing.remove_urls`.

    The `root_caller` decorator looks up the function with this name on
    `hero.preprocessing` and calls it with the caller's arguments, so this
    body is never executed.
    """


def replace_tags(s: pd.Series, symbol: str) -> pd.Series:
"""Replace all tags from a given Pandas Series with symbol.
Expand Down

0 comments on commit 8c710c2

Please sign in to comment.