Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,7 @@

[Longhu Qin](https://github.com/KenelmQLH)

[Pingzhi Li](https://github.com/pingzhiLi)

The starred contributors are the corresponding authors.

The starred contributors are the corresponding authors.
2 changes: 1 addition & 1 deletion EduNLP/SIF/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _is_formula_legal(self, formula_str):

"""
legal_tags = ['FormFigureID', 'FormFigureBase64', 'FigureID', 'FigureBase64',
'SIFBlank', 'SIFChoice', 'SIFTag', 'SIFSep', 'SIFUnderline']
'SIFBlank', 'SIFChoice', 'SIFTag', 'SIFSep', 'SIFUnderline', 'textf']
for tag in legal_tags:
if tag in formula_str:
return True
Expand Down
10 changes: 9 additions & 1 deletion EduNLP/SIF/segment/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ def __init__(self, item, figures: dict = None):
self._ques_mark_segments = []
self._tag_segments = []
self._sep_segments = []
segments = re.split(r"(\$.+?\$)", item)

# unwrap every $\textf{text,style}$ in the item, keeping only the inner text
item_no_textf = "".join(re.split(r"\$\\textf\{([^,]+?),b?d?i?t?u?w?}\$", item))

segments = re.split(r"(\$.+?\$)", item_no_textf)
for segment in segments:
if not segment:
continue
Expand Down Expand Up @@ -294,6 +298,10 @@ def seg(item, figures=None, symbol=None):
>>> s2 = seg(test_item_1_str_2, symbol="fgm")
>>> s2.tag_segments
['\\SIFTag{stem}', '\\SIFTag{options}']
>>> test_item_2 = r"已知$y=x$,则以下说法中$\textf{正确,b}$的是"
>>> s2 = seg(test_item_2)
>>> s2.text_segments
['已知', ',则以下说法中正确的是']
"""
segments = SegmentList(item, figures)
if symbol is not None:
Expand Down
5 changes: 5 additions & 0 deletions EduNLP/SIF/sif.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ def sif4sci(item: str, figures: (dict, bool) = None, safe=True, symbol: str = No
[]
>>> tl3.ques_mark_segments
[['\\SIFChoice']]
>>> test_item_3 = r"已知$y=x$,则以下说法中$\textf{正确,b}$的是"
>>> tl4 = sif4sci(test_item_3)
Warning: there is some chinese characters in formula!
>>> tl4.text_segments
[['已知'], ['说法', '中', '正确']]
"""
try:
if safe is True and is_sif(item) is not True:
Expand Down
6 changes: 5 additions & 1 deletion tests/test_sif/test_segement.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ def test_segment(figure0, figure1, figure0_base64, figure1_base64):
r"如图所示,则$\FormFigureBase64{%s}$的面积是$\SIFBlank$。$\FigureBase64{%s}$" % (figure0_base64, figure1_base64),
figures=True
)

with pytest.raises(TypeError):
s.append("123")
seg_test_text = seg(
r"如图所示,有三组$\textf{机器人,bu}$在踢$\textf{足球,b}$",
figures=True
)
assert seg_test_text.text_segments == ['如图所示,有三组机器人在踢足球']