# sif_addition

In [1]:
from EduNLP.SIF import is_sif, to_sif,sif4sci



## is_sif

In [2]:
# An item whose formulas are all wrapped in `$...$` and whose answer blank is
# written as `$\SIFUnderline$`; is_sif() reports it as valid SIF.
text = (
    '若$x,y$满足约束条件'
    '$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$，'
    '则$z=x+7 y$的最大值$\\SIFUnderline$'
)

is_sif(text)


True

In [3]:
# The variables y and x appear bare (not wrapped in `$...$`), and is_sif()
# reports this text as not being valid SIF.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'
is_sif(text)

False

In [4]:
# With return_parser=True, is_sif() returns a (flag, parser) tuple instead of
# a bare bool; the parser can be handed to to_sif() later.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'
is_sif(text, return_parser=True)

(False, <EduNLP.SIF.parser.parser.Parser at 0x2a3083fa978>)

## to_sif

In [5]:
# to_sif() rewrites the text into SIF; for this input the bare variables
# y and x come back wrapped as `$y$` and `$x$`.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'
to_sif(text)

'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$（单位...'

In [6]:
import time

# Compare two ways of converting a long text to SIF:
#   [1] call is_sif() and then to_sif() independently;
#   [2] ask is_sif() for its parser and pass that parser on to to_sif().
# time.perf_counter() is used because it is monotonic and meant for measuring
# short intervals, unlike the wall-clock time.time().

# ------------ Without the "speed-up" mechanism ------------ #
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'*150
start = time.perf_counter()
# Default to the input so `siftext` is always bound, even when `text` is
# already valid SIF and the conversion branch is skipped.
siftext = text
if not is_sif(text):
    siftext = to_sif(text)
print("[1]siftext : {} ,consume time [{}s]".format(siftext[:35], time.perf_counter() - start))

# ------------ With the "speed-up" mechanism ------------ #
start = time.perf_counter()
# ret is a (flag, parser) tuple.
ret = is_sif(text, return_parser=True)
print("[2]return : ", ret)
siftext = text
if ret[0] is not True:
    # Reuse the parser produced by is_sif() instead of letting to_sif() start over.
    siftext = to_sif(text, parser=ret[1])
print("[2]siftext : {} ,consume time [{}s]".format(siftext[:35], time.perf_counter() - start))

[1]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$（单位... ,consume time [0.018142223358154297s]
[2]return :  (False, <EduNLP.SIF.parser.parser.Parser object at 0x000002A30840FC88>)
[2]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$（单位... ,consume time [0.008990764617919922s]


## sif4sci
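The `symbol` argument selects which token types are replaced by a placeholder symbol:
- "t": text (`[TEXT]`)
- "f": formula (`[FORMULA]`)
- "g": figure (`[FIGURE]`)
- "m": question mark (`[MARK]`)
- "s": separator (`[SEP]`)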

In [7]:
# Sample item containing plain text, a formula, an answer blank (`\SIFBlank`)
# and a figure reference (`\FigureID{1}`).
test_item = r"如图所示，则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$"
# With default arguments sif4sci() tokenizes the item and leaves every token
# in its original form (no placeholder symbols).
t1 = sif4sci(test_item)
t1

['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]

In [8]:
# Token counts per type, keyed by the type letters t / f / g / m.
t1.describe()

{'t': 2, 'f': 2, 'g': 1, 'm': 1}

In [9]:
# Inside the `with` block, formula (f), figure (g) and question-mark (m)
# tokens are hidden, so only the text tokens are printed.
with t1.filter('fgm'):
    print(t1)

['如图所示', '面积']


In [10]:
# `keep` names the token types to retain; keeping only text ('t') prints the
# same tokens as filtering out 'fgm'.
with t1.filter(keep='t'):
    print(t1)

['如图所示', '面积']


In [11]:
# filter() without arguments hides nothing: every token is printed.
with t1.filter():
    print(t1)

['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]


In [12]:
# Only the tokens that come from text segments.
t1.text_tokens

['如图所示', '面积']

In [13]:
# Only the tokens that come from formula segments.
t1.formula_tokens

['\\bigtriangleup', 'ABC']

In [14]:
# Only the figure tokens (here the single `\FigureID{1}` reference).
t1.figure_tokens

[\FigureID{1}]

In [15]:
# Only the question-mark tokens (here the `\SIFBlank` answer blank).
t1.ques_mark_tokens

['\\SIFBlank']

In [16]:
# symbol="gm" replaces figure and question-mark tokens with [FIGURE] / [MARK].
# With formula method "ast" the formula is returned as a single Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast"}})

['如图所示', <Formula: \bigtriangleup ABC>, '面积', '[MARK]', '[FIGURE]']

In [17]:
# symbol="tfgm" symbolizes every type: each segment collapses to one of
# [TEXT], [FORMULA], [MARK] or [FIGURE].
sif4sci(test_item, symbol="tfgm")

['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']

In [18]:
# Adding "return_type": "list" flattens the formula into individual tokens
# ('\\bigtriangleup', 'A', 'B', 'C') instead of returning a Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})

['如图所示', '\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']

In [19]:
# Multiple-choice item stored as a dict: the question stem (ending in the
# `\SIFChoice` placeholder) plus the list of answer options.
test_item_1 = dict(
    stem=r"若$x=2$, $y=\sqrt{x}$，则下列说法正确的是$\SIFChoice$",
    options=[
        r"$x < y$",
        r"$y = x$",
        r"$y < x$",
    ],
)

In [20]:
# Tokenize the stem and each option separately with identical settings.
# The result holds one token list per string, with the stem first.
tls = [
    sif4sci(
        piece,
        symbol="gm",
        tokenization_params={
            "formula_params": {
                "method": "ast",
                "return_type": "list",
                "ord2token": True,
                "var_numbering": True,
                "link_variable": False,
            },
        },
    )
    for piece in [test_item_1["stem"], *test_item_1["options"]]
]

In [21]:
# Stem tokens followed by one token list per option. Formula symbols appear as
# type names (`mathord_N`, `textord`) rather than the original characters.
# NOTE(review): `x < y` and `y < x` produce identical tokens here, presumably
# because variables are numbered by order of appearance within each string — confirm.
tls

[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\sqrt', '说法', '正确', '[MARK]'],
 ['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]

In [22]:
# Skip the stem (index 0) to look at the option token lists only.
tls[1:]

[['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]

In [23]:
from EduNLP.utils import dict2str4sif

# Flatten the item dict into one SIF string: each field is introduced by a
# `$\SIFTag{<key>}$` marker and list entries are joined with `$\SIFSep$`.
test_item_1_str = dict2str4sif(test_item_1, tag_mode="head", add_list_no_tag=False)
test_item_1_str 

'$\\SIFTag{stem}$若$x=2$, $y=\\sqrt{x}$，则下列说法正确的是$\\SIFChoice$$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'

In [24]:
# Tokenize the tagged single-string form of the item; formulas are parsed with
# the "ast" method and returned as flat token lists.
tl1 = sif4sci(
    test_item_1_str,
    symbol="gm",
    tokenization_params={
        "formula_params": {
            "method": "ast",
            "return_type": "list",
            "ord2token": True,
        },
    },
)

In [25]:
# The first segment is the `\SIFTag{stem}` marker that opens the stem field.
tl1.get_segments()[0]

['\\SIFTag{stem}']

In [26]:
# By default text and formula segments are wrapped in type markers
# ([TEXT_BEGIN]/[TEXT_END], [FORMULA_BEGIN]/[FORMULA_END]).
tl1.get_segments()[1:3]

[['[TEXT_BEGIN]', '[TEXT_END]'],
 ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]

In [27]:
# add_seg_type=False returns the segments without the BEGIN/END type markers.
# NOTE(review): the text segment that held only markers no longer appears in
# this slice, so empty segments look to be dropped — confirm.
tl1.get_segments(add_seg_type=False)[0:3]

[['\\SIFTag{stem}'],
 ['mathord', '=', 'textord'],
 ['mathord', '=', 'mathord', '{ }', '\\sqrt']]

In [28]:
# Item that carries only answer options and no stem.
test_item_2 = dict(
    options=[
        r"$x < y$",
        r"$y = x$",
        r"$y < x$",
    ],
)

In [29]:
# Flatten the options-only item into a single tagged SIF string.
test_item_2_str = dict2str4sif(test_item_2, tag_mode="head", add_list_no_tag=False)

In [30]:
# The options follow a `$\SIFTag{options}$` marker and are separated by `$\SIFSep$`.
test_item_2_str

'$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'

In [31]:
# Including "s" in `symbol` turns each `\SIFSep` separator into a [SEP] token.
tl2 = sif4sci(
    test_item_2_str,
    symbol="gms",
    tokenization_params={
        "formula_params": {"method": "ast", "return_type": "list"},
    },
)
tl2

['\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']

In [32]:
# Each option and each [SEP] separator comes back as its own segment.
tl2.get_segments(add_seg_type=False)

[['\\SIFTag{options}'],
 ['x', '<', 'y'],
 ['[SEP]'],
 ['y', '=', 'x'],
 ['[SEP]'],
 ['y', '<', 'x']]

In [33]:
# drop="s" leaves the separator segments out, keeping the tag and the options.
tl2.get_segments(add_seg_type=False, drop="s")

[['\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]

In [34]:
# Tokenize only the stem, with no formula_params given. "m" is not part of
# `symbol`, so the `\SIFChoice` mark keeps its original form.
tl3 = sif4sci(test_item_1["stem"], symbol="gs")
# Text tokens grouped per text segment.
tl3.text_segments

[['说法', '正确']]

In [35]:
# One token list per formula in the stem (`x=2` and `y=\sqrt{x}`).
tl3.formula_segments

[['x', '=', '2'], ['y', '=', '\\sqrt', '{', 'x', '}']]

In [36]:
# The stem references no figure, so this list is empty.
tl3.figure_segments

[]

In [37]:
# The single question-mark segment: the `\SIFChoice` placeholder from the stem.
tl3.ques_mark_segments

[['\\SIFChoice']]