# sif_addition

In [1]:
from EduNLP.SIF import is_sif, to_sif, sif4sci
from EduNLP.utils import dict2str4sif



## is_sif

In [2]:
# A stem in which every formula is wrapped in $...$ and the blank is written
# as \SIFUnderline; is_sif reports True for it and returns the text unchanged.
text = (
    r'若$x,y$满足约束条件'
    r'$\left\{\begin{array}{c}2 x+y-2 \leq 0 \\ x-y-1 \geq 0 \\ y+1 \geq 0\end{array}\right.$，'
    r'则$z=x+7 y$的最大值$\SIFUnderline$'
)

is_sif(text)


(True,
 '若$x,y$满足约束条件$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$，则$z=x+7 y$的最大值$\\SIFUnderline$')

In [3]:
# The variables y and x are not wrapped in $...$ here, so is_sif reports False;
# the second element of the result is the text with both variables wrapped.
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'
is_sif(text)

(False, '某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$（单位...')

## to_sif

In [4]:
# Same non-SIF text as in the is_sif example; to_sif returns the converted
# string directly (bare y and x become $y$ and $x$).
text = '某校一个课外学习小组为研究某作物的发芽率y和温度x（单位...'
to_sif(text)

'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$（单位...'

## sif4sci
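 Type codes for symbolization (passed to `sif4sci` via the `symbol` argument; the same codes are used by `filter`, `describe` and `drop` below):
 - "t": text
 - "f": formula
 - "g": figure
 - "m": question mark
 - "s": separator (`\SIFSep`, shown as `[SEP]` when symbolized)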

In [5]:
# An item mixing plain text, a formula, a blank (\SIFBlank) and a figure
# reference (\FigureID{1}); tokenized here with the default settings.
test_item = r"如图所示，则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$"
t1 = sif4sci(test_item)
t1

['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]

In [6]:
# Count the tokens of each type; the keys are the type codes t / f / g / m.
t1.describe()

{'t': 2, 'f': 2, 'g': 1, 'm': 1}

In [7]:
# Inside the block, formula, figure and question-mark tokens are filtered out,
# so only the text tokens are printed.
with t1.filter('fgm'):
    print(t1)

['如图所示', '面积']


In [8]:
# keep='t' names the token types to retain instead of the ones to drop;
# the printed result matches the filter('fgm') example.
with t1.filter(keep='t'):
    print(t1)

['如图所示', '面积']


In [9]:
# With no arguments nothing is filtered: the full token list is printed.
with t1.filter():
    print(t1)

['如图所示', '\\bigtriangleup', 'ABC', '面积', '\\SIFBlank', \FigureID{1}]


In [10]:
# Only the tokens that came from plain text.
t1.text_tokens

['如图所示', '面积']

In [11]:
# Only the tokens that came from formulas.
t1.formula_tokens

['\\bigtriangleup', 'ABC']

In [12]:
# Only the figure tokens.
t1.figure_tokens

[\FigureID{1}]

In [13]:
# Only the question-mark tokens (here the \SIFBlank placeholder).
t1.ques_mark_tokens

['\\SIFBlank']

In [14]:
# symbol="gm" replaces figure and question-mark tokens with [FIGURE] / [MARK];
# with the "ast" formula method the whole formula comes back as one Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast"}})

['如图所示', <Formula: \bigtriangleup ABC>, '面积', '[MARK]', '[FIGURE]']

In [15]:
# Symbolize every type: each text / formula / mark / figure segment is
# replaced by its placeholder token.
sif4sci(test_item, symbol="tfgm")

['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']

In [16]:
# Same as the "ast" example above, but return_type="list" yields the formula
# as individual tokens instead of a single Formula object.
sif4sci(test_item, symbol="gm", tokenization_params={"formula_params": {"method": "ast", "return_type": "list"}})

['如图所示', '\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']

In [17]:
# A multiple-choice item held as a dict: the stem ends with a \SIFChoice
# placeholder and the three options are separate SIF strings.
test_item_1 = {
    "stem": r"若$x=2$, $y=\sqrt{x}$，则下列说法正确的是$\SIFChoice$",
    "options": [r"$x < y$", r"$y = x$", r"$y < x$"]
}

In [18]:
# Tokenize the stem and each option independently with identical settings.
# With these formula parameters the output shows variables as numbered
# placeholders ('mathord_0', 'mathord_1', ...) rather than their names.
# A fresh parameter dict is built for every call.
tls = [
    sif4sci(
        segment,
        symbol="gm",
        tokenization_params={
            "formula_params": {
                "method": "ast",
                "return_type": "list",
                "ord2token": True,
                "var_numbering": True,
                "link_variable": False,
            }
        },
    )
    for segment in [test_item_1["stem"], *test_item_1["options"]]
]

In [19]:
# First row: tokens of the stem; remaining rows: one token list per option.
tls

[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\sqrt', '说法', '正确', '[MARK]'],
 ['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]

In [20]:
# Skip the stem and look at the option token lists only.
tls[1:]

[['mathord_0', '<', 'mathord_1'],
 ['mathord_0', '=', 'mathord_1'],
 ['mathord_0', '<', 'mathord_1']]

In [21]:
# Flatten the stem/options dict into one SIF string. In the result each field
# is preceded by a $\SIFTag{...}$ marker and the options are joined by $\SIFSep$.
# (dict2str4sif is imported with the other EduNLP names in the first cell.)
test_item_1_str = dict2str4sif(test_item_1, tag_mode="head", add_list_no_tag=False)
test_item_1_str

'$\\SIFTag{stem}$若$x=2$, $y=\\sqrt{x}$，则下列说法正确的是$\\SIFChoice$$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'

In [22]:
# Tokenize the flattened item string; formulas are parsed with the "ast"
# method and returned as token lists, figures and question marks symbolized.
tl1 = sif4sci(
    test_item_1_str,
    symbol="gm",
    tokenization_params={
        "formula_params": {
            "method": "ast",
            "return_type": "list",
            "ord2token": True,
        }
    },
)
    

In [23]:
# The first segment is the stem tag emitted by dict2str4sif.
tl1.get_segments()[0]

['\\SIFTag{stem}']

In [24]:
# By default text and formula segments are wrapped in [*_BEGIN] / [*_END] markers.
tl1.get_segments()[1:3]

[['[TEXT_BEGIN]', '[TEXT_END]'],
 ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]

In [25]:
# add_seg_type=False returns the segments without the begin/end markers.
tl1.get_segments(add_seg_type=False)[0:3]

[['\\SIFTag{stem}'],
 ['mathord', '=', 'textord'],
 ['mathord', '=', 'mathord', '{ }', '\\sqrt']]

In [26]:
# An item that has only an options list (no stem).
test_item_2 = {"options": [r"$x < y$", r"$y = x$", r"$y < x$"]}

In [27]:
# Flatten the options-only item into a single tagged SIF string.
test_item_2_str = dict2str4sif(test_item_2, tag_mode="head", add_list_no_tag=False)

In [28]:
# Inspect the flattened string: one options tag, options joined by $\SIFSep$.
test_item_2_str

'$\\SIFTag{options}$$x < y$$\\SIFSep$$y = x$$\\SIFSep$$y < x$'

In [29]:
# symbol="gms" additionally symbolizes separators, so each \SIFSep shows up
# as '[SEP]'; formulas are tokenized with the "ast" method into flat lists.
tl2 = sif4sci(
    test_item_2_str,
    symbol="gms",
    tokenization_params={
        "formula_params": {"method": "ast", "return_type": "list"},
    },
)
tl2

['\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']

In [30]:
# One segment per tag, option formula and separator, without type markers.
tl2.get_segments(add_seg_type=False)

[['\\SIFTag{options}'],
 ['x', '<', 'y'],
 ['[SEP]'],
 ['y', '=', 'x'],
 ['[SEP]'],
 ['y', '<', 'x']]

In [31]:
# drop="s" removes the separator segments, leaving the tag and the options.
tl2.get_segments(add_seg_type=False, drop="s")

[['\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]

In [32]:
# Tokenize only the stem, with default formula settings and symbol="gs"
# (question marks are not symbolized), then view the segments of type text.
tl3 = sif4sci(test_item_1["stem"], symbol="gs")
tl3.text_segments

[['说法', '正确']]

In [33]:
# One token list per formula in the stem.
tl3.formula_segments

[['x', '=', '2'], ['y', '=', '\\sqrt', '{', 'x', '}']]

In [34]:
# The stem contains no figure, so this is empty.
tl3.figure_segments

[]

In [35]:
# The \SIFChoice placeholder is kept verbatim because "m" is not in symbol.
tl3.ques_mark_segments

[['\\SIFChoice']]