diff --git a/EduNLP/SIF/parser/parser.py b/EduNLP/SIF/parser/parser.py index 471bb450..b9b1e269 100644 --- a/EduNLP/SIF/parser/parser.py +++ b/EduNLP/SIF/parser/parser.py @@ -15,7 +15,7 @@ class Parser: description_list use Parser to process and describe the txt """ - def __init__(self, data): + def __init__(self, data, check_formula=True): self.lookahead = 0 self.head = 0 self.text = data @@ -26,6 +26,7 @@ def __init__(self, data): self.warnning = 0 self.fomula_illegal_flag = 0 self.fomula_illegal_message = '' + self.check_formula = check_formula # 定义特殊变量 self.len_bracket = len('$\\SIFChoice$') @@ -254,8 +255,9 @@ def get_token(self): if self.head >= len(self.text): self.call_error() return self.error - # 检查 latex 公式的完整性和可解析性 - if not self._is_formula_legal(self.text[formula_start:self.head]): + + # 检查latex公式的完整性和可解析性 + if self.check_formula and not self._is_formula_legal(self.text[formula_start:self.head]): self.call_error() return self.error self.head += 1 diff --git a/EduNLP/SIF/sif.py b/EduNLP/SIF/sif.py index 68787131..41518966 100644 --- a/EduNLP/SIF/sif.py +++ b/EduNLP/SIF/sif.py @@ -10,7 +10,7 @@ __all__ = ["is_sif", "to_sif", "sif4sci"] -def is_sif(item): +def is_sif(item, check_formula=True, return_parser=False): r""" the part aims to check whether the input is sif format @@ -18,13 +18,23 @@ def is_sif(item): ---------- item:str a raw item which respects stem + check_formula: bool + whether to check the formulas when parsing item. + + True if check the validity of formulas in item + False if not check the validity of formulas in item, which is faster + return_parser: bool + whether to put the parsed item in return. 
+ + when True, the format of return is (bool, Parser) + when False, the format of return is bool Returns ------- bool - when item can not be parsed correctly, raise Error; - when item doesn't need to be modified, return Ture; - when item needs to be modified, return False; + when item can not be parsed correctly, raise ValueError; + when item is in standardized format originally, return True (and the Parser of item); + when item isn't in standardized format originally, return False (and the Parser of item); Examples -------- @@ -34,19 +44,22 @@ def is_sif(item): >>> is_sif(text) True >>> text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...' - >>> is_sif(text) - False + >>> ret = is_sif(text, return_parser=True) + >>> ret # doctest: +ELLIPSIS + (False, <EduNLP.SIF.parser.parser.Parser object at 0x...>) """ - item_parser = Parser(item) + item_parser = Parser(item, check_formula) item_parser.description_list() if item_parser.fomula_illegal_flag: raise ValueError(item_parser.fomula_illegal_message) - if item_parser.error_flag == 0 and item_parser.modify_flag == 0: - return True - return False + ret = True if item_parser.error_flag == 0 and item_parser.modify_flag == 0 else False + if return_parser is True: + return ret, item_parser + else: + return ret -def to_sif(item): +def to_sif(item, check_formula=True, parser: Parser = None): r""" the part aims to switch item to sif formate @@ -54,6 +67,10 @@ def to_sif(item): ---------- items:str a raw item which respects stem + check_formula: bool + whether to check the formulas when parsing item (only work when parser=None). + parser: Parser + the parser of item returned from is_sif. Returns ------- @@ -66,14 +83,20 @@ def to_sif(item): >>> siftext = to_sif(text) >>> siftext '某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...' + >>> ret = is_sif(text, return_parser=True) + >>> ret # doctest: +ELLIPSIS + (False, <EduNLP.SIF.parser.parser.Parser object at 0x...>) + >>> to_sif(text, parser=ret[1]) + '某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'
+ """ - item_parser = Parser(item) - item_parser.description_list() - item = item_parser.text - return item + if parser is not None: + return parser.text + else: + return is_sif(item, check_formula, return_parser=True)[1].text -def sif4sci(item: str, figures: (dict, bool) = None, safe=True, symbol: str = None, tokenization=True, +def sif4sci(item: str, figures: (dict, bool) = None, mode: int = 2, symbol: str = None, tokenization=True, tokenization_params=None, errors="raise"): r""" @@ -84,12 +107,15 @@ def sif4sci(item: str, figures: (dict, bool) = None, safe=True, symbol: str = No item:str a raw item which respects stem figures:dict - {"FigureID": Base64 encoding of the figure} + when it is a dict, it means the id-to-instance for figures in 'FormFigureID{...}' format, + when it is a bool, it means whether to instantiate figures in 'FormFigureBase64{...}' format - safe:bool - Check whether the text conforms to the sif format + mode: int + when mode = 2, use is_sif and check formula in item + when mode = 1, use is_sif but don't check formula in item + when mode = 0, don't use is_sif and don't check anything in item - symbol:str + symbol: str select the methods to symbolize: "t": text "f": formula @@ -98,17 +124,26 @@ def sif4sci(item: str, figures: (dict, bool) = None, safe=True, symbol: str = No "a": tag "s": sep - tokenization:bool - True: tokenize the item + tokenization: bool + whether to tokenize item after segmentation tokenization_params: - method: which tokenizer to be used, "linear" or "ast" - - The parameters only useful for "linear": None + the dict of text_params, formula_params and figure_params in tokenization + For formula_params: + method: which tokenizer to be used, "linear" or "ast" + The parameters only useful for "linear": + skip_figure_formula: whether to skip the formula in figure format + symbolize_figure_formula: whether to symbolize the formula in figure format + The parameters only useful for "ast": + ord2token: whether to transfer the 
variables (mathord) and constants (textord) to special tokens. + var_numbering: whether to use number suffix to denote different variables + return_type: 'list' or 'ast' + More parameters can be found in the definition in SIF.tokenization.formula + For figure_params: + figure_instance:whether to return instance of figures in tokens + For text_params: + See definition in SIF.tokenization.text - The parameters only useful for "ast": - ord2token: whether to transfer the variables (mathord) and constants (textord) to special tokens. - var_numbering: whether to use number suffix to denote different variables errors: warn, raise, @@ -214,8 +249,15 @@ def sif4sci(item: str, figures: (dict, bool) = None, safe=True, symbol: str = No [['已知'], ['说法', '中', '正确']] """ try: - if safe is True and is_sif(item) is not True: - item = to_sif(item) + if mode in [1, 2]: + check_formula = (mode == 2) + sif, item_parser = is_sif(item, check_formula=check_formula, return_parser=True) + if sif is not True: + item = to_sif(item, parser=item_parser) + elif mode != 0: + raise KeyError( + "Unknown mode %s, use only 0 or 1 or 2." 
% mode + ) ret = seg(item, figures, symbol) diff --git a/examples/sif/sif.ipynb b/examples/sif/sif.ipynb index 25affe58..3758f3f8 100644 --- a/examples/sif/sif.ipynb +++ b/examples/sif/sif.ipynb @@ -34,12 +34,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "source": [ - "item = {\n", - " \"stem\": r\"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\",\n", - " \"options\": [\"$p_1=p_2$\", \"$p_1=p_3$\", \"$p_2=p_3$\", \"$p_1=p_2+p_3$\"]\n", - "}\n", + "item = {\r\n", + " \"stem\": r\"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\",\r\n", + " \"options\": [\"$p_1=p_2$\", \"$p_1=p_3$\", \"$p_2=p_3$\", \"$p_1=p_2+p_3$\"]\r\n", + "}\r\n", "item[\"stem\"]" ], "outputs": [ @@ -51,7 +51,7 @@ ] }, "metadata": {}, - "execution_count": 5 + "execution_count": 1 } ], "metadata": { @@ -70,24 +70,24 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "source": [ - "from PIL import Image\n", - "img = Image.open(\"../../asset/_static/item_figure.png\")\n", - "figures = {\"1\": img}\n", + "from PIL import Image\r\n", + "img = Image.open(\"../../asset/_static/item_figure.png\")\r\n", + "figures = {\"1\": img}\r\n", "img" ], "outputs": [ { "output_type": "execute_result", "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlUSvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT08PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDKR9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3BnxPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDc
VqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0dGBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9kctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v96sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZV
XYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kDsLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dSo0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUz
i6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJxBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBBUVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLR
sdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQn5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmgOZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9erVIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTGDAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8o
XkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC", "text/plain": [ - "" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlUSvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT0
8PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDKR9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3BnxPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDcVqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0d
GBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9kctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v96sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZVXYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG
63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kDsLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dSo0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUzi6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJxBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBB
UVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLRsdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQn5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmg
OZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9erVIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTGDAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8oXkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC" + "" + ] }, "metadata": {}, - "execution_count": 6 + "execution_count": 2 } ], "metadata": { @@ -108,11 +108,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "source": [ "from EduNLP.SIF import sif4sci, is_sif, to_sif" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "D:\\MySoftwares\\Anaconda\\envs\\data\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", + " warnings.warn(msg)\n" + ] + } + ], "metadata": { "collapsed": false, "pycharm": { @@ -129,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "source": [ "is_sif(item['stem'])" ], @@ -142,7 +151,7 @@ ] }, "metadata": {}, - "execution_count": 7 + "execution_count": 4 } ], "metadata": {} @@ -156,9 +165,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "source": [ - "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\n", + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", "is_sif(text)" ], "outputs": [ @@ -170,17 +179,17 @@ ] }, "metadata": {}, - "execution_count": 8 + "execution_count": 5 } ], "metadata": {} }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "source": [ - "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\n", - "to_sif(text)\n" + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", + "to_sif(text)\r\n" ], "outputs": [ { @@ -191,7 +200,7 @@ ] }, "metadata": {}, - "execution_count": 9 + "execution_count": 6 } ], "metadata": {} @@ -232,9 +241,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "source": [ - "segments = sif4sci(item[\"stem\"], figures=figures, tokenization=False)\n", + "segments = sif4sci(item[\"stem\"], figures=figures, tokenization=False)\r\n", "segments" ], "outputs": [ @@ -246,7 +255,7 @@ ] }, "metadata": {}, - "execution_count": 12 + "execution_count": 7 } ], "metadata": {} @@ -260,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "source": [ "segments.text_segments" ], @@ -283,7 +292,7 @@ ] }, "metadata": {}, - "execution_count": 13 + "execution_count": 8 } ], "metadata": {} @@ -297,9 +306,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "source": [ - "segments.formula_segments\n" + "segments.formula_segments\r\n" ], "outputs": [ { @@ -319,7 +328,7 @@ ] }, "metadata": {}, - "execution_count": 15 + 
"execution_count": 9 } ], "metadata": {} @@ -333,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 10, "source": [ "segments.figure_segments" ], @@ -346,14 +355,14 @@ ] }, "metadata": {}, - "execution_count": 16 + "execution_count": 10 } ], "metadata": {} }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "source": [ "segments.figure_segments[0].figure" ], @@ -361,13 +370,13 @@ { "output_type": "execute_result", "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlUSvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT08PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDK
R9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3BnxPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDcVqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0dGBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9k
ctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v96sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZVXYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kD
sLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dSo0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUzi6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJxBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBBUVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/
YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLRsdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQn5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmgOZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9er
VIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTGDAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8oXkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC", "text/plain": [ - "" - ], - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlUSvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT08PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDKR9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3BnxPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDc
VqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0dGBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9kctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v96sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZV
XYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kDsLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dSo0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUz
i6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJxBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBBUVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLR
sdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQn5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmgOZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9erVIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTGDAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8o
XkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC" + "" + ] }, "metadata": {}, - "execution_count": 17 + "execution_count": 11 } ], "metadata": {} @@ -381,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "source": [ "segments.ques_mark_segments" ], @@ -394,7 +403,7 @@ ] }, "metadata": {}, - "execution_count": 19 + "execution_count": 12 } ], "metadata": {} @@ -420,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "source": [ "sif4sci(item[\"stem\"], figures=figures, tokenization=False, symbol=\"tfgm\")" ], @@ -433,7 +442,7 @@ ] }, "metadata": {}, - "execution_count": 11 + "execution_count": 13 } ], "metadata": { @@ -461,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "source": [ "tokens = sif4sci(item[\"stem\"], figures=figures, tokenization=True)" ], @@ -487,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "source": [ "tokens.text_tokens" ], @@ -532,7 +541,7 @@ ] }, "metadata": {}, - "execution_count": 12 + "execution_count": 15 } ], "metadata": { @@ -556,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "source": [ "tokens.formula_tokens" ], @@ -593,7 +602,7 @@ ] }, "metadata": {}, - "execution_count": 13 + "execution_count": 16 } ], "metadata": { @@ -619,17 +628,17 @@ }, { "cell_type": "code", - "execution_count": 37, - 
"source": [ - "sif4sci(\n", - " item[\"stem\"],\n", - " figures=figures,\n", - " tokenization=True,\n", - " tokenization_params={\n", - " \"formula_params\": {\n", - " \"method\": \"linear\",\n", - " }\n", - " }\n", + "execution_count": 17, + "source": [ + "sif4sci(\r\n", + " item[\"stem\"],\r\n", + " figures=figures,\r\n", + " tokenization=True,\r\n", + " tokenization_params={\r\n", + " \"formula_params\": {\r\n", + " \"method\": \"linear\",\r\n", + " }\r\n", + " }\r\n", ").formula_tokens" ], "outputs": [ @@ -665,7 +674,7 @@ ] }, "metadata": {}, - "execution_count": 37 + "execution_count": 17 } ], "metadata": { @@ -686,18 +695,18 @@ }, { "cell_type": "code", - "execution_count": 39, - "source": [ - "sif4sci(\n", - " item[\"stem\"],\n", - " figures=figures,\n", - " tokenization=True,\n", - " tokenization_params={\n", - " \"formula_params\":{\n", - " \"method\": \"ast\",\n", - " }\n", - " }\n", - ").formula_tokens\n" + "execution_count": 18, + "source": [ + "sif4sci(\r\n", + " item[\"stem\"],\r\n", + " figures=figures,\r\n", + " tokenization=True,\r\n", + " tokenization_params={\r\n", + " \"formula_params\":{\r\n", + " \"method\": \"ast\",\r\n", + " }\r\n", + " }\r\n", + ").formula_tokens\r\n" ], "outputs": [ { @@ -717,7 +726,7 @@ ] }, "metadata": {}, - "execution_count": 39 + "execution_count": 18 } ], "metadata": { @@ -736,55 +745,55 @@ }, { "cell_type": "code", - "execution_count": 109, - "source": [ - "f = sif4sci(\n", - " item[\"stem\"],\n", - " figures=figures,\n", - " tokenization=True,\n", - " tokenization_params={\n", - " \"formula_params\":{\n", - " \"method\": \"ast\",\n", - " \"return_type\": \"ast\",\n", - " \"ord2token\": True,\n", - " \"var_numbering\": True,\n", - " }\n", - " }\n", - ").formula_tokens\n", - "f\n" + "execution_count": 19, + "source": [ + "f = sif4sci(\r\n", + " item[\"stem\"],\r\n", + " figures=figures,\r\n", + " tokenization=True,\r\n", + " tokenization_params={\r\n", + " \"formula_params\":{\r\n", + " \"method\": \"ast\",\r\n", + " 
\"return_type\": \"ast\",\r\n", + " \"ord2token\": True,\r\n", + " \"var_numbering\": True,\r\n", + " }\r\n", + " }\r\n", + ").formula_tokens\r\n", + "f\r\n" ], "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ]" + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" ] }, "metadata": {}, - "execution_count": 109 + "execution_count": 19 } ], "metadata": {} }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 20, "source": [ - "for i in range(0, len(f)):\n", - " ForestPlotter().export(\n", - " f[i], root_list=[node for node in f[i]],\n", - " )\n", - "# plt.show()\n" + "# for i in range(0, len(f)):\r\n", + "# ForestPlotter().export(\r\n", + "# f[i], root_list=[node for node in f[i]],\r\n", + "# )\r\n", + "# plt.show()\r\n" ], "outputs": [], "metadata": {} @@ -799,19 +808,19 @@ }, { "cell_type": "code", - "execution_count": 40, - "source": [ - "sif4sci(\n", - " item[\"stem\"],\n", - " figures=figures,\n", - " tokenization=True,\n", - " tokenization_params={\n", - " \"formula_params\":{\n", - " \"method\": \"ast\",\n", - " \"return_type\": \"list\",\n", - " \"ord2token\": True,\n", - " }\n", - " }\n", + "execution_count": 21, + "source": [ + "sif4sci(\r\n", + " item[\"stem\"],\r\n", + " figures=figures,\r\n", + " tokenization=True,\r\n", + " tokenization_params={\r\n", + " \"formula_params\":{\r\n", + " \"method\": \"ast\",\r\n", + " \"return_type\": \"list\",\r\n", + " \"ord2token\": True,\r\n", + " }\r\n", + " }\r\n", ").formula_tokens" ], "outputs": [ @@ -860,7 +869,7 @@ ] }, "metadata": {}, - "execution_count": 40 + "execution_count": 21 } ], "metadata": { @@ -879,20 +888,20 @@ }, { "cell_type": "code", - "execution_count": 44, - "source": [ - "sif4sci(\n", - " item[\"stem\"],\n", - " figures=figures,\n", - " tokenization=True,\n", - " tokenization_params={\n", - " 
\"formula_params\":{\n", - " \"method\": \"ast\",\n", - " \"ord2token\": True,\n", - " \"return_type\": \"list\",\n", - " \"var_numbering\": True\n", - " }\n", - " }\n", + "execution_count": 22, + "source": [ + "sif4sci(\r\n", + " item[\"stem\"],\r\n", + " figures=figures,\r\n", + " tokenization=True,\r\n", + " tokenization_params={\r\n", + " \"formula_params\":{\r\n", + " \"method\": \"ast\",\r\n", + " \"ord2token\": True,\r\n", + " \"return_type\": \"list\",\r\n", + " \"var_numbering\": True\r\n", + " }\r\n", + " }\r\n", ").formula_tokens" ], "outputs": [ @@ -941,7 +950,7 @@ ] }, "metadata": {}, - "execution_count": 44 + "execution_count": 22 } ], "metadata": { @@ -967,9 +976,9 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 23, "source": [ - "sif4sci(item[\"stem\"], figures=figures, tokenization=True,\n", + "sif4sci(item[\"stem\"], figures=figures, tokenization=True,\r\n", " symbol=\"fgm\")" ], "outputs": [ @@ -981,7 +990,7 @@ ] }, "metadata": {}, - "execution_count": 96 + "execution_count": 23 } ], "metadata": { @@ -995,11 +1004,11 @@ "metadata": { "kernelspec": { "name": "python3", - "display_name": "Python 3.8.5 64-bit" + "display_name": "Python 3.6.13 64-bit ('data': conda)" }, "language_info": { "name": "python", - "version": "3.8.5", + "version": "3.6.13", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", @@ -1010,7 +1019,7 @@ "file_extension": ".py" }, "interpreter": { - "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + "hash": "776957673adb719a00031a24ed5efd2fa5ce8a13405e5193f8d278edd3805d55" } }, "nbformat": 4, diff --git a/examples/sif/sif_addition.ipynb b/examples/sif/sif_addition.ipynb index 57830c43..7a2a1b20 100644 --- a/examples/sif/sif_addition.ipynb +++ b/examples/sif/sif_addition.ipynb @@ -2,102 +2,166 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "# sif_addition" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 3, + 
"execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\MySoftwares\\Anaconda\\envs\\data\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. `pip install python-Levenshtein`) to suppress this warning.\n", + " warnings.warn(msg)\n" + ] + } + ], "source": [ "from EduNLP.SIF import is_sif, to_sif,sif4sci" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## is_sif" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 4, - "source": [ - " text = '若$x,y$满足约束条件' \\\r\n", - " '$\\\\left\\\\{\\\\begin{array}{c}2 x+y-2 \\\\leq 0 \\\\\\\\ x-y-1 \\\\geq 0 \\\\\\\\ y+1 \\\\geq 0\\\\end{array}\\\\right.$,' \\\r\n", - " '则$z=x+7 y$的最大值$\\\\SIFUnderline$'\r\n", - " \r\n", - "is_sif(text)\r\n" - ], + "execution_count": 2, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "True" ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "text = '若$x,y$满足约束条件' \\\n", + " '$\\\\left\\\\{\\\\begin{array}{c}2 x+y-2 \\\\leq 0 \\\\\\\\ x-y-1 \\\\geq 0 \\\\\\\\ y+1 \\\\geq 0\\\\end{array}\\\\right.$,' \\\n", + " '则$z=x+7 y$的最大值$\\\\SIFUnderline$'\n", + " \n", + "is_sif(text)\n" + ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\n", "is_sif(text)" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, "outputs": [ { - "output_type": 
"execute_result", "data": { "text/plain": [ - "False" + "(False, )" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\n", + "is_sif(text, return_parser=True)" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## to_sif" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 6, - "source": [ - "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", - "to_sif(text)" - ], + "execution_count": 5, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\n", + "to_sif(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位... ,consume time [0.018142223358154297s]\n", + "[2]return : (False, )\n", + "[2]siftext : 某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位... 
,consume time [0.008990764617919922s]\n" + ] + } + ], + "source": [ + "import time\n", + "# ------------不使用‘加速’机制--------------- #\n", + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'*150\n", + "start = time.time()\n", + "if not is_sif(text):\n", + " siftext = to_sif(text)\n", + "print(\"[1]siftext : {} ,consume time [{}s]\".format(siftext[:35], time.time() - start))\n", + "\n", + "# ------------使用‘加速’机制--------------- #\n", + "start = time.time()\n", + "ret = is_sif(text, return_parser=True)\n", + "print(\"[2]return : \", ret)\n", + "if ret[0] is not True:\n", + " siftext = to_sif(text, parser=ret[1])\n", + "print(\"[2]siftext : {} ,consume time [{}s]\".format(siftext[:35], time.time() - start))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## sif4sci\n", " to_symbolize:\n", @@ -105,283 +169,279 @@ " - \"f\": formula\n", " - \"g\": figure\n", " - \"m\": question mark" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 14, - "source": [ - " test_item = r\"如图所示,则$\\bigtriangleup ABC$的面积是$\\SIFBlank$。$\\FigureID{1}$\"\r\n", - " t1 = sif4sci(test_item)\r\n", - " t1" - ], + "execution_count": 7, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['如图所示', '\\\\bigtriangleup', 'ABC', '面积', '\\\\SIFBlank', \\FigureID{1}]" ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "test_item = r\"如图所示,则$\\bigtriangleup ABC$的面积是$\\SIFBlank$。$\\FigureID{1}$\"\n", + "t1 = sif4sci(test_item)\n", + "t1" + ] }, { "cell_type": "code", - "execution_count": 15, - "source": [ - "t1.describe()" - ], + "execution_count": 8, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'t': 2, 'f': 2, 'g': 1, 'm': 1}" ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 15 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "t1.describe()" + ] }, { 
"cell_type": "code", - "execution_count": 17, - "source": [ - "with t1.filter('fgm'):\n", - " print(t1)" - ], + "execution_count": 9, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "['如图所示', '面积']\n" ] } ], - "metadata": {} + "source": [ + "with t1.filter('fgm'):\n", + " print(t1)" + ] }, { "cell_type": "code", - "execution_count": 18, - "source": [ - "with t1.filter(keep='t'):\n", - " print(t1)" - ], + "execution_count": 10, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "['如图所示', '面积']\n" ] } ], - "metadata": {} + "source": [ + "with t1.filter(keep='t'):\n", + " print(t1)" + ] }, { "cell_type": "code", - "execution_count": 19, - "source": [ - "with t1.filter():\n", - " print(t1)" - ], + "execution_count": 11, + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "['如图所示', '\\\\bigtriangleup', 'ABC', '面积', '\\\\SIFBlank', \\FigureID{1}]\n" ] } ], - "metadata": {} + "source": [ + "with t1.filter():\n", + " print(t1)" + ] }, { "cell_type": "code", - "execution_count": 20, - "source": [ - "t1.text_tokens" - ], + "execution_count": 12, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['如图所示', '面积']" ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 20 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "t1.text_tokens" + ] }, { "cell_type": "code", - "execution_count": 23, - "source": [ - "t1.formula_tokens" - ], + "execution_count": 13, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['\\\\bigtriangleup', 'ABC']" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 23 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "t1.formula_tokens" + ] }, { "cell_type": "code", - "execution_count": 24, - "source": [ - 
"t1.figure_tokens" - ], + "execution_count": 14, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[\\FigureID{1}]" ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 24 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "t1.figure_tokens" + ] }, { "cell_type": "code", - "execution_count": 25, - "source": [ - "t1.ques_mark_tokens" - ], + "execution_count": 15, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['\\\\SIFBlank']" ] }, + "execution_count": 15, "metadata": {}, - "execution_count": 25 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "t1.ques_mark_tokens" + ] }, { "cell_type": "code", - "execution_count": 26, - "source": [ - "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\"}})" - ], + "execution_count": 16, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['如图所示', , '面积', '[MARK]', '[FIGURE]']" ] }, + "execution_count": 16, "metadata": {}, - "execution_count": 26 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\"}})" + ] }, { "cell_type": "code", - "execution_count": 27, - "source": [ - "sif4sci(test_item, symbol=\"tfgm\")" - ], + "execution_count": 17, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']" ] }, + "execution_count": 17, "metadata": {}, - "execution_count": 27 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "sif4sci(test_item, symbol=\"tfgm\")" + ] }, { "cell_type": "code", - "execution_count": 28, - "source": [ - "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})" - ], + 
"execution_count": 18, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['如图所示', '\\\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']" ] }, + "execution_count": 18, "metadata": {}, - "execution_count": 28 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})" + ] }, { "cell_type": "code", - "execution_count": 29, - "source": [ - " test_item_1 = {\n", - " \"stem\": r\"若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$\",\n", - " \"options\": [r\"$x < y$\", r\"$y = x$\", r\"$y < x$\"]\n", - " }" - ], + "execution_count": 19, + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "test_item_1 = {\n", + " \"stem\": r\"若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$\",\n", + " \"options\": [r\"$x < y$\", r\"$y = x$\", r\"$y < x$\"]\n", + "}" + ] }, { "cell_type": "code", - "execution_count": 30, - "source": [ - " tls = [\n", - " sif4sci(e, symbol=\"gm\",\n", - " tokenization_params={\n", - " \"formula_params\": {\n", - " \"method\": \"ast\", \"return_type\": \"list\", \"ord2token\": True, \"var_numbering\": True,\n", - " \"link_variable\": False}\n", - " })\n", - " for e in ([test_item_1[\"stem\"]] + test_item_1[\"options\"])\n", - " ]" - ], + "execution_count": 20, + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "tls = [\n", + " sif4sci(e, symbol=\"gm\",\n", + " tokenization_params={\n", + " \"formula_params\": {\n", + " \"method\": \"ast\", \"return_type\": \"list\", \"ord2token\": True, \"var_numbering\": True,\n", + " \"link_variable\": False}\n", + " })\n", + " for e in ([test_item_1[\"stem\"]] + test_item_1[\"options\"])\n", + "]" + ] }, { "cell_type": "code", - "execution_count": 33, - "source": [ - "tls" - ], + "execution_count": 21, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['mathord_0', 
'=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\\\sqrt', '说法', '正确', '[MARK]'],\n", @@ -390,21 +450,21 @@ " ['mathord_0', '<', 'mathord_1']]" ] }, + "execution_count": 21, "metadata": {}, - "execution_count": 33 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tls" + ] }, { "cell_type": "code", - "execution_count": 34, - "source": [ - "tls[1:]" - ], + "execution_count": 22, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['mathord_0', '<', 'mathord_1'],\n", @@ -412,38 +472,43 @@ " ['mathord_0', '<', 'mathord_1']]" ] }, + "execution_count": 22, "metadata": {}, - "execution_count": 34 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tls[1:]" + ] }, { "cell_type": "code", - "execution_count": 35, - "source": [ - "from EduNLP.utils import dict2str4sif\n", - "\n", - "test_item_1_str = dict2str4sif(test_item_1, tag_mode=\"head\", add_list_no_tag=False)\n", - "test_item_1_str " - ], + "execution_count": 23, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "'$\\\\SIFTag{stem}$若$x=2$, $y=\\\\sqrt{x}$,则下列说法正确的是$\\\\SIFChoice$$\\\\SIFTag{options}$$x < y$$\\\\SIFSep$$y = x$$\\\\SIFSep$$y < x$'" ] }, + "execution_count": 23, "metadata": {}, - "execution_count": 35 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "from EduNLP.utils import dict2str4sif\n", + "\n", + "test_item_1_str = dict2str4sif(test_item_1, tag_mode=\"head\", add_list_no_tag=False)\n", + "test_item_1_str " + ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 24, + "metadata": {}, + "outputs": [], "source": [ "tl1 = sif4sci(\n", " test_item_1_str, \n", @@ -452,60 +517,55 @@ " \"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\", \"ord2token\": True}\n", " })\n", " " - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 37, - "source": [ - "tl1.get_segments()[0]" - ], + 
"execution_count": 25, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['\\\\SIFTag{stem}']" ] }, + "execution_count": 25, "metadata": {}, - "execution_count": 37 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl1.get_segments()[0]" + ] }, { "cell_type": "code", - "execution_count": 38, - "source": [ - "tl1.get_segments()[1:3]" - ], + "execution_count": 26, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['[TEXT_BEGIN]', '[TEXT_END]'],\n", " ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]" ] }, + "execution_count": 26, "metadata": {}, - "execution_count": 38 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl1.get_segments()[1:3]" + ] }, { "cell_type": "code", - "execution_count": 39, - "source": [ - "tl1.get_segments(add_seg_type=False)[0:3]" - ], + "execution_count": 27, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['\\\\SIFTag{stem}'],\n", @@ -513,81 +573,81 @@ " ['mathord', '=', 'mathord', '{ }', '\\\\sqrt']]" ] }, + "execution_count": 27, "metadata": {}, - "execution_count": 39 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl1.get_segments(add_seg_type=False)[0:3]" + ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 28, + "metadata": {}, + "outputs": [], "source": [ "test_item_2 = {\"options\": [r\"$x < y$\", r\"$y = x$\", r\"$y < x$\"]}" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 29, + "metadata": {}, + "outputs": [], "source": [ "test_item_2_str = dict2str4sif(test_item_2, tag_mode=\"head\", add_list_no_tag=False)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 43, - "source": [ - "test_item_2_str" - ], + "execution_count": 30, + "metadata": {}, "outputs": [ { - "output_type": 
"execute_result", "data": { "text/plain": [ "'$\\\\SIFTag{options}$$x < y$$\\\\SIFSep$$y = x$$\\\\SIFSep$$y < x$'" ] }, + "execution_count": 30, "metadata": {}, - "execution_count": 43 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "test_item_2_str" + ] }, { "cell_type": "code", - "execution_count": 44, - "source": [ - "tl2 = sif4sci(test_item_2_str, symbol=\"gms\",\n", - " tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})\n", - "tl2 " - ], + "execution_count": 31, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['\\\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']" ] }, + "execution_count": 31, "metadata": {}, - "execution_count": 44 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl2 = sif4sci(test_item_2_str, symbol=\"gms\",\n", + " tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})\n", + "tl2 " + ] }, { "cell_type": "code", - "execution_count": 45, - "source": [ - "tl2.get_segments(add_seg_type=False)" - ], + "execution_count": 32, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['\\\\SIFTag{options}'],\n", @@ -598,143 +658,146 @@ " ['y', '<', 'x']]" ] }, + "execution_count": 32, "metadata": {}, - "execution_count": 45 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl2.get_segments(add_seg_type=False)" + ] }, { "cell_type": "code", - "execution_count": 46, - "source": [ - "tl2.get_segments(add_seg_type=False, drop=\"s\")" - ], + "execution_count": 33, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['\\\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]" ] }, + "execution_count": 33, "metadata": {}, - "execution_count": 46 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + 
"tl2.get_segments(add_seg_type=False, drop=\"s\")" + ] }, { "cell_type": "code", - "execution_count": 47, - "source": [ - "tl3 = sif4sci(test_item_1[\"stem\"], symbol=\"gs\")\n", - "tl3.text_segments" - ], + "execution_count": 34, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['说法', '正确']]" ] }, + "execution_count": 34, "metadata": {}, - "execution_count": 47 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl3 = sif4sci(test_item_1[\"stem\"], symbol=\"gs\")\n", + "tl3.text_segments" + ] }, { "cell_type": "code", - "execution_count": 48, - "source": [ - "tl3.formula_segments" - ], + "execution_count": 35, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['x', '=', '2'], ['y', '=', '\\\\sqrt', '{', 'x', '}']]" ] }, + "execution_count": 35, "metadata": {}, - "execution_count": 48 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl3.formula_segments" + ] }, { "cell_type": "code", - "execution_count": 49, - "source": [ - "tl3.figure_segments" - ], + "execution_count": 36, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[]" ] }, + "execution_count": 36, "metadata": {}, - "execution_count": 49 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl3.figure_segments" + ] }, { "cell_type": "code", - "execution_count": 50, - "source": [ - "tl3.ques_mark_segments" - ], + "execution_count": 37, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['\\\\SIFChoice']]" ] }, + "execution_count": 37, "metadata": {}, - "execution_count": 50 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tl3.ques_mark_segments" + ] }, { "cell_type": "code", "execution_count": null, - "source": [], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [] } ], "metadata": { - "orig_nbformat": 4, + "interpreter": { + 
"hash": "776957673adb719a00031a24ed5efd2fa5ce8a13405e5193f8d278edd3805d55" + }, + "kernelspec": { + "display_name": "Python 3.6.13 64-bit ('data': conda)", + "name": "python3" + }, "language_info": { - "name": "python", - "version": "3.8.5", - "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, - "pygments_lexer": "ipython3", + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", "nbconvert_exporter": "python", - "file_extension": ".py" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3.8.5 64-bit" + "pygments_lexer": "ipython3", + "version": "3.6.13" }, - "interpreter": { - "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" - } + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/tokenizer/tokenizier.ipynb b/examples/tokenizer/tokenizier.ipynb index 8dcec093..1f52994d 100644 --- a/examples/tokenizer/tokenizier.ipynb +++ b/examples/tokenizer/tokenizier.ipynb @@ -3,82 +3,76 @@ { "cell_type": "code", "execution_count": 1, - "source": [ - "from EduNLP.Tokenizer import PureTextTokenizer, TextTokenizer, get_tokenizer" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "D:\\MySoftwares\\Anaconda\\envs\\data\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n" ] } ], - "metadata": {} + "source": [ + "from EduNLP.Tokenizer import PureTextTokenizer, TextTokenizer, get_tokenizer" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ - "# TextTokenizer and PureTextTokenizer\r\n", - "\r\n", - "- ‘text’ Tokenizer ignores and skips the FormulaFigures and tokenize latex Formulas as Text\r\n", + "# TextTokenizer and PureTextTokenizer\n", + "\n", + "- ‘text’ Tokenizer ignores and skips the FormulaFigures and tokenize latex Formulas as Text\n", "- ‘pure_text’ Tokenizer symbolizes the FormulaFigures as [FUMULA] and tokenize latex Formulas as Text" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## TextTokenizer" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "items = [{\r\n", - " \"stem\": \"已知集合$A=\\\\left\\\\{x \\\\mid x^{2}-3 x-4<0\\\\right\\\\}, \\\\quad B=\\\\{-4,1,3,5\\\\}, \\\\quad$ 则 $A \\\\cap B=$\",\r\n", - " \"options\": [\"1\", \"2\"]\r\n", - " }]\r\n", - "tokenizer = get_tokenizer(\"text\") # tokenizer = TextTokenizer()\r\n", - "tokens = tokenizer(items, key=lambda x: x[\"stem\"])\r\n", - "print(next(tokens))" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "['已知', '集合', 'A', '=', '\\\\left', '\\\\{', 'x', '\\\\mid', 'x', '^', '{', '2', '}', '-', '3', 'x', '-', '4', '<', '0', '\\\\right', '\\\\}', ',', '\\\\quad', 'B', '=', '\\\\{', '-', '4', ',', '1', ',', '3', ',', '5', '\\\\}', ',', '\\\\quad', 'A', '\\\\cap', 'B', '=']\n" ] } ], - "metadata": {} + "source": [ + "items = [{\n", + " \"stem\": \"已知集合$A=\\\\left\\\\{x \\\\mid x^{2}-3 x-4<0\\\\right\\\\}, \\\\quad B=\\\\{-4,1,3,5\\\\}, \\\\quad$ 则 $A \\\\cap B=$\",\n", + " \"options\": [\"1\", \"2\"]\n", + " }]\n", + "tokenizer = get_tokenizer(\"text\") # tokenizer = TextTokenizer()\n", + "tokens = tokenizer(items, 
key=lambda x: x[\"stem\"])\n", + "print(next(tokens))" + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": {}, + "outputs": [], "source": [ "items = [\"有公式$\\\\FormFigureID{wrong1?}$,如图$\\\\FigureID{088f15ea-xxx}$,若$x,y$满足约束条件公式$\\\\FormFigureBase64{wrong2?}$,$\\\\SIFSep$,则$z=x+7 y$的最大值为$\\\\SIFBlank$\"]" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 4, - "source": [ - "\r\n", - "tokenizer = get_tokenizer(\"text\") # tokenizer = TextTokenizer()\r\n", - "tokens = [t for t in tokenizer(items)]\r\n", - "tokens" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['公式',\n", @@ -102,30 +96,31 @@ " '[MARK]']]" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "\n", + "tokenizer = get_tokenizer(\"text\") # tokenizer = TextTokenizer()\n", + "tokens = [t for t in tokenizer(items)]\n", + "tokens" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## PureTextTokenizer" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 5, - "source": [ - "tokenizer = get_tokenizer(\"pure_text\") # tokenizer = PureTextTokenizer()\r\n", - "tokens = [t for t in tokenizer(items)]\r\n", - "tokens" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[['公式',\n", @@ -147,17 +142,25 @@ " '[MARK]']]" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "tokenizer = get_tokenizer(\"pure_text\") # tokenizer = PureTextTokenizer()\n", + "tokens = [t for t in tokenizer(items)]\n", + "tokens" + ] } ], "metadata": { + "interpreter": { + "hash": "776957673adb719a00031a24ed5efd2fa5ce8a13405e5193f8d278edd3805d55" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.6.13 64-bit ('data': conda)" + "display_name": "Python 3.6.13 64-bit 
('data': conda)", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -170,11 +173,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" - }, - "interpreter": { - "hash": "776957673adb719a00031a24ed5efd2fa5ce8a13405e5193f8d278edd3805d55" } }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/tests/test_sif/test_sif.py b/tests/test_sif/test_sif.py index 210441d2..02d30132 100644 --- a/tests/test_sif/test_sif.py +++ b/tests/test_sif/test_sif.py @@ -31,6 +31,12 @@ def test_to_sif(): siftext = to_sif(text) print(siftext) + ret = is_sif(text, return_parser=True) + assert ret[0] == 0 + if ret[0] is not True: + siftext = to_sif(text, parser=ret[1]) + print(siftext) + def test_sci4sif(figure0, figure1, figure0_base64, figure1_base64): repr(sif4sci( @@ -57,3 +63,17 @@ def test_sci4sif(figure0, figure1, figure0_base64, figure1_base64): "figure_params": {"figure_instance": True} } )) + repr(sif4sci( + r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$", mode=0 + )) + repr(sif4sci( + r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$", mode=1 + )) + repr(sif4sci( + r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$", mode=2 + )) + + with pytest.raises(KeyError): + repr(sif4sci( + r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$", mode=3 + ))