diff --git a/AUTHORS.md b/AUTHORS.md index 4188b5a6..73b40ab2 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -12,5 +12,6 @@ [Longhu Qin](https://github.com/KenelmQLH) +[Meikai Bao](https://github.com/BAOOOOOM) -The stared contributors are the corresponding authors. \ No newline at end of file +The stared contributors are the corresponding authors. diff --git a/README.md b/README.md index ed75a0e5..fab37193 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,14 @@ pip install EduNLP pip install EduNLP[full] ``` +### Usage + +```python +from EduNLP import get_pretrained_i2v +i2v = get_pretrained_i2v("d2v_all_256", "./model") +item_vector, token_vector = i2v(["the content of item 1", "the content of item 2"]) +``` + ### Tutorial For more details, please refer to the full documentation ([latest](https://edunlp.readthedocs.io/en/latest) | [stable](https://edunlp.readthedocs.io/en/stable)). diff --git a/asset/_static/d2v.png b/asset/_static/d2v.png new file mode 100644 index 00000000..71d6994e Binary files /dev/null and b/asset/_static/d2v.png differ diff --git a/asset/_static/d2v_bow_tfidf.png b/asset/_static/d2v_bow_tfidf.png new file mode 100644 index 00000000..c5215160 Binary files /dev/null and b/asset/_static/d2v_bow_tfidf.png differ diff --git a/asset/_static/d2v_general.png b/asset/_static/d2v_general.png new file mode 100644 index 00000000..524bd157 Binary files /dev/null and b/asset/_static/d2v_general.png differ diff --git a/asset/_static/d2v_stem_tf.png b/asset/_static/d2v_stem_tf.png new file mode 100644 index 00000000..4cb22522 Binary files /dev/null and b/asset/_static/d2v_stem_tf.png differ diff --git a/asset/_static/data.png b/asset/_static/data.png new file mode 100644 index 00000000..b6c9daa1 Binary files /dev/null and b/asset/_static/data.png differ diff --git a/asset/_static/formula.png b/asset/_static/formula.png new file mode 100644 index 00000000..3cabf913 Binary files /dev/null and b/asset/_static/formula.png differ diff --git 
a/asset/_static/i2v.png b/asset/_static/i2v.png new file mode 100644 index 00000000..3da11cd0 Binary files /dev/null and b/asset/_static/i2v.png differ diff --git a/asset/_static/parse.png b/asset/_static/parse.png new file mode 100644 index 00000000..fd345f20 Binary files /dev/null and b/asset/_static/parse.png differ diff --git a/asset/_static/prepare_dataset.jpg b/asset/_static/prepare_dataset.jpg new file mode 100644 index 00000000..e82d5c42 Binary files /dev/null and b/asset/_static/prepare_dataset.jpg differ diff --git a/asset/_static/seg.png b/asset/_static/seg.png new file mode 100644 index 00000000..a04de8bc Binary files /dev/null and b/asset/_static/seg.png differ diff --git a/asset/_static/sif.png b/asset/_static/sif.png new file mode 100644 index 00000000..30c7cfef Binary files /dev/null and b/asset/_static/sif.png differ diff --git a/asset/_static/sif_addition.png b/asset/_static/sif_addition.png new file mode 100644 index 00000000..db7ccfdc Binary files /dev/null and b/asset/_static/sif_addition.png differ diff --git a/asset/_static/tokenizer.png b/asset/_static/tokenizer.png new file mode 100644 index 00000000..f074449c Binary files /dev/null and b/asset/_static/tokenizer.png differ diff --git a/asset/_static/w2v_stem_text.png b/asset/_static/w2v_stem_text.png new file mode 100644 index 00000000..069f1468 Binary files /dev/null and b/asset/_static/w2v_stem_text.png differ diff --git a/asset/_static/w2v_stem_tf.png b/asset/_static/w2v_stem_tf.png new file mode 100644 index 00000000..1d628bb4 Binary files /dev/null and b/asset/_static/w2v_stem_tf.png differ diff --git a/docs/requirements.txt b/docs/requirements.txt index 7f8b9b23..5a185c62 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,4 +2,5 @@ sphinx sphinx_rtd_theme sphinx_toggleprompt sphinx-gallery>=0.6 -nbsphinx \ No newline at end of file +nbsphinx +m2r2 diff --git a/docs/source/conf.py b/docs/source/conf.py index 1605600f..9d6a118b 100644 --- a/docs/source/conf.py +++ 
b/docs/source/conf.py @@ -46,14 +46,34 @@ def copy_tree(src, tar): 'sphinx.ext.mathjax', 'sphinx_toggleprompt', 'nbsphinx', - 'sphinx_gallery.load_style' + 'sphinx_gallery.load_style', + 'm2r2', + 'IPython.sphinxext.ipython_console_highlighting', + 'IPython.sphinxext.ipython_directive' ] # extension variables setting # npsphinx nbsphinx_thumbnails = { - 'build/blitz/sif/sif': '_static/item_figure.png', + 'build/blitz/sif/sif': '_static/sif.png', + 'build/blitz/sif/sif_addition': '_static/sif_addition.png', + 'build/blitz/utils/data': '_static/data.png', + 'build/blitz/formula/formula': '_static/formula.png', + 'build/blitz/seg/seg': '_static/seg.png', + 'build/blitz/parse/parse': '_static/parse.png', + 'build/blitz/formula/formula': '_static/formula.png', + 'build/blitz/tokenizer/tokenizer': '_static/tokenizer.png', + 'build/blitz/vectorization/i2v': '_static/i2v.png', + 'build/blitz/pretrain/prepare_dataset': '_static/prepare_dataset.jpg', + 'build/blitz/pretrain/gensim/d2v_bow_tfidf': '_static/d2v_bow_tfidf.png', + 'build/blitz/pretrain/gensim/d2v_general': '_static/d2v_general.png', + 'build/blitz/pretrain/gensim/d2v_stem_tf': '_static/d2v_stem_tf.png', + 'build/blitz/pretrain/gensim/w2v_stem_text': '_static/w2v_stem_text.png', + 'build/blitz/pretrain/gensim/w2v_stem_tf': '_static/w2v_stem_tf.png', + 'build/blitz/pretrain/seg_token/d2v': '_static/d2v.png', + 'build/blitz/pretrain/seg_token/d2v_d1': '_static/d2v_d1.png', + 'build/blitz/pretrain/seg_token/d2v_d2': '_static/d2v_d2.png', } # Add any paths that contain templates here, relative to this directory. @@ -62,7 +82,7 @@ def copy_tree(src, tar): # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # -source_suffix = ['.rst', '.md', '.ipynb'] +source_suffix = ['.rst', '.md'] # source_suffix = '.rst' # The language for content autogenerated by Sphinx. 
Refer to documentation @@ -75,7 +95,7 @@ def copy_tree(src, tar): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build'] +exclude_patterns = ['_build','**.ipynb_checkpoints'] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 9c66ae39..13f8b2c6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -84,6 +84,16 @@ But you can also install from source: Getting Started ------------------ + +One basic usage of EduNLP is to convert an item into a vector, i.e., + +.. code-block:: python + + from EduNLP import get_pretrained_i2v + i2v = get_pretrained_i2v("d2v_all_256", "./model") + item_vector, token_vector = i2v(["the content of item 1", "the content of item 2"]) + + For absolute beginners, start with the :doc:`Tutorial to EduNLP ` :doc:`(中文版) `. It covers the basic concepts of EduNLP and a step-by-step on training, loading and using the language models. diff --git a/docs/source/tutorial/zh/index.rst b/docs/source/tutorial/zh/index.rst index dce32fc2..18ce7d4e 100644 --- a/docs/source/tutorial/zh/index.rst +++ b/docs/source/tutorial/zh/index.rst @@ -11,13 +11,138 @@ tokenize vectorization - 示例 -------- + +标准项目格式 +^^^^^^^^^^^^ + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: sif_gallery + :glob: + + Code for beginner to learn how to use SIF4Sci <../../build/blitz/sif/sif> + Code for beginner to learn how to use sif_addition <../../build/blitz/sif/sif_addition> + + +成分分解 +^^^^^^^^^^^ + +语义成分分解 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: dict2str4sif_gallery + :glob: + + Code for beginner to learn how to use dict2str4sif <../../build/blitz/utils/data.ipynb> + + +结构成分分解 +#################### + +.. 
nbgallery:: + :caption: This is a thumbnail gallery: + :name: seg_gallery + :glob: + + Code for beginner to learn how to use seg <../../build/blitz/seg/seg.ipynb> + + +语法解析 +^^^^^^^^^^^ + +文本语法结构解析 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: parse_gallery + :glob: + + Code for beginner to learn how to use parse <../../build/blitz/parse/parse.ipynb> + + +公式语法结构解析 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: formula_gallery + :glob: + + Code for beginner to learn how to use Formula <../../build/blitz/formula/formula.ipynb> + + +令牌化 +^^^^^^^^^^^ + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: tokenizer_gallery + :glob: + + Code for beginner to learn how to use Tokenizer <../../build/blitz/tokenizer/tokenizer.ipynb> + + +向量化 +^^^^^^^^^^^ + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: vectorization_gallery + :glob: + + Code for beginner to learn how to use i2v <../../build/blitz/vectorization/i2v.ipynb> + + +预训练 +^^^^^^^^^^^ + +获得数据集 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: rst1-gallery + :glob: + + prepare_dataset <../../build/blitz/pretrain/prepare_dataset.ipynb> + + +gensim模型d2v例子 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: rst2-gallery + :glob: + + d2v_general <../../build/blitz/pretrain/gensim/d2v_general.ipynb> + d2v_bow_tfidf <../../build/blitz/pretrain/gensim/d2v_bow_tfidf.ipynb> + d2v_stem_tf <../../build/blitz/pretrain/gensim/d2v_stem_tf.ipynb> + + +gensim模型w2v例子 +#################### + +.. nbgallery:: + :caption: This is a thumbnail gallery: + :name: rst3-gallery + :glob: + + w2v_stem_text <../../build/blitz/pretrain/gensim/w2v_stem_text.ipynb> + w2v_stem_tf <../../build/blitz/pretrain/gensim/w2v_stem_tf.ipynb> + + +seg_token例子 +#################### + .. 
nbgallery:: :caption: This is a thumbnail gallery: - :name: gallery + :name: rst4-gallery :glob: - :reversed: - ../../build/blitz/sif/sif \ No newline at end of file + d2v.ipynb <../../build/blitz/pretrain/seg_token/d2v.ipynb> diff --git a/docs/source/tutorial/zh/parse.rst b/docs/source/tutorial/zh/parse.rst index 380d3f4c..9d6ea22e 100644 --- a/docs/source/tutorial/zh/parse.rst +++ b/docs/source/tutorial/zh/parse.rst @@ -6,5 +6,31 @@ * 文本语法结构解析 * 公式语法结构解析 -公式语法结构解析 +其目的是: + + +1、将选择题中的括号,填空题中的下划线用特殊标识替换掉,并将字符、公式用$$包裹起来,使item能通过$符号准确地按照类型切割开; + +2、判断当前item是否合法,并报出错误类型。 + +具体处理内容 +-------------------- + +1.匹配公式之外的英文字母、数字,只对两个汉字之间的字母、数字做修正,其余匹配到的情况视为不合 latex 语法录入的公式 + +2.匹配“( )”型括号(包含英文格式和中文格式),即括号内无内容或为空格的括号,将括号替换为$\\SIFChoice$ + +3.匹配下划线,替换连续的下划线或下划线中夹杂空格的情况,将其替换为$\\SIFBlank$ + +4.匹配latex公式,主要检查latex公式的完整性和可解析性,对latex 中出现中文字符发出警告 + +学习路线图 -------------------- + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + 文本语法结构解析 + 公式语法结构解析 + diff --git "a/docs/source/tutorial/zh/parse/\345\205\254\345\274\217\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" "b/docs/source/tutorial/zh/parse/\345\205\254\345\274\217\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" new file mode 100644 index 00000000..1a7717fb --- /dev/null +++ "b/docs/source/tutorial/zh/parse/\345\205\254\345\274\217\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" @@ -0,0 +1,61 @@ +公式语法结构解析 +-------------------- + +本功能主要由EduNLP.Formula模块实现,可检查传入的公式是否合法,并将合法的公式转换为ast树的形式。从实际使用的角度,本模块常作为中间处理过程,调用相应的模型即可自动选择本模块的相关参数,故一般不需要特别关注。 + +主要内容介绍 ++++++++++++++++ + +1.Formula:对传入的单个公式进行判断,判断传入的公式是否为str形式,如果是则使用ast的方法进行处理,否则进行报错。此外,提供了variable_standardization参数,当此参数为True时,使用变量标准化方法,即同一变量拥有相同的变量编号。 + +2.FormulaGroup:如果需要传入公式集则可调用此接口,最终将形成ast森林,森林中树的结构同Formula。 + + +Examples: + +:: + + >>> text = '支持公式如$\\frac{y}{x}$,$\\SIFBlank$,$\\FigureID{1}$,不支持公式如$\\frac{ \\dddot y}{x}$' + >>> text_parser = Parser(text) + >>> 
text_parser.description_list() + >>> text_parser.fomula_illegal_flag + >>> 1 + +:: + + >>> f = Formula("x") + >>> f + + >>> f.ast + [{'val': {'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, 'structure': {'bro': [None, None], 'child': None, 'father': None, 'forest': None}}] + >>> f.elements + [{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}] + >>> f.variable_standardization(inplace=True) + + >>> f.elements + [{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}] + +:: + + >>> fg = FormulaGroup(["x + y", "y + x", "z + x"]) + >>> fg + ;;> + >>> fg = FormulaGroup(["x + y", Formula("y + x"), "z + x"]) + >>> fg + ;;> + >>> fg = FormulaGroup(["x", Formula("y"), "x"]) + >>> fg.elements + [{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, {'id': 1, 'type': 'mathord', 'text': 'y', 'role': None},\ + {'id': 2, 'type': 'mathord', 'text': 'x', 'role': None}] + >>> fg = FormulaGroup(["x", Formula("y"), "x"], variable_standardization=True) + >>> fg.elements + [{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}, {'id': 1, 'type': 'mathord', 'text': 'y', 'role': None, 'var': 1}, {'id': 2, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}] + +详细示范 ++++++++++++++++ + +.. 
toctree:: + :titlesonly: + + 树型处理效果 <../../../build/blitz/formula/tree.ipynb> + 公式解析效果案例 <../../../build/blitz/formula/formula.ipynb> diff --git "a/docs/source/tutorial/zh/parse/\346\226\207\346\234\254\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" "b/docs/source/tutorial/zh/parse/\346\226\207\346\234\254\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" new file mode 100644 index 00000000..f2f442a0 --- /dev/null +++ "b/docs/source/tutorial/zh/parse/\346\226\207\346\234\254\350\257\255\346\263\225\347\273\223\346\236\204\350\247\243\346\236\220.rst" @@ -0,0 +1,39 @@ +文本语法结构解析 +-------------------- + +本部分主要由EduNLP.SIF.Parse模块实现,主要功能为将文本中的字母、数字等进行提取,将其转换为标准格式。 + +主要流程介绍 ++++++++++++++++ + +1.按照以下顺序,先后对传入的文本进行判断类型 + +* is_chinese:用于匹配中文字符 [\u4e00-\u9fa5] + +* is_alphabet:匹配公式之外的英文字母,将匹配到的只对两个汉字之间的字母做修正(使用$$包裹起来),其余匹配到的情况视为不合 latex 语法录入的公式 + +* is_number:匹配公式之外的数字,只对两个汉字之间的数字做修正(使用$$包裹起来),其余匹配到的情况视为不合 latex 语法录入的公式 + +2.匹配 latex 公式 + +* latex 中出现中文字符,打印且只打印一次 warning + +* 使用_is_formula_legal函数,检查latex公式的完整性和可解析性,对于不合法公式报错 + +Examples: + +:: + + >>> text = '生产某种零件的A工厂25名工人的日加工零件数_ _' + >>> text_parser = Parser(text) + >>> text_parser.description_list() + >>> text_parser.text + >>> '生产某种零件的$A$工厂$25$名工人的日加工零件数$\\SIFBlank$' + +详细示范 ++++++++++++++++ + +.. toctree:: + :titlesonly: + + 文本语法结构解析的案例 <../../../build/blitz/parse/parse.ipynb> diff --git a/docs/source/tutorial/zh/pretrain.rst b/docs/source/tutorial/zh/pretrain.rst index 0dbee20a..477717a4 100644 --- a/docs/source/tutorial/zh/pretrain.rst +++ b/docs/source/tutorial/zh/pretrain.rst @@ -8,12 +8,13 @@ * 如何加载预训练模型 * 公开的预训练模型 +学习路线图 +------------------ -训练模型 ---------- +.. 
toctree:: + :maxdepth: 1 + :titlesonly: -装载模型 --------- - -公开模型一览 ------------- + 训练模型 + 装载模型 + 公开模型一览 diff --git a/docs/source/tutorial/zh/pretrain/loading.rst b/docs/source/tutorial/zh/pretrain/loading.rst new file mode 100644 index 00000000..d930674b --- /dev/null +++ b/docs/source/tutorial/zh/pretrain/loading.rst @@ -0,0 +1,11 @@ +装载模型 +-------- + +将所得到的模型传入I2V模块即可装载模型 + +Examples: + +:: + + >>> model_path = "../test_model/test_gensim_luna_stem_tf_d2v_256.bin" + >>> i2v = D2V("text","d2v",filepath=model_path, pretrained_t2v = False) diff --git a/docs/source/tutorial/zh/pretrain/pub.rst b/docs/source/tutorial/zh/pretrain/pub.rst new file mode 100644 index 00000000..3139910f --- /dev/null +++ b/docs/source/tutorial/zh/pretrain/pub.rst @@ -0,0 +1,85 @@ +公开模型一览 +------------ + +版本说明 +################## + +一级版本: + +* 公开版本1(luna_pub):高考 +* 公开版本2(luna_pub_large):高考 + 地区试题 + +二级版本: + +* 小科(Chinese,Math,English,History,Geography,Politics,Biology,Physics,Chemistry) +* 大科(理科science、文科literal、全科all) + +三级版本:【待完成】 + +* 不使用第三方初始化词表 +* 使用第三方初始化词表 + + + +模型命名规则:一级版本 + 二级版本 + gensim_luna_stem + 分词规则 + 模型方法 + 维度 + +Examples: + +:: + + 全量版本-全学科的D2V模型路径: + `/share/qlh/d2v_model/luna_pub/luna_pub_all_gensim_luna_stem_general_d2v_256.bin` + (备注:一个D2V模型含4个bin后缀的文件) + +模型训练数据说明 +################## + +* 当前【词向量w2v】【句向量d2v】模型所用的数据均为 【高中学段】 的题目 +* 测试数据:`[OpenLUNA.json] `_ + +当前提供以下模型,更多分学科、分题型模型正在训练中,敬请期待 + "d2v_all_256"(全科),"d2v_sci_256"(理科),"d2v_eng_256"(英语),"d2v_lit_256"(文科) + +模型训练案例 +------------ + +获得数据集 +#################### + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + prepare_dataset <../../../build/blitz/pretrain/prepare_dataset.ipynb> + +gensim模型d2v例子 +#################### + +.. 
toctree:: + :maxdepth: 1 + :titlesonly: + + d2v_bow_tfidf <../../../build/blitz/pretrain/gensim/d2v_bow_tfidf.ipynb> + d2v_general <../../../build/blitz/pretrain/gensim/d2v_general.ipynb> + d2v_stem_tf <../../../build/blitz/pretrain/gensim/d2v_stem_tf.ipynb> + +gensim模型w2v例子 +#################### + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + w2v_stem_text <../../../build/blitz/pretrain/gensim/w2v_stem_text.ipynb> + w2v_stem_tf <../../../build/blitz/pretrain/gensim/w2v_stem_tf.ipynb> + +seg_token例子 +#################### + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + d2v.ipynb <../../../build/blitz/pretrain/seg_token/d2v.ipynb> + d2v_d1 <../../../build/blitz/pretrain/seg_token/d2v_d1.ipynb> + d2v_d2 <../../../build/blitz/pretrain/seg_token/d2v_d2.ipynb> diff --git a/docs/source/tutorial/zh/pretrain/start.rst b/docs/source/tutorial/zh/pretrain/start.rst new file mode 100644 index 00000000..f87d6afa --- /dev/null +++ b/docs/source/tutorial/zh/pretrain/start.rst @@ -0,0 +1,24 @@ +训练模型 +------------ + +如需训练模型则可直接调用train_vector函数接口,使训练模型更加方便。模块调用gensim库中的相关训练模型,目前提供了"sg"、 "cbow"、 "fasttext"、 "d2v"、 "bow"、 "tfidf"的训练方法,并提供了embedding_dim参数,使之可以按照需求确定向量的维度。 + +基本步骤 +################## + +1.确定模型的类型,选择适合的Tokenizer(GensimWordTokenizer、 GensimSegTokenizer),使之令牌化; + +2.调用train_vector函数,即可得到所需的预训练模型。 + +Examples: + +:: + + >>> tokenizer = GensimWordTokenizer(symbol="gmas", general=True) + >>> token_item = tokenizer("有公式$\\FormFigureID{wrong1?}$,如图$\\FigureID{088f15ea-xxx}$,\ + ... 
若$x,y$满足约束条件公式$\\FormFigureBase64{wrong2?}$,$\\SIFSep$,则$z=x+7 y$的最大值为$\\SIFBlank$") + >>> print(token_item.tokens[:10]) + ['公式', '[FORMULA]', '如图', '[FIGURE]', 'x', ',', 'y', '约束条件', '公式', '[FORMULA]'] + + # 10 dimension with d2v method + train_vector(sif_items, "../../../data/w2v/gensim_luna_stem_tf_", 10, method="d2v") diff --git a/docs/source/tutorial/zh/seg.rst b/docs/source/tutorial/zh/seg.rst index c3bc9439..e1e1c0db 100644 --- a/docs/source/tutorial/zh/seg.rst +++ b/docs/source/tutorial/zh/seg.rst @@ -7,12 +7,20 @@ * 语义成分分解 * 结构成分分解 -语义成分分解 ------------- +主要处理内容 +-------------------- -结构成分分解 ------------- +1.将字典输入形式的选择题通过 `语法解析 `_ 转换为符合条件的item; +2.将输入的item按照元素类型进行切分、分组。 +学习路线图 +-------------------- +.. toctree:: + :maxdepth: 1 + :titlesonly: + + 语义成分分解 + 结构成分分解 diff --git "a/docs/source/tutorial/zh/seg/\347\273\223\346\236\204\346\210\220\345\210\206\345\210\206\350\247\243.rst" "b/docs/source/tutorial/zh/seg/\347\273\223\346\236\204\346\210\220\345\210\206\345\210\206\350\247\243.rst" new file mode 100644 index 00000000..13ae96ca --- /dev/null +++ "b/docs/source/tutorial/zh/seg/\347\273\223\346\236\204\346\210\220\345\210\206\345\210\206\350\247\243.rst" @@ -0,0 +1,53 @@ +结构成分分解 +------------ + +对切片后的item中的各个元素进行分词,提供深度选项,可以按照需求选择所有地方切分或者在部分标签处切分(比如\SIFSep、\SIFTag处);对标签添加的位置也可以进行选择,可以在头尾处添加或仅在头或尾处添加。 + +具有两种模式,一种是linear模式,用于对文本进行处理(使用jieba库进行分词);一种是ast模式,用于对公式进行解析。 + +基础使用方法 +++++++++++++++++++ + +:: + + >>> test_item = r"如图所示,则$\bigtriangleup ABC$的面积是$\SIFBlank$。$\FigureID{1}$" + >>> seg(test_item) + >>> ['如图所示,则', '\\bigtriangleup ABC', '的面积是', '\\SIFBlank', '。', \FigureID{1}] + +可选的额外参数/接口 +++++++++++++++++++++++ + +1.describe:可以统计出各种类型元素的数量 + +:: + + >>> s.describe() + {'t': 3, 'f': 1, 'g': 1, 'm': 1} + +2.filter:可以选择性地筛除某种或几种类型的元素 + +:: + + >>> with s.filter("f"): + ... s + ['如图所示,则', '的面积是', '\\SIFBlank', '。', \FigureID{1}] + >>> with s.filter(keep="t"): + ... 
s + ['如图所示,则', '的面积是', '。'] + +3.symbol:选择性的将部分类型的数据转换为特殊符号遮掩起来 + +:: + + >>> seg(test_item, symbol="fgm") + ['如图所示,则', '[FORMULA]', '的面积是', '[MARK]', '。', '[FIGURE]'] + >>> seg(test_item, symbol="tfgm") + ['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]'] + +详细示范 ++++++++++++ + +.. toctree:: + :titlesonly: + + 结构成分分解的案例 <../../../build/blitz/seg/seg.ipynb> diff --git "a/docs/source/tutorial/zh/seg/\350\257\255\344\271\211\346\210\220\345\210\206\345\210\206\350\247\243.rst" "b/docs/source/tutorial/zh/seg/\350\257\255\344\271\211\346\210\220\345\210\206\345\210\206\350\247\243.rst" new file mode 100644 index 00000000..0950dd87 --- /dev/null +++ "b/docs/source/tutorial/zh/seg/\350\257\255\344\271\211\346\210\220\345\210\206\345\210\206\350\247\243.rst" @@ -0,0 +1,55 @@ +语义成分分解 +------------ + +由于选择题是以字典的形式给出,故需要将其在保留数据类型关系的情况下转换为文本格式。dict2str4sif函数就是实现此功能的一个模块,该模块可以将选择题形式的item转换为字符格式,并将题干和选项、各选项之间分割开来。 + + +基础使用方法 +++++++++++++++++++ + +:: + + >>> item = { + ... "stem": r"若复数$z=1+2 i+i^{3}$,则$|z|=$", + ... "options": ['0', '1', r'$\sqrt{2}$', '2'], + ... 
} + >>> dict2str4sif(item) # doctest: +ELLIPSIS + '$\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\SIFTag{stem_end}$$\\SIFTag{options_begin}$$\\SIFTag{list_0}$0$\\SIFTag{list_1}$1$\\SIFTag{list_2}$$\\sqrt{2}$$\\SIFTag{list_3}$2$\\SIFTag{options_end}$' + +可选的的额外参数/接口 +++++++++++++++++++++++ + +1.add_list_no_tag:当此参数为True较False时区别在于是否需要将选项部分的标签计数 + +:: + + >>> dict2str4sif(item, add_list_no_tag=True) # doctest: +ELLIPSIS + '$\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\SIFTag{stem_end}$$\\SIFTag{options_begin}$$\\SIFTag{list_0}$0$\\SIFTag{list_1}$1$\\SIFTag{list_2}$$\\sqrt{2}$$\\SIFTag{list_3}$2$\\SIFTag{options_end}$' + + >>> dict2str4sif(item, add_list_no_tag=False) # doctest: +ELLIPSIS + '$\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\SIFTag{stem_end}$$\\SIFTag{options_begin}$0$\\SIFSep$1$\\SIFSep$$\\sqrt{2}$$\\SIFSep$2$\\SIFTag{options_end}$' + +2.tag_mode:此参数为选择标签所在位置,delimiter为头尾都加标签,head为仅头部加标签,tail为仅尾部加标签 + +:: + + >>> dict2str4sif(item, tag_mode="head") # doctest: +ELLIPSIS + '$\\SIFTag{stem}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\SIFTag{options}$$\\SIFTag{list_0}$0$\\SIFTag{list_1}$1$\\SIFTag{list_2}$$\\sqrt{2}$$\\SIFTag{list_3}$2' + + >>> dict2str4sif(item, tag_mode="tail") # doctest: +ELLIPSIS + '若复数$z=1+2 i+i^{3}$,则$|z|=$$\\SIFTag{stem}$$\\SIFTag{list_0}$0$\\SIFTag{list_1}$1$\\SIFTag{list_2}$$\\sqrt{2}$$\\SIFTag{list_3}$2$\\SIFTag{options}$' + +3.key_as_tag:当其为False时则不区分切分标签的类型,而是仅在选项之间加入$\SIFSep$ + +:: + + >>> dict2str4sif(item, key_as_tag=False) + '若复数$z=1+2 i+i^{3}$,则$|z|=$0$\\SIFSep$1$\\SIFSep$$\\sqrt{2}$$\\SIFSep$2' + +详细示范 +++++++++++++++++++++++ + +.. 
toctree:: + :titlesonly: + + 语义成分分解的案例 <../../../build/blitz/utils/data.ipynb> diff --git a/docs/source/tutorial/zh/sif.rst b/docs/source/tutorial/zh/sif.rst index d838a59a..0bb9f2ae 100644 --- a/docs/source/tutorial/zh/sif.rst +++ b/docs/source/tutorial/zh/sif.rst @@ -1,2 +1,109 @@ 标准项目格式 -=============== \ No newline at end of file +=============== + +version: 0.2 + +为了后续研究和使用的方便,我们需要一个统一的试题语法标准。 + +语法规则 +----------- + +1. 题目文本中只允许出现中文字符、中英文标点和换行符。 + +2. 使用 \$\SIFBlank\$ 替换横线,对于选择题中的括号使用 \$\SIFChoice\$ 替换。 + +3. 图片 ID 以公式的形式嵌入文本中:``$\FigureID{ uuid }$`` 或用 base64 编码表示,特别的,内容为公式的图片用 ``$\FormFigureID{ uuid }$`` 表示。 + +4. 文本标注格式:统一用 ``$\textf{item,CHAR_EN}$`` 表示,目前定义的有:b-加粗,i-斜体,u-下划线,w-下划波浪线,d-加点,t-标题。标注可以混用,按字母顺序排序,例如:$\textf{EduNLP, b}$ 表示 **EduNLP** + +5. 其余诸如,英文字母、罗马字符、数字等数学符号一律需要使用 latex 格式表示,即嵌在 ``$$`` 之中。 + +6. 分子式的录入标准暂且参考 `INCHI `_ + +7. 目前对 latex 内部语法没有要求。 + +:: + + 1. Item -> CHARACTER|EN_PUN_LIST|CH_PUN_LIST|FORMULA|QUES_MARK + 2. EN_PUN_LIST -> [',', '.', '?', '!', ':', ';', '\'', '\"', '(', ')', ' ','_','/','|','\\','<','>','[',']','-'] + 3. CH_PUN_LIST -> [',', '。', '!', '?', ':',';', '‘', '’', '“', '”', '(', ')', ' ', '、','《','》','—','.'] + 4. FORMULA -> $latex formula$ | $\FormFigureID{UUID}$ | $\FormFigureBase64{BASE64}$ + 5. FIGURE -> $\FigureID{UUID}$ | $\FigureBase64{BASE64}$ + 6. UUID -> [a-zA-Z\-0-9]+ + 7. CHARACTER -> CHAR_EN | CHAR_CH + 8. CHAR_EN -> [a-zA-Z]+ + 9. CHAR_CH -> [\u4e00-\u9fa5]+ + 10. DIGITAL -> [0-9]+ + 11. QUES_MARK -> $\SIFBlank$ | $\SIFChoice$ + + +注意事项 ++++++++++++++++ + +1. 保留字符与转义 + +2. 数字 + +3. 选空与填空 + +4. 对于单个的数字或字符也需要添加 ``$$`` (目前能实现自动校验) + +5. latex 公式中尽量不出现中文:(``\text{这里出现中文}``) + +6. MySql 数据库导入数据时会自动忽略一个 ``\``,所以录入的公式需要进一步处理为 ``\\`` + +示例 +----------------- + +标准形式: + +:: + + 1. 若$x,y$满足约束条件$\\left\\{\\begin{array}{c}2 x+y-2 \\leq 0 \\\\ x-y-1 \\geq 0 \\\\ y+1 \\geq 0\\end{array}\\right.$,则$z=x+7 y$的最大值$\\SIFUnderline$' + + 2. 
已知函数$f(x)=|3 x+1|-2|x|$画出$y=f(x)$的图像求不等式$f(x)>f(x+1)$的解集$\\PictureID{3bf2ddf4-8af1-11eb-b750-b46bfc50aa29}$$\\PictureID{59b8bd14-8af1-11eb-93a5-b46bfc50aa29}$$\\PictureID{63118b3a-8b75-11eb-a5c0-b46bfc50aa29}$$\\PictureID{6a006179-8b76-11eb-b386-b46bfc50aa29}$$\\PictureID{088f15eb-8b7c-11eb-a86f-b46bfc50aa29}$ + +非标准形式: + +1. 字母、数字和数学符号连续混合出现: + + 例如: + + ``完成下面的2x2列联表,`` + + ``(单位:m3)`` + + ``则输出的n=`` + +2. 特殊的数学符号没有用 latex 公式表示: + + 例如: + + ``命题中真命题的序号是 ①`` + + ``AB是⊙O的直径,AC是⊙O的切线,BC交⊙O于点E.若D为AC的中点`` + +3. 出现以 unicode 编码写成的字符 + + 例如:``则$a$的取值范围是(\u3000\u3000)`` + + +Change Log +---------------- + +2021-05-18 + +修改: + +1. 原用 \$\SIFUnderline\$ 和 \$\SIFBracket\$ 来替换填空题中的横线和选择题中的括号,现分别用 \$\SIFBlank\$ 和 \$\SIFChoice\$ 替换。 + +2. 原统一用 ``$\PictureID{ uuid }$`` 表示图片,现使用 ``$\FigureID{ uuid }$`` ,其中对于数据公式,用 ``$\FormFigureID{ uuid }$`` 来表示。 + +2021-06-28 + +添加: + +1. 注明 ``$$`` 之中不能出现换行符。 + +2. 添加文本标注格式说明。 + diff --git a/docs/source/tutorial/zh/tokenization/GensimSegTokenizer.rst b/docs/source/tutorial/zh/tokenization/GensimSegTokenizer.rst new file mode 100644 index 00000000..f1a66d77 --- /dev/null +++ b/docs/source/tutorial/zh/tokenization/GensimSegTokenizer.rst @@ -0,0 +1,9 @@ +GensimSegTokenizer +===================== + +此令牌解析器在默认情况下对传入的item中的图片、分隔符、题目空缺符等部分则转换成特殊字符进行保护,从而对文本、公式、标签进行令牌化操作。此外,从令牌化方法而言,此令牌解析器对文本均采用线性的分析方法,而对公式采用抽象语法树的分析方法。 + +与GensimWordTokenizer相比,GensimSegTokenizer解析器主要区别是: + +* 提供了切分深度的选项,即可以在sep标签或者tag标签处进行切割 +* 默认在item组分(如text、formula)的头部插入开始标签 diff --git a/docs/source/tutorial/zh/tokenization/GensimWordTokenizer.rst b/docs/source/tutorial/zh/tokenization/GensimWordTokenizer.rst new file mode 100644 index 00000000..e8924e21 --- /dev/null +++ b/docs/source/tutorial/zh/tokenization/GensimWordTokenizer.rst @@ -0,0 +1,20 @@ +GensimWordTokenizer +===================== + 
+此令牌解析器在默认情况下对传入的item中的图片、题目空缺符等部分转换成特殊字符进行保护,从而对文本、公式、标签、分隔符进行令牌化操作。此外,从令牌化方法而言,此令牌解析器对文本均采用线性的分析方法,而对公式采用抽象语法树的分析方法,提供了general参数可供使用者选择:当general为true的时候则代表着传入的item并非标准格式,此时对公式也使用线性的分析方法;当general为false时则代表使用抽象语法树的方法对公式进行解析。 + +Examples +---------- + +:: + + >>> tokenizer = GensimWordTokenizer(symbol="gmas", general=True) + >>> token_item = tokenizer("有公式$\\FormFigureID{wrong1?}$,如图$\\FigureID{088f15ea-xxx}$,\ + ... 若$x,y$满足约束条件公式$\\FormFigureBase64{wrong2?}$,$\\SIFSep$,则$z=x+7 y$的最大值为$\\SIFBlank$") + >>> print(token_item.tokens[:10]) + ['公式', '[FORMULA]', '如图', '[FIGURE]', 'x', ',', 'y', '约束条件', '公式', '[FORMULA]'] + >>> tokenizer = GensimWordTokenizer(symbol="fgmas", general=False) + >>> token_item = tokenizer("有公式$\\FormFigureID{wrong1?}$,如图$\\FigureID{088f15ea-xxx}$,\ + ... 若$x,y$满足约束条件公式$\\FormFigureBase64{wrong2?}$,$\\SIFSep$,则$z=x+7 y$的最大值为$\\SIFBlank$") + >>> print(token_item.tokens[:10]) + ['公式', '[FORMULA]', '如图', '[FIGURE]', '[FORMULA]', '约束条件', '公式', '[FORMULA]', '[SEP]', '[FORMULA]'] diff --git a/docs/source/tutorial/zh/tokenization/TextTokenizer.rst b/docs/source/tutorial/zh/tokenization/TextTokenizer.rst new file mode 100644 index 00000000..a17de29b --- /dev/null +++ b/docs/source/tutorial/zh/tokenization/TextTokenizer.rst @@ -0,0 +1,27 @@ +TextTokenizer +================ + +即文本令牌解析器,在默认情况下对传入的item中的图片、标签、分隔符、题目空缺符等部分则转换成特殊字符进行保护,从而对文本、公式进行令牌化操作。此外,此令牌解析器对文本、公式均采用线性的分析方法,并提供的key参数用于对传入的item进行预处理,待未来根据需求进行开发。 + + +Examples +---------- + +:: + + >>> items = ["已知集合$A=\\left\\{x \\mid x^{2}-3 x-4<0\\right\\}, \\quad B=\\{-4,1,3,5\\}, \\quad$ 则 $A \\cap B=$"] + >>> tokenizer = TextTokenizer() + >>> tokens = tokenizer(items) + >>> next(tokens) # doctest: +NORMALIZE_WHITESPACE + ['已知', '集合', 'A', '=', '\\left', '\\{', 'x', '\\mid', 'x', '^', '{', '2', '}', '-', '3', 'x', '-', '4', '<', + '0', '\\right', '\\}', ',', '\\quad', 'B', '=', '\\{', '-', '4', ',', '1', ',', '3', ',', '5', '\\}', ',', + '\\quad', 'A', '\\cap', 'B', '='] + >>> items = [{ + ... 
"stem": "已知集合$A=\\left\\{x \\mid x^{2}-3 x-4<0\\right\\}, \\quad B=\\{-4,1,3,5\\}, \\quad$ 则 $A \\cap B=$", + ... "options": ["1", "2"] + ... }] + >>> tokens = tokenizer(items, key=lambda x: x["stem"]) + >>> next(tokens) # doctest: +NORMALIZE_WHITESPACE + ['已知', '集合', 'A', '=', '\\left', '\\{', 'x', '\\mid', 'x', '^', '{', '2', '}', '-', '3', 'x', '-', '4', '<', + '0', '\\right', '\\}', ',', '\\quad', 'B', '=', '\\{', '-', '4', ',', '1', ',', '3', ',', '5', '\\}', ',', + '\\quad', 'A', '\\cap', 'B', '='] diff --git a/docs/source/tutorial/zh/tokenize.rst b/docs/source/tutorial/zh/tokenize.rst index 12855778..ce719757 100644 --- a/docs/source/tutorial/zh/tokenize.rst +++ b/docs/source/tutorial/zh/tokenize.rst @@ -5,19 +5,24 @@ 在EduNLP中我们将令牌化分为不同的粒度,为避免歧义,我们定义如下: * 词/字级别:分词 + * 句级别:分句 -* 资源级别:令牌化 -分词 -------- +* 资源级别:令牌化 -分句 -------- +本模块提供题目文本的令牌化解析(Tokenization),将题目转换成令牌序列,方便后续向量化表征试题。 -令牌化 -------- +在进入此模块前需要先后将item经过 `语法解析 `_ 和 `成分分解 `_ 处理,之后对切片后的item中的各个元素进行分词,提供深度选项,可以按照需求选择所有地方切分或者在部分标签处切分(比如\SIFSep、\SIFTag处);对标签添加的位置也可以进行选择,可以在头尾处添加或仅在头或尾处添加。 -我们提供了多种已经封装好的令牌化器供用户便捷调用,下面是一个示例 +具有两种模式,一种是linear模式,用于对文本进行处理(使用jieba库进行分词);一种是ast模式,用于对公式进行解析。 +学习路线图 +-------------------- -通过 可以查看更多令牌化器,下面是一个完整的令牌化器列表 +.. 
toctree:: + :maxdepth: 1 + :titlesonly: + + 分词 + 分句 + 令牌化 diff --git "a/docs/source/tutorial/zh/tokenize/\344\273\244\347\211\214\345\214\226.rst" "b/docs/source/tutorial/zh/tokenize/\344\273\244\347\211\214\345\214\226.rst" new file mode 100644 index 00000000..9782bece --- /dev/null +++ "b/docs/source/tutorial/zh/tokenize/\344\273\244\347\211\214\345\214\226.rst" @@ -0,0 +1,28 @@ +令牌化 +------- +即综合解析,将带公式的句子切分为若干标记的过程。每个标记为一个“令牌”(token)。 +我们提供了多种已经封装好的令牌化器供用户便捷调用,下面是一个示例: + +Examples + +:: + + >>> items = ["已知集合$A=\\left\\{x \\mid x^{2}-3 x-4<0\\right\\}, \\quad B=\\{-4,1,3,5\\}, \\quad$ 则 $A \\cap B=$"] + >>> tokenizer = TextTokenizer() + >>> tokens = tokenizer(items) + >>> next(tokens) # doctest: +NORMALIZE_WHITESPACE + ['已知', '集合', 'A', '=', '\\left', '\\{', 'x', '\\mid', 'x', '^', '{', '2', '}', '-', '3', 'x', '-', '4', '<', + '0', '\\right', '\\}', ',', '\\quad', 'B', '=', '\\{', '-', '4', ',', '1', ',', '3', ',', '5', '\\}', ',', + '\\quad', 'A', '\\cap', 'B', '='] + + + +通过查看"./EduNLP/Tokenizer/tokenizer.py"及"./EduNLP/Pretrain/gensim_vec.py"可以查看更多令牌化器,下面是一个完整的令牌化器列表 + +.. 
toctree:: + :maxdepth: 1 + :titlesonly: + + ../tokenization/TextTokenizer + ../tokenization/GensimSegTokenizer + ../tokenization/GensimWordTokenizer diff --git "a/docs/source/tutorial/zh/tokenize/\345\210\206\345\217\245.rst" "b/docs/source/tutorial/zh/tokenize/\345\210\206\345\217\245.rst" new file mode 100644 index 00000000..67cf5679 --- /dev/null +++ "b/docs/source/tutorial/zh/tokenize/\345\210\206\345\217\245.rst" @@ -0,0 +1,4 @@ +分句 +------- + +将较长的文档切分成若干句子的过程称为“分句”。每个句子为一个“令牌”(token)(待实现)。 diff --git "a/docs/source/tutorial/zh/tokenize/\345\210\206\350\257\215.rst" "b/docs/source/tutorial/zh/tokenize/\345\210\206\350\257\215.rst" new file mode 100644 index 00000000..ec75b0cd --- /dev/null +++ "b/docs/source/tutorial/zh/tokenize/\345\210\206\350\257\215.rst" @@ -0,0 +1,36 @@ +分词 +------- + +词解析(text-tokenization):一个句子(不含公式)是由若干“词”按顺序构成的,将一个句子切分为若干词的过程称为“词解析”。根据词的粒度大小,又可细分为“词组解析”和"单字解析"。 + +:: + + - 词组解析 (word-tokenization):每一个词组为一个“令牌”(token)。 + + - 单字解析 (char-tokenization):单个字符即为一个“令牌”(token)。 + + +词解析分为两个主要步骤: + +1. 分词: + + - 词组解析:使用分词工具切分并提取题目文本中的词。本项目目前支持的分词工具有:`jieba` + + - 单字解析:按字符划分。 + +2. 筛选:过滤指定的停用词。 + + 本项目默认使用的停用词表:`[stopwords] `_ + 你也可以使用自己的停用词表,具体使用方法见下面的示例。 + +Examples: + +:: + + >>> text = "三角函数是基本初等函数之一" + >>> tokenize(text, granularity="word") + ['三角函数', '初等', '函数'] + + >>> tokenize(text, granularity="char") + ['三', '角', '函', '数', '基', '初', '函', '数'] + diff --git a/docs/source/tutorial/zh/vectorization.rst b/docs/source/tutorial/zh/vectorization.rst index c4be7cd1..89175ba6 100644 --- a/docs/source/tutorial/zh/vectorization.rst +++ b/docs/source/tutorial/zh/vectorization.rst @@ -1,2 +1,26 @@ 向量化 -======== +========= + +此部分提供了简便的接口,可以直接将传入的items经过转化得到向量。当前提供了是否使用预训练模型的选项,可根据需要进行选择,如不使用预训练模型则可直接调用D2V函数,使用预训练模型则调用get_pretrained_i2v函数。 + +总体流程 +--------------------------- + +1.对传入的item进行 `语法解析 `_ ,得到SIF格式; + +2.对sif_item进行 `成分分解 `_ ; + +3.对经过成分分解的item进行 `令牌化 `_; + +4.使用已有或者使用提供的预训练模型,将令牌化后的item转换为向量。 + +学习路线图 +--------------------------- + +.. 
toctree:: + :maxdepth: 1 + :titlesonly: + + 不使用预训练模型 + 使用预训练模型 + diff --git "a/docs/source/tutorial/zh/vectorization/\344\270\215\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" "b/docs/source/tutorial/zh/vectorization/\344\270\215\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" new file mode 100644 index 00000000..5a26588f --- /dev/null +++ "b/docs/source/tutorial/zh/vectorization/\344\270\215\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" @@ -0,0 +1,22 @@ +不使用预训练模型:直接调用D2V +------------------------------------ + +使用自己提供的任一预训练模型(给出模型存放路径即可)将给定的题目文本转成向量。 + +* 优点:可以使用自己的模型,另可调整训练参数,灵活性强。 + + +处理的具体流程 +++++++++++++++++++++ + +1.调用get_tokenizer函数,得到经过分词后的结果; + +2.调用T2V模块,根据需要选择是否使用预训练的t2v模型 + +Examples: + +:: + + >>> model_path = "../test_model/test_gensim_luna_stem_tf_d2v_256.bin" + >>> i2v = D2V("text","d2v",filepath=model_path, pretrained_t2v = False) + >>> i2v(item) diff --git "a/docs/source/tutorial/zh/vectorization/\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" "b/docs/source/tutorial/zh/vectorization/\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" new file mode 100644 index 00000000..93d7a00b --- /dev/null +++ "b/docs/source/tutorial/zh/vectorization/\344\275\277\347\224\250\351\242\204\350\256\255\347\273\203\346\250\241\345\236\213.rst" @@ -0,0 +1,41 @@ +使用预训练模型:直接调用get_pretrained_i2v +--------------------------------------------- + +使用 EduNLP 项目组给定的预训练模型将给定的题目文本转成向量。 + +* 优点:简单方便。 + +* 缺点:只能使用项目中给定的模型,局限性较大。 + +* 调用此函数即可获得相应的预训练模型,目前提供以下的预训练模型:d2v_all_256、d2v_sci_256、d2v_eng_256、d2v_lit_256 + +模型选择与使用 +################## + +根据题目所属学科选择预训练模型: + ++--------------------+------------------------+ +| 预训练模型名称 | 模型训练数据的所属学科 | ++====================+========================+ +| d2v_all_256 | 全学科 | ++--------------------+------------------------+ +| d2v_sci_256 | 
理科 | ++--------------------+------------------------+ +| d2v_lit_256 | 文科 | ++--------------------+------------------------+ +| d2v_eng_256 | 英语 | ++--------------------+------------------------+ + +处理的具体流程 +################## + +1.下载相应的预处理模型 + +2.将所得到的模型传入D2V,使用D2V进行处理 + +Examples: + +:: + + >>> i2v = get_pretrained_i2v("d2v_sci_256") + >>> i2v(item) diff --git a/examples/formula/formula.ipynb b/examples/formula/formula.ipynb index 2ee49390..f748a90a 100644 --- a/examples/formula/formula.ipynb +++ b/examples/formula/formula.ipynb @@ -1,69 +1,166 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Formula\n", + "\n", + "## 概述\n", + "\n", + "Formula 首先在分词功能中对原始文本的公式做切分处理,另外提供 [公式解析树] 功能,可以将数学公式的抽象语法分析树用文本或图片的形式表示出来。 \n", + "\n", + "本模块另提供公式变量标准化的功能,如判断几个子公式内的‘x’为同一变量。" + ], + "metadata": {} + }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, + "execution_count": 1, + "source": [ + "import matplotlib.pyplot as plt\n", + "from EduNLP.Formula import Formula\n", + "from EduNLP.Formula import FormulaGroup\n", + "from EduNLP.Formula.viz import ForestPlotter" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 公式语法结构分析\n", + "\n", + "### 初始化实例\n", + "\n", + "- item 类型:`str or List[Dict]` \n", + "- item 内容:latex 公式 或 公式经解析后产生的抽象语法分析树(abstracted syntax tree)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "f = Formula(\"x^2 + x+1 = y\")\n", + "f " + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[{'structure': {'bro': [None, 3],\n 'child': [1, 2],\n 'father': None,\n 'forest': None},\n 'val': {'id': 0, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 2], 'child': None, 'father': 0, 'forest': None},\n 'val': {'id': 1, 'role': 'base', 'text': 'x', 'type': 'mathord'}},\n {'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None},\n 'val': {'id': 2, 'role': 
'sup', 'text': '2', 'type': 'textord'}},\n {'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 3, 'role': None, 'text': '+', 'type': 'bin'}},\n {'structure': {'bro': [3, 5], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 4, 'role': None, 'text': '1', 'type': 'textord'}},\n {'structure': {'bro': [4, 6], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 5, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [5, None],\n 'child': None,\n 'father': None,\n 'forest': None},\n 'val': {'id': 6, 'role': None, 'text': 'y', 'type': 'mathord'}}]" + "text/plain": [ + "" + ] }, - "execution_count": 9, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], + "metadata": { + "collapsed": true + } + }, + { + "cell_type": "markdown", "source": [ - "import matplotlib.pyplot as plt\n", - "from EduNLP.Formula import Formula\n", - "from EduNLP.Formula import FormulaGroup\n", - "from EduNLP.Formula.viz import ForestPlotter\n", - "\n", - "Formula(\"x^2 + 1 = y\")" - ] + "- 查看公式切分后的结点元素:" + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, + "source": [ + "f.elements" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "NodeView((0, 1, 2, 3, 4, 5, 6))" + "text/plain": [ + "[{'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " {'id': 2, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " {'id': 3, 'type': 'bin', 'text': '+', 'role': None},\n", + " {'id': 4, 'type': 'mathord', 'text': 'x', 'role': None},\n", + " {'id': 5, 'type': 'bin', 'text': '+', 'role': None},\n", + " {'id': 6, 'type': 'textord', 'text': '1', 'role': None},\n", + " {'id': 7, 'type': 'rel', 'text': '=', 'role': None},\n", + " {'id': 8, 'type': 'mathord', 'text': 'y', 'role': None}]" + ] }, - "execution_count": 10, "metadata": {}, - "output_type": 
"execute_result" + "execution_count": 3 } ], + "metadata": {} + }, + { + "cell_type": "markdown", "source": [ - "f = Formula(\"x^2 + 1 = y\", variable_standardization=True)\n", - "f.ast.nodes" + "- 查看公式的抽象语法分析树:" ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } + "metadata": {} }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, + "source": [ + "f.ast " + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "OutEdgeView([(1, 0), (2, 0)])" + "text/plain": [ + "[{'val': {'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " 'structure': {'bro': [None, 3],\n", + " 'child': [1, 2],\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " 'structure': {'bro': [None, 2], 'child': None, 'father': 0, 'forest': None}},\n", + " {'val': {'id': 2, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " 'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None}},\n", + " {'val': {'id': 3, 'type': 'bin', 'text': '+', 'role': None},\n", + " 'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 4, 'type': 'mathord', 'text': 'x', 'role': None},\n", + " 'structure': {'bro': [3, 5], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 5, 'type': 'bin', 'text': '+', 'role': None},\n", + " 'structure': {'bro': [4, 6], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 6, 'type': 'textord', 'text': '1', 'role': None},\n", + " 'structure': {'bro': [5, 7], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 7, 'type': 'rel', 'text': '=', 'role': None},\n", + " 'structure': {'bro': [6, 8], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 8, 'type': 'mathord', 'text': 'y', 'role': None},\n", + " 'structure': {'bro': [7, None],\n", + " 'child': None,\n", + " 'father': 
None,\n", + " 'forest': None}}]" + ] }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, "source": [ - "f.ast.edges" + "print('nodes: ',f.ast_graph.nodes)\n", + "print('edges: ' ,f.ast_graph.edges)\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "nodes: [0, 1, 2, 3, 4, 5, 6, 7, 8]\n", + "edges: [(0, 1), (0, 2)]\n" + ] + } ], "metadata": { "collapsed": false, @@ -74,25 +171,27 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, + "source": [ + "ForestPlotter().export(\n", + " f.ast_graph, root_list=[node[\"val\"][\"id\"] for node in f.ast if node[\"structure\"][\"father\"] is None],\n", + ")\n", + "plt.show()" + ], "outputs": [ { + "output_type": "display_data", "data": { - "text/plain": "
", - "image/png": "iVBORw0KGgoAAAANSUhEUgAAG1YAAADnCAYAAAAJF+KoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAACvTUlEQVR4nOzdebhd49k/8O/JPCdEjCUn5iH6BkWCEnMN1QY1iyARM329ihpKq+aqVARpSJoQc4t6VUWIkEliDIoWkZCIyDzJuH9/+NlvTqZzwklOIp/PdT1Xn7XWvZ51r71X4rp6Z+27pFAoBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAoCpVq+oEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACqnMZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAldNYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAqHIaqwEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFVOYzUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKDKaawGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABUOY3VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAKlejqhMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANY+devW/fyrr77aoKrzAFYfJYVCoapzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADWMiUlJQU9lIBFVavqBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADRWAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAqpzGagAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQJXTWA0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKhyGqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABVTmM1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACgymmsBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAVDmN1QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgCqnsRoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFDlNFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACqnMZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAldNYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAqHIaqwEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAArOVKS0tTUlKSkpKSjBo1qlLW7NWrV3HNDh06VMqafL/VqOoEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgVXjnnXfy8MMP55///Gc+/fTTTJgwIU2aNMkGG2yQ7bbbLm3bts0BBxyQrbbaqqpTXStprAYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwPfapEmTcskll+See+5JoVAoc+yLL77IF198kZEjR+bhhx/OnnvumZdffrmKMl27aawGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMD31tixY7Pffvvl/fffL+7bfPPNs9NOO6Vp06aZM2dORo0alTfffDNTpkypukTRWA0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYG03atSoqk5hpZgxY0aZpmp77LFHbrvttuy6665LxC5YsCDDhg3LG2+8sYqz5Bvfu8ZqdevW/fyrr77aoKrzACpPnTp1xs+ePXvDqs4DAAAAAIC1k/oTLJ9aDgAAAAAAVUktB76f1KAAAAAAAKhKalCwfGo5AAAAAADAmuiSSy4pNlX7+c9/nocffjg1a9Zcamz16tWzxx57ZI899liVKbKI711jta+++mqDQqFQ1WkAlaikpMQ/LgEAAAAAoMqoP8HyqeUAAAAAA
FCV1HLg+0kNCgAAAACAqqQGBcunlgMAAAAAAKxp3n777dx5551Jko033ji9evVaZlM1Vg/VqjoBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACqVmlpaUpKSlJSUpJRo0aVGz9o0KC0b98+paWlqVOnTjbYYIO0bt06f/jDHzJ58uSVn3AF3HnnnSkUCkmSs88+O40bN67ijChPjapOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACANcPChQtz3nnnpVu3bmX2f/HFF/niiy8ybNiwdOnSJY888kgVZfi1hQsXpm/fvsXtY445pgqzoaKqVXUCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACsGTp27FimqVrDhg1zxBFHpGPHjjnssMNSr169jBkzJoceemhGjx5doTUHDBiQkpKS4hgwYMB3zvOdd97JlClTkiSNGjXKlltumQULFqRPnz45+OCDs/HGG6d27drZcMMNs88+++T666/PpEmTvvN1+W5qVHUCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACs/h588MH07NmzuH366afntttuS4MGDYr7Jk+enE6dOuWxxx7L73//+6pIM0nyyiuvFOebbrppPvvssxxzzDEZMmRImbjx48dn/PjxGThwYG688cbcddddOe6441Z1uvx/GqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALBcCxcuzGWXXVbc/sUvfpEePXosEbfOOuvkoYceysEHH5z+/fuvyhTLGDNmTJntQw45JG+//XaSZKuttsruu++eWrVq5d13382wYcNSKBQyderUnHDCCZk1a1ZOO+20qkh7raexGgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAy/Xcc89l1KhRSZIaNWrktttuW2Zs9erV07Vr12y33XarJrmlmDJlSnH+zjvvJEnq1KmTHj165MQTTywTO2LEiPziF7/IqFGjUigUcu6552avvfbK1ltvvSpTJkm1qk4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIDVW//+/YvzAw44IBtvvPFy47fddtu0adOmQmu3bds2hUKhONq2bftdUk2SzJgxY4l999577xJN1ZLkRz/6Ufr165d69eolSWbPnp0bbrjhO+fAitNYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA5XrttdeK89atW1fonIo2VlsZ6tSpU2Z7l112yfHHH7/M+C233DJnnXVWcfuRRx7JggULVlp+LJ3GagAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALNeECROK880226xC51Q0bmVo2LBhme0jjzyy3HPatWtXnM+YMSNvvvlmpefF8mmsBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADAcs2YMaM4r1evXoXOqV+//spKp1xNmzYts7399tuXe87iMZ999lml5kT5NFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGC5GjRoUJzPmjWrQufMnDlzZaVTru22267M9qL5L0vDhg3LbE+fPr1Sc6J8GqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALBczZo1K85Hjx5doXM++eSTlZVOuVq2bFlme8aMGeWes3gjtcaNG1dqTpRPYzVWSGlpaUpKSlJSUpJRo0ZVypq9evUqrtmhQ4dKWRMAAAAAAIDKpU4EAAAAAACwdlInAgAAAAAAWDupEwEAAAAAALA0O++8c3E+ZMiQCp1T0biVoXnz5tlyyy2L2++++2655ywes9lmm1V6XiyfxmqwiAULFuThhx9Ou3btsvnmm6du3bpp1qxZdtlll1x55ZX56KOPqjpFAAAAAAAAKsnEiRPz97//PVdeeWXatWuXH/7wh2natGlq1aqVOnXqZMMNN8zee++dyy67LO+8805VpwsAAAAAAMAq8oc//KH4g56V/WOhAAAAAAAAVJ1FG7tVdBx33HFVnTYAA
AAAAMBqZb/99ivO+/fvn7Fjxy43/r333svQoUNXdlrLddRRRxXnf/vb38qNXzSmadOmadmy5UrJi2XTWA3+v48++ig//vGPc+yxx+bxxx/Pxx9/nK+++ipffvllXnvttVx77bXZYYcd8qc//amqUwUAAAAAAKASnH766TniiCNy7bXX5vHHH8/IkSMzadKkzJs3L3PmzMn48ePz0ksv5YYbbkjLli3Tvn37TJ06tarTBgAAAAAAYCX68MMPc+WVV1Z1GgAAAAAAAAAAAAAAALBaOvDAA9O8efMkyfz583PhhRcuM3bBggU599xzV1Fmy3bWWWeldu3aSZIRI0bk4YcfXmbsRx99lLvuuqu4fcopp6SkpGSl50hZNao6AVgdjB8/Pvvvv39GjRpV3Lfbbrtlhx12yNSpU/P8889nypQp+eqrr3LBBRdk3rx5ueiii6ouYQAAAAAAACpVs2bNsu2222azzTZLw4YN89VXX+Wjjz7KsGHDMmfOnCRJnz598vbbb+fFF19Mw4YNqzhjAAAAAAAAKluhUEjHjh0ze/bsqk4FAAAAAACAlWzbbbfN/vvvX27cLrvssgqyAQAAAAAAWHNUq1Yt1113XU488cQkySOPPJKOHTvmtttuS4MGDYpxkydPzhlnnJH+/funVq1amTt3brlrDxgwIPvuu29x+4UXXkjbtm2/c87NmzfPhRdemBtvvDFJ0qFDhyxcuDDHHXdcmbjXXnstv/jFLzJz5swkyXrrrZeLL774O1+fFaexGitk0cZj3ycnn3xy8d7WXXfdPPLII9lvv/2Kx2fOnJnOnTvn/vvvT5JcfPHF2XPPPdO6deuqSBcAAAAAAGCV+z7Wifbee+8ccsghOeCAA7LFFlssNWby5Mm54oor0q1btyTJ66+/niuuuCJdunRZlakCAAAAAABUme9jnWhZunfvngEDBiRJTjzxxOK7RAAAAAAAAGuj73udaPfdd0/Xrl2rOg0AAAAAAIA10gknnJBnnnkmffr0SZLcc889xX4/zZo1y7hx4/L8889n1qxZWWeddXLBBRfk6quvrtKcf/e73+X111/Ps88+m9mzZ+f444/Pb37zm+y+++6pVatW3n333QwdOjSFQiFJUrt27TzwwAPZcMMNqzTvtZXGaqz1+vXrl379+hW3+/btW6apWpLUr18/vXv3zscff5zBgwenUCjkkksuyYsvvriq0wUAAAAAAKCS/Pd//3e5Meuss07uuOOOTJ8+vVi479mzZ2666abUrl17ZacIAAAAAADAKvLpp5/mV7/6VZJk7733zumnn66xGgAAAAAAAAAAAAAAACxDz549U7du3XTv3j1JMm3atDz++ONlYjbZZJM88sgjef/996sgw7Jq1qyZv/71r7ngggtyzz33JEk++OCDfPDBB0vENm/ePH379s0ee+yxqtPk/6tW1QlAVevatWtxfuCBB+bggw9ealy1atVy0003FbcHDhyYt956a6XnBwAAAAAAQNXr1KlTcT59+vT8+9//rsJsAAAAAAAAqGxnnXVWpk2bltq1a6d79+4pKSmp6pQAAAAAAAAAAAAAAABgtVW9evXcfffdGThwYE488cRsttlmqV27dpo1a5Zdd901N9xwQ9588820adOmqlMtql+/fnr06JFhw4blnHPOyfbbb58mTZqkdu3a2WSTTXL44Yene/fu+eCDDzRVq2Iaq7FCSktLU1JSkpKSkowaNarc+EGDBqV9+/YpLS1NnTp1ssEGG6R169b5wx/+kMmTJ6/8hMsxc+bMPPvss8XtU089dbnxe+65Z7baaqvi9l//+teVlhsAAAAAAMDq5PtWJ1pR66+/fpnt6dOnV1EmAAAAAAAAq9baUCfq27dvnnrqqSTJ5Zdfnm222aaKMwIAAAAAAKh6a0OdCAAAAAAAgCWNGjUqhUIhhUIhpaWl5cb/+Mc/zn333ZdPPvkkX331Vb744ou88sorueSSS9K0adMkSYcOHYpr9
urVa5lrtW3bthhXKBTStm3byrmpxey2227p2rVr3nnnnUyePDlfffVVPv300/z9739Pp06dUqtWrZVyXSquRlUnwPfTwoULc95556Vbt25l9n/xxRf54osvMmzYsHTp0iWPPPJIFWX4tSFDhuSrr74qblfkL8O2bdvm3//+d5Lk+eefz9VXX72SsgMAAAAAAFjzrCl1ohX17rvvltlu3rx5FWUCAAAAAACwelpT60QTJkzIBRdckCTZfvvtc8kll1RxRgAAAAAAAGuWNbVOBAAAAAAAAKy+qlV1Anw/dezYsUxxu2HDhjniiCPSsWPHHHbYYalXr17GjBmTQw89NKNHj67QmgMGDEhJSUlxDBgw4Dvn+c477xTnG264YTbaaKNyz9l5552L88V/QBMAAAAAAGBtt6bUiVbE1KlT85vf/Ka4vccee2TjjTdepTkAAAAAAACs7tbUOtH555+fL7/8MiUlJfnzn/+cWrVqVfo1AAAAAAAAvs/W1DrRlClT8uijj+aaa67Jf//3f+eqq65Kt27d8vrrr2fhwoWVfj0AAAAAAACg4mpUdQJ8/zz44IPp2bNncfv000/PbbfdlgYNGhT3TZ48OZ06dcpjjz2W3//+91WRZpLk/fffL86bN29eoXM222yz4nzixImZMGFCmjVrVum5AQAAAAAArGnWpDpReb766quMHj06zz33XG6++eaMGjUqydcvdnbt2rVqkwMAAAAAAFjNrKl1or///e958MEHkyRnnnlm9thjjyrOCAAAAAAAYM2yptaJkuSJJ57IE088sdRjm222WS666KKcc845qV69+irODAAAAAAAAKhW1Qnw/bJw4cJcdtllxe1f/OIX6dGjR5nidpKss846eeihh7L//vtn7ty5qzrNookTJxbnG2ywQYXO2XDDDctsT5o0qVJzAgAAAAAAWBOtaXWixb3xxhspKSkpjrp162abbbbJOeecU2yqtt1222Xo0KHZaaedqjZZAAAAAACA1ciaWieaOnVqzjrrrCTJxhtvnBtuuKGKMwIAAAAAAFizrKl1oooYPXp0Lrjgguy777758ssvqzodAAAAAAAAWOtorEaleu6554o/LFmjRo3cdttty4ytXr16unbtumoSW4YZM2YU53Xr1q3QOYvHLboGAAAAAADA2mpNqxOtiJo1a+aaa67J22+/ne23376q0wEAAAAAAFitrKl1oosvvjifffZZkqRr165p1KhRFWcEAAAAAACwZllT60SlpaX51a9+lWeffTaffvpp5syZk5kzZ+b9999Pt27dsu222xZjX3rppRxxxBH56quvqjBjAAAAAAAAWPvUqOoE+H7p379/cX7AAQdk4403Xm78tttumzZt2mTIkCHlrt22bdsUCoXvnOOiFi1S16pVq0Ln1K5du8z27NmzKzUnAAAAAACANdGaVidaXLNmzXLOOeckSQqFQqZPn54PPvggr776aubNm5ff/OY3eeCBB3LnnXembdu2KzUXAAAAAACANcmaWCd64YUX8uc//zlJ8vOf/zzt2rWr9GsAAAAAAAB8362JdaKf//znad++fapVq1Zmf61atbL11ltn6623zumnn54zzzwzPXv2TJIMGTIkt912Wy699NJKzwcAAAAAAABYumrlh0DFvfbaa8V569atK3ROmzZtVlY65apTp05xPnfu3AqdM2fOnDLbdevWrdScAAAAAAAA1kRrWp1ocZtsskm6du2arl275o477kjv3r0zdOjQjB49OqeeemqS5L333suBBx6YJ598soqzBQAAAAAAWH2saXWiWbNmpVOnTkmSRo0apWvXrlWWCwAAAAAAwJpsTasTJUmTJk2WaKq2uFq1aqVHjx758Y9/XNx38803Z/78+Ss7PQAAAAAAAOD/01iNSjVhwoTifLPNNqvQORWNWxkaNGhQnM+ePbtC5ywet+gaAAAAAAAAa6s1rU5UURtttFHuvffenH/++UmS+fPnp0OHDpk0a
VIVZwYAAAAAALB6WNPqRFdccUU+/PDDJMn111+fTTbZpMpyAQAAAAAAWJOtaXWiFVGtWrX85je/KW5PmjQpw4YNq8KMAAAAAAAAYO2isRqVasaMGcV5vXr1KnRO/fr1V1Y65WratGlxPn78+Aqd8/nnn5fZXnfddSs1JwAAAAAAgDXRmlYnWlHXX399GjVqlCSZPHly7rvvvirOCAAAAAAAYPWwJtWJXnvttXTp0iVJ0qZNm5x11llVkgcAAAAAAMD3wZpUJ/o29t5779SsWbO4/e6771ZhNgAAAAAAALB2qVHVCfD90qBBg+J81qxZFTpn5syZKyudcm2zzTbF+SeffFKhc0aPHl2cr7vuumnWrFml5wUAAAAAALCmWdPqRCuqXr162WOPPfLMM88kSQYPHpzzzz+/irMCAAAAAACoemtSneitt97KwoULkySff/552rRps8zYadOmldlu165dateunSQ57rjjcuGFF660PAEAAAAAANYEa1Kd6NuoWbNm1ltvvYwbNy5J8uWXX1ZxRgAAAAAAALD20FiNSrVok7FFG5AtT0Ubmq0MO+ywQ3H++eef5/PPP8+GG2643HNee+21pZ4PAAAAAACwNlvT6kTfxjrrrFOcT5o0qQozAQAAAAAAWH2sqXWijz/+OB9//HGF4994443ivHXr1ishIwAAAAAAgDXLmlonWhGLNoKrX79+FWYCAAAAAAAAa5dqVZ0A3y8777xzcT5kyJAKnVPRuJWhTZs2qVOnTnF7wIAB5Z7z4osvFuf77bffykgLAAAAAABgjbOm1Ym+jXHjxhXn6667bhVmAgAAAAAAsPpYG+pEAAAAAAAALOn7Xif66KOPMm3atOL2xhtvXIXZAAAAAAAAsCbp1atXSkpKUlJSkg4dOlR1OlWuQ4cOxc+jV69eFTpHYzUq1aKNxvr375+xY8cuN/69997L0KFDV3Zay1S/fv0ceOCBxe3y/uAMGTIkH3zwQXH7yCOPXFmpAQAAAAAArFHWtDrRipo4cWKZFze32267KswGAAAAAABg9bEm1Yk6dOiQQqFQofHCCy+UOffjjz8uHrvtttuqJH8AAAAAAIDVyZpUJ/o27r333uK8pKQk++yzTxVmAwAAAAAAAGsXjdWoVAceeGCaN2+eJJk/f34uvPDCZcYuWLAg55577irKbNkWzeGf//xn+vXrt9S4hQsX5le/+lVx+8c//nF++MMfrvT8AAAAAAAA1gRrWp1o0qRJFY5duHBhzj333MyZM6e476ijjloZaQEAAAAAAKxx1rQ6EQAAAAAAAJVjTasTzZgxo8KxgwcPzh/+8Ifi9iGHHJJmzZqtjLQAAAAAAIBKUlpampKSkpSUlGTUqFFVnQ7fI1dffXXx2br66qurOp21hsZqVKpq1arluuuuK24/8sgj6dix4xKF5MmTJ+e4445L//79U6tWrQqtPWDAgOJfEiUlJRkwYECl5HzQQQflgAMOKG4ff/zxS6w9c+bMdOjQIS+//HKSpKSkJDfddFOlXB8AAAAAAOD7YE2rE/Xu3Tu77rprevfunWnTpi0z7q233sqhhx6aBx98sLivffv2admy5XfOAQAAAAAA4PtgTasTAQAAAAAAUDnWtDpRjx49stdee6Vv376ZPn36UmNmz56dLl265IADDshXX32VJKldu3ZuvPHG73x9AAAAAAAAoOJqVHUCfP+ccMIJeeaZZ9KnT58kyT333JNHHnkk++23X5o1a5Zx48bl+eefz6xZs7LOOuvkggsuqPJuin369Enr1q3zySefZOLEidl3332z++67Z/vtt8+0adPy/PPPZ/LkycX4m266Ka1bt67CjAEAAAAAAFY/a1qdaMSIETnllFNSo0aNbLvtttlmm22yzjrrpKSkJBMnTsxbb72V//znP2XO2XvvvXPHHXdUUcYAAAAAAACrpzWtTgQAAAAAAEDlWNPqRIMGDcqgQYNSs2bNbLfddtlmm23SpEmTLFiwI
J9++mmGDBlSpulajRo10rdv37Rs2bLKcgYAAAAAAIC1kcZqrBQ9e/ZM3bp107179yTJtGnT8vjjj5eJ2WSTTfLII4/k/fffr4IMy9pwww3z/PPP58QTT8zQoUOTJMOGDcuwYcPKxNWpUyc33HBDLrjggqpIEwAAAAAAYLW3ptSJateuXZzPnz8/b7/9dt5+++1lxtepUyf/8z//k8svvzx16tRZFSkCAAAAAACsUdaUOhEAAAAAAACVa02sE82bNy9vvfVW3nrrrWXGtGzZMvfee2923XXXVZgZAAAAAAAAkGisxkpSvXr13H333TnppJNy991356WXXsr48ePTqFGjlJaW5qijjkrHjh3TtGnT1abAvfnmm+fll1/Oo48+mgceeCBvvPFGPv/889SvXz+bbbZZDj300Jx22mnZYostqjpVAAAAAACA1daaUic666yzcsABB+S5557LK6+8knfeeSeffPJJpkyZkkKhkIYNG2aDDTbIf/3Xf6Vt27Y55phjss4661RZvgAAAAAAAKu7NaVOBAAAAAAAQOVaU+pEZ555ZnbZZZcMGTIkw4YNy4cffpiJEydm4sSJWbhwYRo3bpzNNtssu+++e9q1a5f999+/ynIFAAAAAACAtV1JoVCo6hwqVUlJSeH7dk+wtispKUmhUCip6jwAAAAAAFg7qT/B8qnlAAAAAABQldRy4PtJDQoAAAAAgKqkBgXLp5YDAAAAAEBl+7b1mVGjRqVFixYVju/Zs2c6dOiQww47LE8//XSS5JprrslVV11VofN79+6dU045JUmyzTbb5L333itzvG3btnnxxReTJC+88ELatm2bcePG5c9//nMef/zxjB49OjNnzszGG2+c/fffP2eeeWZ23nnnCuefJCNHjsz999+f/v37Z/To0Zk8eXIaN26c0tLSHHDAAencuXNKS0tXaM3KMmDAgOy7775Jkn322ScDBgxIkvzjH//Ivffem1dffTVjx45N3bp1s/POO6dz58455phjlljnpZdeSrdu3fLaa69lzJgxqVevXlq1apVOnTrl2GOPLTePQqGQwYMH57nnnsvQoUPz3nvv5Ysvvsi8efOyzjrrZPPNN8/ee++dM844I1tsscUy11n0+yzPovebJL169cqpp56aJDnllFPSq1evJMnTTz9d/CzGjRuX+vXrZ9ttt81RRx2Vs88+O3Xq1KnQ9ZLk1VdfTZ8+ffL888/ns88+y4wZM9K0adNstdVW+clPfpKOHTumWbNmy11jWd9Zv3790qdPnwwbNizjxo3L9OnTc8EFF+S2224rc36hUMgDDzyQPn365I033sjkyZOz/vrrZ/vtt8/JJ5+c4447LtWrV0+HDh3yl7/8Jcn//TksT40KfxIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGuozp07Fxur9ezZM1deeWVKSkrKPe/Pf/5zcd6xY8dy4//xj3/kpJNOyqRJk8rs/+ijj/LRRx/lnnvuya9+9atcf/315a41Y8aMnHPOObnvvvuycOHCMse+/PLLfPnllxkxYkRuvfXWXHHFFbnyyivLXXNlmz17ds4444zcd999ZfbPmTMnzz//fJ5//vm88MILufPOO4vxJ598ch577LEl1unfv3/69++f//3f/02vXr1SrVq1pV5z3rx5adGiRT777LOlHv/iiy/yxRdfZOjQofnDH/6QX/3qV7n22muXuV5lmTZtWk4//fQ8+uijZfbPmTMngwcPzuDBg3P33XenX79+2WyzzZa71uzZs3PmmWemT58+Wbwh4bhx4zJu3LgMHDgwN9xwQ26++eacccYZFc5z+vTp6dixYx5++OFyY7/88ssceeSReemll8rsHzNmTMaMGZN//vOf6d69ex566KEKX39RGqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGuMRo0a5ZxzzkmS9O7dO9OnT0+StG/fPg0bNlwifrvttkuSHHbYYdlkk03y2WefZ
dSoUenXr18OOuig5V7r/fffz8svv5wkqVWrVk455ZTlxr/66qu5/PLLM2fOnDRp0iRt27ZNs2bN8tlnn+WFF17I7Nmzs3Dhwtxwww2ZM2dObr311mWuNXHixBxwwAF54403ivu22mqr7LTTTllnnXUyZcqUDB06NJ988knmzp2bq666KhMmTMif/vSnZa7ZoUOH/OUvf0mSNG/ePKNGjVru/XwbnTp1yv3335/q1aunTZs22XrrrTN37ty8+OKLGTNmTJLkrrvuyhZbbJH//u//zlFHHZV//OMfS8QPHDgwo0ePTpL06dMnO+64Yy6++OKlXnPBggXFpmr16tXL9ttvn8033zyNGzdOoVDI+PHjM3z48Hz++edZsGBBrr/++syaNSu33XbbEmu1a9cuLVu2zCuvvJLhw4cnSXbdddfstttuS8RutdVWy/wcFixYkF/84hd59tlnU7169bRu3Tpbb711CoVCRowYkbfffjtJ8sEHH+TII4/MsGHDUr169aWuNWfOnBx44IEZNGhQcV+zZs2yzz77pEmTJhk9enQGDBiQuXPnZtq0aencuXMmTJiQyy+/fJn5faNQKOSkk07Kk08+mZKSkrRq1SotW7ZM9erV88EHH5RpPjd9+vTsv//+eeutt4r7fvCDH2SvvfZK/fr18/7772fw4MEZOHBgjjzyyGy++eblXn9xGqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGuMddddN127dk2SPPXUU8XGatdcc01KS0uXeV716tXTsWPHXHPNNUmSHj16lNtYrUePHsX5z372szRr1my58b/+9a8zd+7cnHfeebnxxhtTt27d4rFJkybljDPOyGOPPZYk+eMf/5iDDz44Bx988BLrFAqFnHzyycWmaq1atUq3bt3Spk2bJeIefPDBnHnmmZk2bVpuv/327LvvvmnXrt1y81xZhg4dmjlz5mT33XdPnz59yjQemzdvXn75y1/mjjvuSJLccMMNmTlzZv7xj3+kdevW6d27d5n4+fPn55e//GXxu7722mtzzjnnpF69ektct1q1ajnllFPSvn377LXXXqlVq9YSMQsXLszf/va3dO7cORMnTkyXLl1y7LHHLvGZXnDBBUmSq6++uthY7dBDD83VV1+9Qp/FI488kjlz5qRt27bp0aNHtthiizLH77vvvpx66qmZP39+Xn311Tz00EM54YQTlrrWJZdcUmyqVlJSkt/+9re59NJLU6PG/7Uh+/zzz9OhQ4f885//TJJceeWVad26dfbff//l5jl48ODMnz8/2223Xe67777svPPOZY5/9dVXxfnFF19cbKpWvXr1dOnSJWeffXZKSkqKMe+++26OPfbYDBkyJK+++mp5H9MSqpUfAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACw5uvYsWOqV6+eJHniiSfy5ZdfLjN23rx56d27d3G7U6dO5a4/d+7cdOzYMX/605/KNFVLvm4I99BDD5VpdHXxxRcvdZ2HH344//jHP5J83VTtpZdeWqIBWPJ1k63jjz++2KwtSa666qoUCoVyc10Z5syZk6222irPPfdcmSZpSVKzZs106dKluH/ixIm5+uqrs8022yw1vkaNGvnjH/9Y3D9t2rT87//+71KvW6tWrfTq1Sv77bffUpuqJV83XzvqqKPy5JNPFvfdfvvt3/peyzNnzpzsuuuu+ec//7lEU7UkOemkk3LeeecVt++///6lrjNq1Khic7kk+d3vfpcrrriiTFO1JNlwww3zxBNPFJ+TQqGwzOdrUfPnz8/666+fAQMGLNFULUnq1KmTJPnwww/TvXv34v4uXbrknHPOKdNULUm23377PPfcc9lggw0yd+7ccq+/OI3VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIC1wg9+8IMceuihSb5ugrZo47TFPfnkk/niiy+SJC1atMgBBxxQ7voNGjTIzTffvMzj1atXL9Mka+TIkRk6d
OgScbfccktxfscdd6RBgwbLve4BBxyQgw46KEny9ttv54033ig315XlhhtuWGa+1atXz7HHHltm3/XXX5/69esvNb5GjRo55phjituvvPLKd85vjz32yLbbbpsk6dev33deb3luv/32ZTZ6S75u9PeNZd3b3XffnQULFiRJttxyy1xyySXLXK927drp1q1bcfv111/PkCFDys3zqquuyvrrr7/cmHvuuafYsK9ly5Y5++yzlxm7wQYb5Jprrin3ukujsRoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAsNbo3LlzcX7PPfcsM65Hjx7F+emnn56SkpJy1z7iiCPSpEmT5cZsu+22adOmTXG7f//+ZY6PHz8+I0aMSJI0b948e+yxR7nXTZL999+/OH/ppZeWGtOrV68UCoUUCoWMGjWqQuuuiDp16uSnP/3pcmN23HHHMvGHH374cuN/+MMfFucff/xxhfJ4880306tXr1xzzTW56KKLcu6555YZ06ZNS5J8+eWXGTNmTIXWXFGlpaXZfffdlxuz7bbbpm7dusVcZs6cuUTM888/X5yfcsopqVGjxnLXbNWqVXbZZZelnr8sxx13XLkxiz6nJ510Url/Hk444YTlNpVbluXfHQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADA98ghhxySTTfdNGPGjMm7776bwYMHL9G8bMyYMXn22WeTJNWrV8+pp55aobUXbZhWXtyQIUOSJK+99lqZY4MHDy7OFyxYkHPPPbdCa7777rvF+cpqFlaerbfeOjVr1lxuzLrrrlucb7PNNisUP3Xq1GXGLVy4MN27d8/NN9+cjz76qIIZf93QbNNNN61wfEUt2hBuWapVq5Z11lkns2fPTvL1/dWvX794vFAo5I033ihu77nnnhW69l577ZVXX301yZLP1+JKS0vTtGnT5cYsnkfr1q3LzaFhw4Zp2bJluddfnMZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMBao1q1aunYsWN+85vfJEl69OixRGO1e++9NwsXLkySHHroodl4440rtPZmm222wnETJkwoc2zs2LHF+aeffpo77rijQmsuavLkySt8TmVo0qRJuTE1avxf66zGjRuvUPy8efOWGjNv3rwcffTRefLJJ8tPcjHTp09f4XMqoiKfRZIyjeXmzp1b5ti0adPK7GvevHmF1mzRokVxvvjztbj111+/3PWmTp1aJo8Vec5XtLFatRWKBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABYw51++umpXr16kuThhx8u01xr4cKF6dmzZ3G7U6dOFV63fv36Kxy3eGOvKVOmVPh6yzJ//vzvvMa3UVJSslLjl+Xaa68tNlUrKSnJMccck759++add97J5MmTM2fOnBQKheLYZ599iud+00CvslXGvS3+bFTG87W4unXrlrvejBkzymzXq1dvhfOoqBrlhwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAfH9ssskmOfzww/PEE09k5syZeeCBB3LGGWckSfr165dPPvmkGHfooYdWeN2ZM2eucFzDhg3LHGvQoEFxvt9++6V///4Vvv7aaO7cufnjH/9Y3O7Vq1fat2+/3HOmTZu2stOqFIs/G5XxfH0biz6TSTJr1qwVzqOiqq3wGQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAa7jOnTsX5z169Fjq/NRTT0316tUrvObo0aMrFPdN47YkadasWZljG2ywQXH++eefV/jaa6tXXnkl06dPT5K0bNmy3KZqScW/p6rWqFGj1KpVq7i96HOzPKNGjSrOF3++vo3GjRuXyePbPOcVpbEasFxXX311SkpKUlJSkquvvrqq0wEAAAAAAFglBgwYUKyRtG3bttLWbdu2bXHdAQMGVNq6AAAAAAAAUJU6dOhQrIP16tWrqtMBAAAAAAD43lhZ7zkBAAAAAMD3TUlJybc+9+CDD07z5s2TJ
MOHD8/IkSMzYcKEPPnkk8W1Tz/99BVac8iQISsct/POO5c51rp16+L8X//6VyZOnLhCOaxtxo4dW5y3bNmy3PiRI0dW6DP9Ls9WZSkpKclOO+1U3B48eHCFzhs0aFBxvssuu1RKHq1atSpuV+Q5nzZtWt55550VvpbGagAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADAGqlOnTrF+bx581bo3GrVqqVTp07F7T//+c/p3bt35s6dmyQ58MADU1paukJrPvnkk5kyZcpyY957770MHTq0uL3ffvuVOV5aWpoddtghSVIoFHLXXXetUA5rm0UboM2cObPc+K5du1Zo3e/ybFWmRZ+P3r17Z8GCBcuNf+uttzJ8+PClnl9Zedx///0pFArLje/bt2/xz9KK0FhtMaWlpSkpKUlJSUlGjRpV1ekAAAAAAACwFlKzAgAAAAAAWHXUZqregAEDit9B27ZtqzqdSvPNPS36UiYAAAAAAEBFqGEBAAAAAACsmKZNmxbnn3322Qqff9ppp6VGjRpJkvvuuy/du3cvHlu06VpFzZgxIxdffPEyjy9YsCDnnntucbtly5Zp06bNEnGXXnppcX7ttdeWacRWns8//7zCsd8HW2yxRXH+4osvZvLkycuM7devX3r06FGhdb/rs1VZzjjjjFSr9nW7sQ8++CC33HLLMmPnzp2bs88+u7i98847p3Xr1pWSx+mnn158X+rtt99Ot27dlhk7fvz4XH311d/qOhqrAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABrpB133LE4f+SRR1b4/I022ig//elPkySTJ0/OBx98kCRZf/3187Of/WyF16tVq1Z69OiRCy64ILNnzy5zbPLkyTnuuOPSv3//4r4bb7xxqeuccMIJ+clPfpIk+eqrr7Lffvvl1ltvzcyZM5caP2/evPzjH//IiSeemJ122mmZ+XXo0CElJSUpKSlJaWnpCt7d6qlVq1bZdNNNkyTTpk3LkUcemU8++aRMTKFQyL333puf/exnWbhwYerXr1/uuos+W88++2ymTp1auYlXUGlpac4777zi9mWXXZYbb7wx8+fPLxM3fvz4tGvXLoMGDUqSlJSU5Oabb660PLbccst07NixuH3hhRemW7duKRQKZeL+9a9/5cADD8z48eNTq1atFb5Oje+cKQAAAAAAAAAVMmDAgKpOAQAAAAAAAAAAAAAAAAAAAAAAAL5Xjj766Nx1111Jkm7dumXEiBHZeeedyzTOOu644/KjH/1omWt07tw5f/vb38rsO+WUU1KzZs0Vzue6667L5Zdfnj/96U/p3bt39t1336y33noZO3ZsXnjhhcyaNasYe/755+fQQw9d6jrVqlXLAw88kIMPPjivvPJKZs+enYsuuihXXnllWrdunU033TS1a9fO1KlT89FHH+Xtt98uNnJr2rTpCue9JqtWrVp+//vfp3379km+/t23bbfdNnvssUdatGiR6dOn5+WXX87YsWOTJGeffXbeeeedvPjii8tdd9ddd81mm22W0aNHZ9y4cdl2221z4IEHZr311ku1atWSJFtssUXOOuuslXuDSW644YaMGDEigwYNSqFQyKWXXpo//vGP2XvvvdOkSZOMHj06AwYMyJw5c4rn/O53v8t+++1XqXnccsstGTx4cN55553Mnz8/55xzTm644YbstddeqVevXj744IMMGjQoCxcuTOvWrbP55punb9++K3QNjdUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgDXS/vvvn5NPPjl9+vRJkrzyyit55ZVXysS0bNlyuY3VDjrooLRo0SIff/xxcV/Hjh2/VT677LJLHnvssZx00kmZMmXKEg3bkq8bgV100UW58cYbl7tWkyZNMnDgwFx22WXp1q1b5syZk1mzZuX5559f5jnVqlXLXnvt9a1yX5OdfPLJGTVqVH7zm9+kUCjkq6++WurndP755+eWW27JgQceWO6a1apVy
5133pkjjzwyc+bMyeeff158zr6xzz77rJLGanXq1Em/fv3SuXPn3HfffSkUChk/fnweeeSRJWIbNmyYm2++OZ07d670PBo1apTnn38+7dq1y+DBg5MkY8aMyQMPPFAmbs8998wjjzySyy67bIWvUa1SMgUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAqAK9e/fOgw8+mCOOOCKbbbZZ6tatu0Lnl5SUpF27dsXtvffeO1tvvfW3zuewww7L22+/nauuuiqtWrXKuuuumzp16qRFixY5/fTTM2zYsNx0000pKSkpd63atWvn1ltvzccff5wbb7wxBx98cDbddNPUq1cvNWvWzLrrrptWrVrlhBNOSPfu3TN69Og8/vjj3zr3NdmVV16ZYcOG5eSTT07z5s1Tq1atNG7cONttt106d+6coUOHpkuXLqlZs2aF1zz00EPz2muv5eyzz86OO+6YRo0apVq1qmn9Vbdu3fTu3TvDhw/P+eefn5YtW2adddZJzZo1s+GGG+bHP/5xrrvuunz44YcrpanaN9Zff/28/PLLue+++3LwwQdngw02SK1atbLJJpvkwAMPTK9evTJgwIBstNFG32p9jdWSjBo1KiUlJSkpKcknn3xS3N+iRYvi/kVHr169knz9l883+377299W+Hq9e/cunrftttsucbxt27bF4wMGDEiSjBs3Lr/97W+z8847Z7311kvdunWzxRZb5Iwzzshrr722wvc8cuTIXHrppdl1112LD1WzZs2y66675rLLLsuoUaNWeM3K9vrrr+eCCy7IrrvumqZNm6ZmzZqpU6dONthgg+y66645/fTT85e//CWTJ09e6vmlpaXFz7Ei99OrV69ifIcOHZYas+izUlpaWtz/0ksvpX379tlqq61Sr169rLvuutltt91y/fXXZ+rUqRW63/nz5+eBBx7I0UcfnS233DINGzZMjRo10qBBg7Ro0SL7779/Lr300gwYMCALFy5c4vwBAwYUc2vbtm2Frrm0Z60iZs+ene7du2fffffNJptskjp16uQHP/hBfv7zny+1wykAAAAAAFA+NavVs2a1uKeeeipHHXVUNt9889StWzfNmjXL3nvvndtvvz1z5swp9/yK1GeuvvrqYszVV1+dJFm4cGEefPDBHHroodl0001Tu3btrL/++jnggAPSo0ePLFiwoBLvEgAAAAAAvv/UZlaP2sw3dZF99923uO/FF19c6ndQ3guiH3/8ca655prstdde2XjjjVO7du2su+66+eEPf5gLL7wwI0eOXOa5w4cPT61atYrX6dGjR7m5X3LJJcX4ddddN2PGjElS9h2fRS3rnpb3Ts+rr76aCy+8MD/84Q/TtGnT1K5dOxtvvHH22WefXH/99ZkwYUK5eS7rnaN+/fqlffv22WabbdKoUaOUlJTkwgsvXOL8QqGQvn375pBDDslGG22UOnXqZLPNNstPfvKT3H///epUAAAAAABQidSwVo8a1je+a50lSb744ovcdNNN2X///bPpppumTp06ZX60c9CgQavmZgAAAAAAYC1y7LHH5oknnsgnn3ySWbNmpVAoFMey+tEs6rnnnivOO3Xq9J3z2WSTTXLNNdfk9ddfz8SJEzN79ux89NFH6dGjR370ox+t8HobbbRRfvWrX+WZZ57J6NGjM3PmzMydOzcTJ07M66+/nvvvvz+dOnXKJptsstx1evXqVfxcKqsm07Zt2+KaFemDszLjd9111/Tu3TujRo3KnDlzMmXKlLz77ru56667svvuuxfjBgwYUFyzvH4/22+/fe6444689dZbmTp1ahYsWLDMfDp06FA89k1drzyjRo0qnrNoX6Rl2WWXXdKlS5eMHDkykyZNyty5czNu3LgMHDgwl112WZo1a1buGiv6HSyupKQkJ554Yp555pl8/vnnmTNnTj799NM8++yzOeWUU1KjRo0kZZ+3ivw5TJIaK5wNRZ07d87TTz+dJOnZs2euvPLKCnVw/
POf/1ycd+zYsdz4f/zjHznppJMyadKkMvs/+uijfPTRR7nnnnvyq1/9Ktdff325a82YMSPnnHNO7rvvviWac3355Zf58ssvM2LEiNx666254oorcuWVV5a7ZmVbsGBBzjvvvNx1110pFApljs2fPz9ffPFFvvjii4wYMSL33ntvTjzxxNx3332rPM9v8vnlL3+Zrl27ltk/e/bsDB8+PMOHD8/tt9+eBx54IPvss88y1/nPf/6Tn//853nnnXeWODZz5szMnDkzo0aNyvPPP58bb7wx/fr1ywEHHFDp91MR//73v3PUUUct8TLpZ599ls8++yxPPPFEfvKTn+Shhx5Ko0aNqiRHAAAAAABYm6hZrRozZszIqaeemkcffbTM/q+++iovvfRSXnrppXTt2jV//etfs8MOO1TqtceNG5fjjz8+L774Ypn9EyZMSP/+/dO/f//ce++9efrpp9OkSZNKvTYAAAAAAFCW2szqZ/78+bn00ktz++23Z+7cuWWOzZ07N5MnT87IkSNz++2356yzzsptt91WfCHvG7vuumuuvfbaXHLJJUmSCy64IHvttddSf0A0+foF2Ztvvrm4fc8992TTTTettHuaPXt2zjzzzPTp02eJ96vGjRtXfMHxhhtuyM0335wzzjijwmtPnz49HTt2zMMPP1xu7JdffpkjjzwyL730Upn9Y8aMyZgxY/LPf/4z3bt3z0MPPVTh6wMAAAAAAJVPDWvVWJE6S5LcdNNN+d3vfpcZM2aU2T9nzpxMmzYt7733Xrp3756jjz46PXv2TIMGDVZG2gAAAAAAwAp45ZVX8tZbbyVJ1llnnRx99NFVnBGsvTRWS9KoUaOcc845SZLevXtn+vTpSZL27dunYcOGS8Rvt912SZLDDjssm2yyST777LOMGjUq/fr1y0EHHbTca73//vt5+eWXkyS1atXKKaecstz4V199NZdffnnmzJmTJk2apG3btmnWrFk+++yzvPDCC5k9e3YWLlyYG264IXPmzMmtt966zLUmTpyYAw44IG+88UZx31ZbbZWddtop66yzTqZMmZKhQ4fmk08+ydy5c3PVVVdlwoQJ+dOf/rTMNTt06JC//OUvSZLmzZtXSgfJiy++OHfeeWdxe+ONN85uu+1W7GI4adKkvPfee/nXv/61RCF8Vbv00kuLTdW222677LLLLqlRo0ZGjhyZV199NcnXLysedthh6devX9q0abPEGtOnT8/++++f0aNHJ/m6k+JOO+2U7bbbLg0bNszs2bMzbty4vPXWW/n8889X3c0txdSpU3PIIYfkww8/TM2aNbPPPvuktLQ0U6dOzcCBAzN+/PgkyTPPPJOf/OQneeGFF1K7du0qzRkAAAAAANYUalarV81qcYs2Vdtll12y4447Zv78+Xn11Vfzr3/9K0nywQcfZL/99sugQYOy5ZZbVsp1Z8yYkZ/85Cd56623Urt27ey5555p0aJF5syZkyFDhuTDDz9MkgwZMiQdOnTI448/XinXBQAAAACA7zu1mdWjNrPbbrvlnHPOyWeffVasc2y88cZp165dhc6fM2dODj/88Dz33HPFfZtuuml23XXXrLfeepk5c2ZeffXVvPfee1m4cGHuuOOOjBkzJo8//vgSPyZ68cUXp1+/fnnuuecya9asHH/88Rk2bFhq1apVJm7ChAk5+eSTiw3POnfuXCbfTTbZpPhs3XHHHcX93+xb3CabbLLEPR144IEZNGhQcV+zZs2yzz77pEmTJhk9enQGDBiQuXPnZtq0aencuXMmTJiQyy+/vNzPq1Ao5KSTTsqTTz6ZkpKStGrVKi1btkz16tXzwQcfpFq1asXYb955+uZl4CT5wQ9+kL322iv169fP+++/n8GDB2fgwIE58sgjs/nmm5d7fQAAAAAAYPnUsFaPGtbSrEidpVAopEOHDundu3dx3/rrr5/dd989G264YebMmZORI0fm9ddfT5I8+uij+eSTTzJw4MDUqVOnU
vMGAAAAAABWTJcuXYrzDh06+P/uoSoVCoXv1fj6lr695s2bF5IUkhQ+/vjjcuN/85vfFON/8YtflBv/P//zP+XG77PPPsWYWrVqFZIUzjvvvMKsWbPKxE2cOLFw1FFHFWOTFJ555pmlrrlw4cLCIYccUoxr1apVYfDgwUuN69u3b6FRo0bF2L/+9a/LvJ9TTjmlGNe8efNy7788EyZMKNSoUaOQpFC9evXCvffeW1iwYMFSY7/88svCPffcU7jxxhuXenxFv8uePXsW40855ZSlxnz88cfFmJo1axaSFJo0aVL4+9//vkTsyy+/XPjBD35QjN9iiy0KM2fOXCLuj3/8YzFmu+22K7z77rvLzPGtt94qXHzxxYVhw4YtceyFF14orrPPPvuUe7+FQtln7YUXXlhqzKLP+DfPY9u2bQtjxowpEzdv3rzCtddeW+Z5vPTSSyuUR3n+/5/rKv/7xTAMwzAMwzAMwzAMwzAMw1g7x3etP30balZVW7MqFMrWXr65/80226wwaNCgJWKffPLJQuPGjYvxe+yxxzJrXCtan6ldu3YhSeHoo48ujBs3rkzcggULCjfddFOZz35p+a1sajmGYRiGYRiGYRiGYRiGYRhGVY7KqOWozaxetZmKvhdTKBQKnTt3Lp7XokWLwtNPP11YuHDhEnH//Oc/CxtttFEx9tZbb13qeuPGjSs0a9asGHfhhRcuEXPYYYeVeRdo8e9oUYt+TxV1wQUXFM8pKSkp/O53vyvMmzdviTwPPvjgMnHPPffcUtdb9LP95r2t7bbbrvDqq68uETt79uzifNHPtnr16oWuXbsu8dm+8847hZYtW5Z5bpMUevbsWeH7XRY1KMMwDMMwDMMwDMMwDMMwDKMqR1W8T7Q4NazVq4a1InWW66+/vnjeeuutV7j//vsL8+fPX+Kc4cOHF7bZZpti7Pnnn19uHitSS1uZ1HIMwzAMwzAMwzAMwzAMwzCMyh6rQ33m2WefLZSUlBR70lSkRrO4ivzOGFAx1cJ30rFjx1SvXj1J8sQTT+TLL79cZuy8efPSu3fv4nanTp3KXX/u3Lnp2LFj/vSnP6Vu3bpljq277rp56KGHsv/++xf3XXzxxUtd5+GHH84//vGPJEmrVq3y0ksvpU2bNkvElZSU5Pjjj89jjz1W3HfVVVd98x+RlW7o0KGZP39+kuTYY4/NqaeemmrVlv6YNm3aNKeddlp+9atfrZLcFjdv3rxUq1YtTz75ZA4//PAlju+5557p169f8Xv78MMPc9dddy0R9/LLLxfnXbp0yXbbbbfMa+6444656aabsttuu1XCHay4uXPn5oc//GGefvrp/OAHPyhzrEaNGrn88stz9dVXF/f94Q9/yPjx41dxlgAAAAAAsPZRs1q55s6dm3r16qVfv37ZY489ljj+05/+NH/7299SUlKSJBk8eHAef/zxSrn2nDlz8vOf/zwPP/xwNtxwwzLHqlWrlosvvjjt2rUr7rv//vsr5boAAAAAAMCyqc2sHoYNG5a77747SbLppptm0KBBOeSQQ4o1m0UddNBB6devX2rXrp0kuf766zN79uwl4jbccMP07NmzuN2lS5fiZ5gkt912W/73f/83SVK7du08+OCDS3xH38WoUaPStWvX4vbvfve7XHHFFalRo8YSeT7xxBPF77NQKCzzOVjU/Pnzs/7662fAgAHZeeedlzhep06dJF+/B9W9e/fi/i5duuScc85Z4rPdfvvt89xzz2WDDTbI3LlzK36jAAAAAABApVPDWrkqWmf59NNPc9VVVyVJGjZsmIEDB+aEE04ofjeL+tGPfpQXXngh66+/fpLkrrvuytixY1fiXQAAAAAAAIv6z3/+kwsvvDDnnXdeDjnkkPzkJz8p1iLOOuuslJaWVm2CsJbTWO07+sEPfpBDDz00ydcF30WLxIt78skn88UXXyRJWrRokQMOOKDc9Rs0aJCbb755mcerV69e5mW5kSNHZujQoUvE3
XLLLcX5HXfckQYNGiz3ugcccEAOOuigJMnbb7+dN954o9xcK8O0adOK82+KvKuz448/Pj/+8Y+XeXzbbbfNBRdcUNxe9IXCb6xp9/yHP/xhuS98XnbZZdlss82SfP0PJxZ9mRQAAAAAAFg51KxWvl/+8pfZeuutl3l83333zbHHHlvcXlpd6NuoUaNGbr/99qX+AOg3OnbsWJy/8sorlXJdAAAAAABg2dRmVg+L3t8NN9yQjTbaaLnxO+ywQ0455ZQkyYQJE/LMM88sNe6www7L+eefn+TrhmUdOnTI+PHj88Ybb+TSSy8txt1888354Q9/+F1vo4y77747CxYsSJJsueWWueSSS5YZW7t27XTr1q24/frrr2fIkCHlXuOqq64q9x2me+65p/gicMuWLXP22WcvM3aDDTbINddcU+51AQAAAACAlUsNa+WrSJ2lS5cumTdvXpLk0ksvzXbbbbfc+I022igXXnhhkq+/t4ceeqhScgUAAAAAAMr36aefpkuXLunatWueeeaZLFy4MEnSqlWr/P73v6/i7ACN1SpB586di/N77rlnmXE9evQozk8//fTl/vjhN4444og0adJkuTHbbrtt2rRpU9zu379/mePjx4/PiBEjkiTNmzfPHnvsUe51k2T//fcvzl966aWlxvTq1SuFQiGFQiGjRo2q0LrL07x58+L80Ucfzbhx477zmivTNy9TLs+pp55anL///vsZO3ZsmeOL3nPXrl2LLx2ujn7wgx+UeS6WplatWjnxxBOL24s/jwAAAAAAwMqhZlV5NaulWdG60MCBAzN//vzvfN299torP/jBD5Ybs/POOxfnK+v+AQAAAACAstRmVm5tpjzz588vNkarVatWjjrqqAqdV5H7S5Kbbrop//Vf/5Uk+eKLL3LSSSfl+OOPz5w5c5Ikhx9+eM4777xvm/4yPf/888X5Kaeckho1aiw3vlWrVtlll12Wev6yHHfcceXGLPo8nXTSSeU+tyeccEJq1apV7roAAAAAAMDKpYa1cmtYFamzPPXUU8X5CSecUKF1K1rDAgAAAAAAVp6aNWtmyy23zKWXXpqBAwemQYMGVZ0SrPWW/2YVFXLIIYdk0003zZgxY/Luu+9m8ODBSxRqx4wZk2effTZJUr169TI/qrg8ixaHy4sbMmRIkuS1114rc2zw4MHF+YIFC3LuuedWaM133323OB8zZkyFzvmudt9995SWlmbUqFH59NNPs8MOO6R9+/b56U9/mjZt2qRevXqrJI+KKCkpSevWrcuN23rrrbPuuutm0qRJSZJXX301G2+8cfH4cccdV/wHBj169MiIESPSvn37HHTQQdl+++0r9I8NVpXdd9+9Qvks+twu/jwCAAAAAAArh5rVytO0adNstdVW5ca1bt06JSUlKRQKmT17dv71r39lxx13/E7X/uEPf1ih/L4xderU73Q9AAAAAACgYtRmqtbIkSMzY8aMJEmdOnVy0UUXVei8Tz/9tDhf3v3Vrl07Dz74YHbZZZfMmjUrzz33XPHYRhttlJ49e37LzJetUCjkjTfeKG7vueeeFTpvr732yquvvpqk/Pd4SktLy9SWKpJHRd6fatiwYVq2bOk9IgAAAAAAqGJqWCtPReoskyZNynvvvZfk69+pu+WWWyq09qLvA63ONToAAAAAAPi+adu2bQqFQqWuOWDAgEpdD9ZmGqtVgmrVqqVjx475zW9+k+Tr5liLF5HvvffeLFy4MEly6KGHlmmstTybbbbZCsdNmDChzLGxY8cW559++mnuuOOOCq25qMmTJ6/wOd9GjRo1cv/99+fQQw/N1KlTM3ny5HTp0iVdunRJjRo1stNOO2XvvffOT37yk+y7776pXr36KslraZo0aZKGDRtWKHazzTYrNlZb/PvZf//9c/HFF+fmm29OkrzxxhvFlw/XWWed7Lnnntlnn33Srl27bLHFFpV3A9/Ct3keJ0+enAULFlTpdwUAAAAAAGsDNauVp
6L336hRozRu3DhTpkxJsuRn8G00adKk3JiaNWsW5/PmzfvO1wQAAAAAAMqnNlO1Fr2/adOmrZT723bbbdOlS5d06tSpuK+kpCS9e/fOeuutt8LXK8+0adMyd+7c4nbz5s0rdF6LFi2K8/LqU+uvv365602dOrVMHivyPGqsBgAAAAAAVUsNa+WpSJ1l3LhxxXmhUFij7g8AAAAAAABWN9WqOoHvi9NPP73YOOrhhx/O9OnTi8cWLlyYnj17FrcXfZmuPPXr11/huEWvnaT4w43fxfz587/zGhW1xx57ZOTIkTnjjDPKNC6bP39+hg8fnj/84Q858MAD06JFizKf66pW0e9m8djFv58kuemmm/L0009n7733TklJSXH/5MmT89RTT+Xiiy/OlltumYMPPjjvvvvud0v8O/g2z2OhUMiMGTNWVkoAAAAAAMAi1KxWjsqsC62oRWtHAAAAAADA6kVtpuqsqvvbYIMNymxvtNFGadOmzXe+9tIs/h1WxnOwuLp165a73uLvAdWrV2+F8wAAAAAAAKqOGtbKUZE6y5p8fwAAAAAAALC60VitkmyyySY5/PDDkyQzZ87MAw88UDzWr1+/fPLJJ8W4Qw89tMLrzpw5c4XjFm1GliQNGjQozvfbb78UCoUVHr169apwzpVh0003zd13350JEyZkwIAB+e1vf5uDDjqozL2MGTMmp512Wn75y19WyjUXLly4QvEV/W4Wj138+/nGIYcckhdffDFjx47Ngw8+mHPPPTetWrUq82OZzz77bHbdddcMGTJkhXJdlpV1z4vGlZSUlPneAAAAAACAlUfNauWo7LoQAAAAAADw/aA2U3UWvb/NN9/8W93fgAEDlnuNsWPH5rTTTlti33nnnbcybmmJ77AynoNvY/H3gGbNmrXCeQAAAAAAAFVHDavqLHp/NWrU+Fb3N2rUqKq7AQAAAAAAAFiNaKxWiTp37lyc9+jRY6nzU089NdWrV6/wmqNHj65Q3DdF6iRp1qxZmWMbbLBBcf75559X+Nqrg9q1a2efffbJlVdemX/+85+ZNGlSnn766ey1117FmNtuuy0jRoxY4tyaNWsW5/Pnzy/3WlOmTFmh3KZMmZJp06ZVKHbR73Hx72dxG264YY499tjcfvvtef311/P555/nj3/8Y5o2bZrk65cRzzjjjCXOW9H7TVb8nr/N87jOOuus0DMPAAAAAAB8N2pWla+i9z916tRMnTq1uF1eXQgAAAAAAFjzqc1UjUXv74svvsjChQsrdf1CoZD27dvnyy+/TJJstdVWxe+wZ8+eeeihhyr1eknSqFGj1KpVq7i96Pe7PIv+uGZl1KcaN25cJo9v8zwCAAAAAABVSw2raix6f/Pnz8+ECROqMBsAAAAAAABYs2mstpiSkpJvfe7BBx+c5s2bJ0mGDx+ekSNHZsKECXnyySeLa59++ukrtOaQIUNWOG7nnXcuc6x169bF+b/+9a9MnDhxhXJYndSsWTOHHHJInnvuubRs2bK4/5vPeFENGzYszityz2+99dYK5VIoFDJs2LBy495///1MmjSpuL3491Oe9ddfPxdeeGGeeOKJ4r633347H330UZm4Fb3fOXPm5IMPPlihXIYOHZpCoVBu3PKeRwAAAAAAoHxqVquXiRMn5t///ne5cYvWUurWrZvttttuZacGAAAAAABUArWZqrei30GrVq1Sp06dJMmMGTPy5ptvVmo+N910U/r3758kadCgQf73f/83v/71r4vHzzzzzEpvJFZSUpKddtqpuD148OAKnTdo0KDifJdddqmUPFq1alXcrsjzOG3atLzzzjvf+doAAAAAAMD/UcNa82y44YbFzz1JXn755SrMBgAAAAAAANZsGqst5psX6pJk3rx5K3RutWrV0qlTp+L2n//85/Tu3Ttz585Nkhx44IEpLS1doTWffPLJTJkyZbkx7733XoYOHVrc3m+//cocLy0tzQ477JDk62Zgd9111wrlsDqqX
bt2DjrooOL2+PHjl4hp0aJFcf7aa68td72ZM2cutTlbef7yl7+UG9OzZ8/ifOutt84mm2yywtdJkj333DPrrrtucXvxey4tLS3+I4j//Oc/mTZt2nLX++tf/5o5c+asUA6fffZZ8aXQZZk7d27uv//+4vbizyMAAAAAAFA+NavVz4rWhX784x+nRo0aKzMlAAAAAACgkqjNVL0V/Q7q1KmT/fffv7jdrVu3Sstl+PDhufLKK4vbf/rTn7LVVlvlN7/5Tdq0aZMkmTJlSk488cQsWLCg3Dy/UZH7WvR77N27d7nrv/XWWxk+fPhSz/8uFl3n/vvvT6FQWG583759i888AAAAAABQOdSw1kyHH354cV6ZNSwAAAAAAABY22istpimTZsW55999tkKn3/aaacVfyDxvvvuS/fu3YvHFi0wV9SMGTNy8cUXL/P4ggULcu655xa3W7ZsWXxBb1GXXnppcX7ttdeWKTqX5/PPP69w7Hc1ceLECseOHj26OF9//fWXON66devivGfPnst9ge+SSy7J5MmTK3ztbzzwwAN56aWXlnn8vffeS5cuXYrbZ5xxxhIxX375ZYWuNXny5MyYMaO4vfg9N2rUKNttt12SZP78+endu/cy15o0aVKZZ2JFXHTRRZk9e/Yyj19//fUZM2ZMkqRmzZo59dRTv9V1AAAAAABgbaZmtaRVWbNamj/+8Y/54IMPlnn8hRdeyMMPP1zcXlpdCAAAAAAAWD2pzSxpVddmvs13sOj93XPPPfnb3/5W4et9+eWXS21aNmPGjBx//PHFHyc99thji+/GVK9ePX379k3jxo2TJIMGDcrvfve75V5nRe/rjDPOSLVqX7/y98EHH+SWW25ZZuzcuXNz9tlnF7d33nnnMu9TfRenn356SkpKkiRvv/32cn/0c/z48bn66qsr5boAAAAAAMD/UcNaUlW/X1QRF110UfFzf+6558r8Dl15ZsyYkZkzZ66s1AAAAAAAgLVEaWlpSkpKUlJSklGjRlV1OpWiQ4cOxXvq1atXVafDKqKx2mJ23HHH4vyRRx5Z4fM32mij/PSnP03ydSOsb35ccf3118/PfvazFV6vVq1a6dGjRy644IIlmllNnjw5xx13XPr371/cd+ONNy51nRNOOCE/+clPkiRfffVV9ttvv9x6663LLJ7Omzcv//jHP3LiiSdmp512WmZ+i/7FUVpauoJ3t6Tbb789//Vf/5WuXbtm7NixS42ZPXt2br311jz22GPFfYcddtgScccdd1zxRcLhw4fnl7/8ZfGlxm9MnTo1Z511Vu64447Url17hXKtWbNmFi5cmJ/97Gd56qmnljg+ePDgHHTQQfnqq6+SJC1atMiZZ565RFybNm1y3HHH5e9//3vmzJmz1Gt98sknOfbYYzN37twkyTbbbJMttthiibgTTzyxOP/1r39d5tn4xuuvv5699947o0ePXuF7rlWrVt56660cdthh+fTTT8scmz9/fq677rpcc801xX2//OUvs+GGG67QNQAAAAAAADWrb1RVzWpxtWrVyqxZs3LggQdmyJAhSxz/+9//nnbt2qVQKCT5uv7Trl27Ss8DAAAAAABYOdRmvlaVtZnNN9889evXT/L1ezTDhw8v95y99tqr+KOfhUIhv/jFL3L55Zdn0qRJS41fuHBhBg4cmDPPPDObbbbZEp9tkpxzzjn58MMPkyTNmzfP3XffXeZ4aWlp7rzzzuL2tddem5dffnmZOa7os1VaWprzzjuvuH3ZZZflxhtvzPz588vEjR8/Pu3atcugQYOSJCUlJbn55pvLXb+ittxyy3Ts2LG4feGFF6Zbt27Fetg3/vWvf+XAAw/M+PHjU6tWrUq7PgAAAAAAoIb1jdXl/aKKatGiRZnfYrvwwgvTuXPn5TbHGzFiRC6++OJsuumm+fjjj1dFmgAAAAAAALDaq1HVCaxujj766Nx1111Jkm7dumXEiBHZeeediy/mJV837PrRj360z
DU6d+6cv/3tb2X2nXLKKalZs+YK53Pdddfl8ssvz5/+9Kf07t07++67b9Zbb72MHTs2L7zwQmbNmlWMPf/883PooYcudZ1q1arlgQceyMEHH5xXXnkls2fPzkUXXZQrr7wyrVu3zqabbpratWtn6tSp+eijj/L2228Xi9ZNmzZd4by/i7feeivnnXdezjvvvGy++ebZcccds95662XhwoUZN25chg4dmilTphTj27dvn9atWy+xzqabbprzzz8/t912W5KkS5cueeSRR7LPPvukQYMG+fTTTzNw4MDMnDkzm2yySc4555z8+te/rnCeG2+8cY4++uj84Q9/yE9/+tNsv/32+dGPfpTq1atn5MiRGTFiRDG2bt266dOnT5nn6Bvz5s3LQw89lIceeii1a9fOjjvumC233DKNGzfO1KlTM2rUqAwbNqz44mH16tXTtWvXpeZ03nnn5e67787o0aMzffr0HHDAAdl9993TsmXLzJ8/P2+//XZee+21FAqFnHbaafnwww/z4osvVviezz777Pz973/PCy+8kM033zxt27ZNaWlppk6dmhdffDHjx48vxu62225lCvsAAAAAAEDFqVlVfc1qUW3atEmzZs3y6KOPZo899siPfvSj7LjjjlmwYEFGjBiRd999txi73nrrpVevXqlWrVqV5QsAAAAAAKwYtZmqr81Uq1Yt7dq1y3333Zck2WeffXLwwQenefPmqVHj/16Bu+WWW8qcd8cdd2TcuHF56qmnsmDBglx33XW55ZZbsuuuu2bzzTdPvXr1Mm3atIwePTpvvfVWpk+fvswc+vbtm969eyf5+v2d+++/P40bN14i7vjjj88zzzyT3r17Z8GCBTnxxBPz5ptvpkmTJkvEHn300XnmmWeSJJdcckmefvrp7LDDDqlTp04x5qyzzsoWW2xR3L7hhhsyYsSIDBo0KIVCIZdeemn++Mc/Zu+9906TJk0yevToDBgwIHPmzCme87vf/S777bff8j7iFXbLLbdk8ODBeeeddzJ//vycc845ueGGG7LXXnulXr16+eCDDzJo0KAsXLgwrVu3zuabb56+fftWag4AAAAAALA2U8Oq+hrWt/XrX/86o0ePzt13350k6d69e+655560atUq2267bRo2bJjp06dn7NixefPNNzNp0qQqzhgAAAAAAABWPxqrLWb//ffPySefnD59+iRJXnnllbzyyitlYlq2bLncIvJBBx2UFi1a5OOPPy7u69ix47fKZ5dddsljjz2Wk046KVOmTFmiOJ18XSC+6KKLcuONNy53rSZNmmTgwIG57LLL0q1bt8yZMyezZs3K888/v8xzqlWrlr322utb5f5tNGzYsMz2Rx99lI8++mipsdWrV8+55567xAuRi7rxxhszZsyYPPbYY0mSsWPH5oEHHigTs8MOO+TRRx/N0KFDVzjfG264IbNmzcqdd96Zd999t8yPZn5jww03TN++fbPnnnsudY1F73nOnDkZMWJEmaZsi9poo43So0ePHHDAActc66mnnsrBBx+ccePGJUmGDRuWYcOGlYk788wz06VLlxx00EEVus9vNG7cOE8//XSOPPLIvPPOO+nXr99S4w466KA8/PDDZV7yBAAAAAAAKk7Nasm1V2XNaml69uyZhQsX5q9//esy6zlbbrll/vrXv2brrbeuggwBAAAAAIBvS21mybWrojZz3XXX5fnnn8/YsWMze/bsPP7440vELP4eUc2aNfPkk0/mhhtuyPXXX5/p06dn7ty5GTRoUAYNGrTMa+2+++5lfjD0448/zllnnVXcvuKKK5b5LlDydUO3QYMG5cMPP8zo0aNzxhln5OGHH14i7pRTTsn999+fF154IYVCIQMGDMiAAQPKxBx++OFlGqvVqVMn/fr1S+fOnXPfffelUChk/PjxeeSRR5ZYv2HDhrn55pvTuXPnZeb6bTVq1CjPP/982rVrl8GDBydJxowZs8S7WXvuuWceeeSRXHbZZ
ZWeAwAAAAAArM3UsJZcu6rfL1oRd911V370ox/l17/+dSZMmJAFCxbk1VdfzauvvrrMc1q2bJl11113FWYJAAAAAAAAq69qVZ3A6qh379558MEHc8QRR2SzzTZL3bp1V+j8kpKStGvXrri99957f6cfTzzssMPy9ttv56qrrkqrVq2y7rrrpk6dOmnRokVOP/30DBs2LDfddFNKSkrKXat27dq59dZb8/HHH+fGG2/MwQcfnE033TT16tVLzZo1s+6666ZVq1Y54YQT0r1794wePXqpLyGuLBdddFFGjRqVP//5zznttNOy6667Zr311kvNmjVTq1atrL/++vnxj3+cyy+/PO+8805uu+221Kix7P6AtWrVyqOPPprHHnsshx9+eDbYYIPUrFkzG2ywQfbee+9069Ytw4cPz7bbbvut8q1Ro0a6deuW559/PieeeGK22GKL1K1bN02aNMkuu+ySa6+9Nv/617+y7777LnONN954I4MGDcrvf//7/OxnP8s222yThg0bpnr16qlfv35atGiRn//85/nzn/+cDz74IIceeuhyc9pxxx3zr3/9K9dcc0122WWXNG7cuPi8nHjiiRkwYEDuvPPO1KpV61vd89Zbb51XXnkl3bp1y957750NN9wwtWrVykYbbZQjjjgijz32WP75z3+mcePG32p9AAAAAADga2pWVVezWpoGDRrksccey9/+9rf8/Oc/T/PmzVO7du00bdo0e+65Z7p06ZKRI0dmxx13rNI8AQAAAACAb0dtpuprM5tuumnefPPNXHPNNWnTpk2aNm263PeGvlFSUpLLLrsso0ePzp/+9Kf87Gc/S2lpaRo2bJgaNWqkSZMm2WGHHXL00UenS5cu+fe//52hQ4emdu3aSZL58+fnhBNOyLRp05J83SjsyiuvXO41GzRokAceeKDYnO2RRx7JPffcs0RcjRo18uyzz6Z79+45+OCDs/HGG6dOnTrl3lPdunXTu3fvDB8+POeff35atmyZddZZJzVr1syGG26YH//4x7nuuuvy4YcfrpSmat9Yf/318/LLL+e+++7LwQcfnA022CC1atXKJptskgMPPDC9evXKgAEDstFGG620HAAAAAAAYG2mhlX1NazvomPHjvnkk0/So0ePHHvssdlyyy3TuHHjVK9ePY0aNcrWW2+dI444IjfeeGPefPPNjBw5MhtvvHFVpw0AAAAAAACrhZJCoVDVOVSqkpKSwupwT//1X/+Vt956K0nSp0+fnHTSSRU+t23btnnxxReTJC+88ELatm27MlLkWxg1alRatGiRJGnevHlGjRpVtQmtJUpKSlIoFMr/VxIAAAAAALASrC71p29DzYpVQS0HAAAAAICqtDrVctRmoPKoQQEAAAAAUJVWpxrUd6WGxcqglgMAAAAAQGX7PtVnqsKi/XQqomfPnunQocNSj82ZMyd9+/bN008/nVdffTUTJkzIggULsv7662e33XbLkUcemWOOOSbVqlVb4tx58+Zlzz33zPDhw5Mke++9d1544YWlxn7jjTfeSOvWrTNnzpwkSdeuXXPOOeckSUpLS/PJJ59U6J5OOeWU9OrVq0Kx38bw4cOz2267JUkaNmyYcePGpX79+uWeVygUsuWWW+ajjz5KkvTt2zfHH3/8Ssvz+2TZTw3f2iuvvFIsIK+zzjo5+uijqzgjAAAAAAAA1jZqVgAAAAAAAKuO2gwAAAAAAACrGzUsAAAAAAAAVsTTTz+drbfeOqeddloeffTRfPzxx5kxY0Zmz56dTz75JI888kiOP/74tGrVKv/+97+XOL9mzZp54IEH0rBhwyTJwIED8/vf/36Z15s5c2aOP/74YlO1I444othUbXWz6667ZqeddkqSTJ8+PQ899FCFznv++eeLTdWaNm2aI488cqXl+H1To6oT+D7q0qVLcd6hQ4fUqVOnCrMBAAAAAABgbaRmBQAAAAAAsOqozQAAAAAAALC6UcMCAAAAAABYOzRq1
KjYkKx3796ZPn16kqR9+/bFJmeL2m677ZbY171795x11llZuHBhkqRevXpp3bp1mjdvnmrVquWjjz7KoEGDMnfu3IwcOTKtW7fO0KFDs9VWW5VZZ4sttsgdd9yR9u3bJ0muueaa7L///tljjz2WuOYFF1yQ9957L0my8cYb59577y1z/JRTTsnEiRPTv3//Ytz++++fbbfddom1WrduvcS+Dh065C9/+UuSpHnz5hk1atQSMSuic+fOOfPMM5MkPXr0yGmnnVbuOT169CjOTz755NSuXfs75bA20VitkvXr1y8PPPBAkq+7IJ5//vlVnBEAAAAAAABrGzUrAAAAAACAVUdtBgAAAAAAgNWNGhYAAAAAAMDaY911103Xrl2TJE899VSxsdo111yT0tLScs9/+eWXc84552ThwoWpUaNGrrjiivzyl79Mo0aNysSNHTs2Z555Zv7+979n0qRJOeaYY/Lqq6+mWrVqZeJOPvnkPPvss7nvvvuyYMGCnHjiiXnjjTfSuHHjYszDDz+ce+65J0lSrVq13HfffWnatGmZda655pokXzdI+6ax2kknnZQOHTpU/MOpRCeccEL+53/+JzNmzMiQIUPy7rvvZvvtt19m/KRJk/K3v/2tuN2pU6dVkeb3hsZq39F//vOfdO3aNQsWLMh//vOfPPvssykUCkmSs846q0J/OQAAAAAAAMB3oWYFAAAAAACw6qjNAAAAAAAAsLpRwwIAAAAAAODbKBQK6dy5c+bPn58k6dmzZ0466aSlxm688cb529/+lgMOOCADBgzIG2+8kb/+9a85+uijl4jt1q1bBg8enI8++iijRo1K586d8+CDDyZJPvnkk5xxxhnF2EsuuST77rvvSri7ytWwYcOccMIJ6d69e5KkR48eufXWW5cZ36dPn8yZMydJssceeyy3CRtL0ljtO/r000/TpUuXJfa3atUqv//976sgIwAAAAAAANY2alYAAAAAAACrjtoMAAAAAAAAqxs1LAAAAAAAAL6Np59+Ou+++26S5MADD1xmU7VvVK9ePdddd1322GOPJF83D1taY7WGDRvmgQceyF577ZV58+bloYceysEHH5z27dvnhBNOyNSpU5Mku+++e377299W8l2tPJ07dy42VuvTp09uuOGG1KpVa6mx99xzT3HeqVOnVZLf90m1qk7g+6RmzZrZcsstc+mll2bgwIFp0KBBVacEAAAAAADAWkbNCgAAAAAAYNVRmwEAAAAAAGB1o4YFAAAAAABART311FPF+QknnFChc1q3bp369esnSV566aVlxu2222753e9+V9w+77zz0qlTpwwePDjJ183X+vbtmxo1anyb1MvVq1evFAqFFAqFjBo1qlLW3HnnnfOjH/0oSfLll1/m8ccfX2rcsGHDMnLkyCRJo0aNcswxx1TK9dcmK+epWIu0bds2hUKhUtccMGBApa5H5SktLa307xsAAAAAAOC7UrMCAAAAAABYddRmAAAAAAAAWN2oYQEAAAAAAPBtfNPkLEn+/ve/Z8SIESt0/uTJkzNz5sxio7XFXXzxxenXr1/69++fmTNnpmfPnsVjd955ZzbffPNvl3gVOuOMM4qfU48ePZbaNK1Hjx7F+Yknnph69eqtsvy+LzRWAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABYi4wdO7Y4/+tf//qt1pg8efIyG6tVq1Ytffr0yQ9/+MN8+eWXxf0nn3xyTjzxxG91vap2/PHH56KLLsr06dPTv3//fPLJJ2nevHnx+MyZM/PQQw8Vtzt27FgVaa7xqlV1AgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAArDpTpkz5zmvMnz9/ucfr16+fBg0alNnXrl2773zdqtKgQYNiU7iFCxfmnnvuKXP8wQcfzPTp05MkO++8c3beeedVnuP3gcZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABrkUUbng0cODCFQmGFR2lp6XKvc
dZZZ2XUqFFl9p199tn54osvVsIdrRqdO3cuznv27JmFCxcWt3v06FGcd+rUaZXm9X2isRoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwFpkgw02KM4///zzSl+/d+/e6du3b5KkRo0a2WKLLYrXOvXUUyv9eqtKq1atsttuuyVJPv300zzzzDNJknfeeSdDhw5NktSrVy8nnHBCleW4ptNYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgDVZSUrJC8a1bty7OX3755UrN5T//+U/OOeec4vZvf/vbPPHEE6lbt26S5Omnn06XLl3KXWdF72lV6dy5c3Heo0ePMv+bJMcee2waNWq0yvP6vtBYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgDVanTp3ifN68eeXGH3744cX5fffdl+nTp1dKHvPmzcsJJ5yQGTNmJEn23XffXHLJJdlhhx1yyy23FOMuueSSvPnmm8tda0XvaVU57rjj0rhx4yTJU089lTFjxqRPnz7F4506daqq1L4XNFaDlahv376pUaNGSkpKUlJSkl122SWTJ0+u6rQAAAAAAAAAAAAAAACASjJ79uwMGTIkt99+ezp06JCWLVuWeaeoQ4cOVZ0iAAAAAAAAAAAAAAAAa4GmTZsW55999lm58e3atcvWW2+dJJk0aVJOPfXULFiwoELXWrhwYSZMmLDUY1dccUWGDx9ezKlPnz6pVu3rVllnn312jjjiiCTJnDlzcvzxx2fWrFmVdk+rSr169XLSSScl+brh2zHHHJOJEycmSXbYYYe0adOmKtNb42ms9j1WWlpafPlu1KhRVZ1OpejQoUPxnnr16lXV6SzXAw88kPbt25f5y/61117LgQcemClTplRdYgAAAAAAwPeeOhEAAADw/9r783Ar63p//H+uzWYGFUdSD4MhWV9QVFQyU8QJ0zySpjkwOIUTpKZpdiSt9HPKMCmOpqkoOOF4ciDTEEXEBBwxNUXdqOGAMiggCOz1+8Of67Bj2sCGDXs/Htf1vq77vtd7eK11c/zjPM8+LwAASORG68L555+fli1bZs8998zAgQNz00035R//+Ee1/4AUAAAAAACgJsiFAAAAAAAASJLOnTuXru+8886Vzm/QoEGuvfbaNGzYMEly9913p0ePHnnmmWeWu2bq1KkZPHhwOnbsmJEjRy71+ejRo/Pb3/62dH/ddddlm222qTLnhhtuyNZbb50keeWVV3L22WdX6zv9+c9/zueff77S75VUzZvatWtXrTWr6oc//GHp+u9//3vp+pRTTlkr59Un5bVdANRFt99+e3r37l36A8gOHTqkoqIiixYtyjPPPJMDDjggjzzySDbZZJPaLRQAAAAAAAAAAAAAAABYbR988IEmagAAAAAAAAAAAAAAAKwXjjzyyPzxj39Mklx11VWZNGlSdtlllzRv3rw05wc/+EG6du1aut9nn31y3XXX5eSTT87ChQszduzYdO3aNR06dMjOO++cTTfdNPPnz8/06dMzefLkvPPOO8s9/6OPPkqfPn1SWVmZJOnfv38OP/zwpeZtttlmGT58eA444IAUi8Vce+21Oeigg/K9731vqbkHH3xwmjVrlnnz5uX555/PDjvskO7du6dVq1YpFApJkt122y1HH330av1ma2LHHXdMt27dqjRVa9y4cXr37r3Oa6lrNFaDGnbHHXfk+OOPL/1BZM+ePXPnnXfmiSeeyPe///3MnTs3kyZNyoEHHphHHnkkG2+8cS1XDAAAAAAAAAAAAAAAAKyJ9u3bp2vXrqVx9dVX56677qrtsgAAAAAAAAAAAAAAAKhH9ttvv/Tu3TsjRoxIkkyYMCETJkyoMqdTp05VGqslSZ8+fdKhQ4f0798/L730UpJkypQpmTJlynLPat26dTp27Fjl2Yknnphp06YlSb7+9a/nd7/73QprPe+88/Kb3/wmSXLKKadk9913z7bbbltl3kYbbZQrr7wyp556a
iorK/PWW2/lrbfeqjKnb9++tdJYLfmiedySjdW+973vZdNNN62VWuqSstouAOqSO+64I8cdd1ypqdqJJ56Y+++/Py1atMjBBx+cxx9/PK1bt06STJw4MQceeGBmz55dmyUDAAAAAACwllx88cUpFAopFAp57LHHarscAAAAAAAA1oKf/exn+fjjj/Pmm2/mjjvuyE9+8pP06NEjzZs3r+3SAAAAAAAAAAAAAAAAqIeGDx+e22+/PYcddljatGmTpk2bVmvdnnvumRdffDEPPPBA+vfvn86dO2fzzTdPeXl5mjdvnjZt2mT//ffPBRdckNGjR+fdd9/NgQceWFo/dOjQ3H///UmSxo0b57bbblvp2b/61a9KTd5mzJiR448/PpWVlUvNO+WUU/Lkk0+mX79+2WGHHdKiRYsUCoXq/iRrVa9evarUcsopp9RiNXVHeW0XAHXFXXfdleOOOy6LFi1K8sX/o8yf//znVebsuuuueeqpp3LwwQfn1VdfzYQJE3LggQfm4YcfzsYbb1wbZQMAAAAAAAAAAAAAAACrafvtt6/tEgAAAAAAAAAAAAAAAKCKo48+OkcfffQqrysUCjnkkENyyCGHrPLaM888M2eeeeYqrWnYsGEmTpxYrbndunVLt27dqr33jTfemBtvvHGV6lkdo0ePTrFYTJJ06NAh3bt3X+tn1gdltV0ANauioiKFQiGFQiFTp04tPW/fvn3p+ZJjRf/Du2DBggwbNizf//73s91226Vly5Zp1qxZ2rVrl6OOOiq33377Mjs0JsnChQuz++67l87ZZ599ljv3S88//3yaNGlSWvM///M/pc/atWuXQqGQm266qfTshBNOWOZ36tevX/V+rBp0991355hjjsmiRYtSXl6eYcOGLdVU7Uvt2rXL+PHj8+1vfztJMmHChBx00EH55JNP1mXJAAAAAABAHSMnqp2cCAAAAAAAYH0jN5IbAQAAAAAA9YNcSC4EAAAAAABA7bvuuutK1yeffHIKhUItVlN3aKzGMo0aNSodO3bMiSeemLvuuitvvfVW5syZk88++yxTp07NnXfemWOOOSZdunTJ66+/vtT6hg0b5rbbbkvLli2TJGPHjs2ll1663PPmzp2bY445JgsWLEiSHHbYYTnjjDPWzperYffcc09+8IMfZNGiRWnZsmUefPDBlYbsrVq1yiOPPJKjjjoqSfL0009rrgYAAAAAAKwX5EQAAAAAAAAkciMAAAAAAID6Qi4EAAAAAAAAq+ef//xn/vrXvyZJGjdunBNPPLGWK6o7ymu7AGrWRhttVAqGhw8fnk8//TRJ0qdPn1LYvKSvf/3rSz279tprc9ppp6WysjJJ0qxZs3Tr1i1t27ZNWVlZ3nzzzTz55JP5/PPPM3ny5HTr1i1///vfs/3221fZ56tf/Wr+53/+J3369EmSXHLJJdlvv/2y5557LnXmj370o7z66qtJkq233jo33HBDlc/79u2bjz/+OKNHjy7N22+//bLDDjsstVe3bt2WetavX7/cdNNNSZK2bdumoqJiqTmr4957783RRx+dRYsWZeutt86oUaOy0047VWtt48aNc/vtt2fbbbfNFVdckb///e/p2bNn/vrXvy7zXQEAAAAAAKyInGjd5kQAAAAAAADrK7mR3AgAAAAAAKgf5EJyIQAAAAAAAGrPggULcsYZZ5Sytj59+mSLLbao5arqjkKxWKztGmpUoVAo1rXvtLratWuXqVOnJkneeuuttGvXbqVrxo0bl3333TeLFi1KeXl5/uu//itnn312Ntpooyrzpk2bllNPPTX3339/kqRLly555plnUlZWttSevXv3zs0331yq6fnnn8/GG29c+vyOO+7I0UcfnSQpKyvL3/72t+y7777LrG/JoHrYsGHp16/fSr/Tv68TcG94CoVCisViobbrAAAAAACgftoQ8yc50bLXreuc6OKLL84ll1ySJBkzZky6d+++zs5el2Q5AAAAAADUpg0xy
1mX5EbLXrcucqMlz+vbt29uvPHGtXpeXSODAgAAAACgNm2IGZRcaNnrajIXGjJkSF5//fUa2etLvXv3zh577FGje64LshwAAAAAAGrahpjPUD8NHTo0U6ZMyezZs/Poo4/m7bffTpK0aNEiL7/8cv7jP/6jliusO8pruwDWH8ViMf3798+iRYuSfBEgH3/88cucu/XWW+fee+/N/vvvn8ceeyzPP/987rnnnhx55JFLzb3qqqsyfvz4vPnmm6moqEj//v1z++23J0mmTp2aH/7wh6W5559//nLDbQAAAAAAANYOOREAAAAAAACJ3AgAAAAAAKC+kAutunvvvTePP/54je7ZtWvXDbKxGgAAAAAAQH111113LZUZFQqFXHXVVZqq1bCy2i6A9ceoUaPy8ssvJ0kOOOCA5YbbX2rQoEEuu+yy0v2IESOWOa9ly5a57bbb0rBhwyTJyJEjM2zYsCxevDjHHntsZs+enSTZY4898otf/KImvgoAAAAAAACrQE4EAAAAAABAIjcCAAAAAACoL+RCAAAAAAAAsGY22WSTHHTQQfnb3/6W3r1713Y5dY7GapQ88MADpetjjz22Wmu6deuW5s2bJ0meeOKJ5c7bfffd88tf/rJ0P2DAgJxyyikZP358ki9C8FtvvTXl5eWrU/pK3XjjjSkWiykWi6moqFgrZwAAAAAAAGyo5EQAAAAAAAAkciMAAAAAAID6Qi606h577LHSvjU1+vXrV2P1AQAAAAAAsPYtmRnNnDkzDz30UHr06FHbZdVJaydNZIP0ZdicJPfff38mTZq0SutnzpyZuXPnlgLvf3feeeflkUceyejRozN37twMGzas9NnVV1+d7bbbbvUKBwAAAAAAYI3IiVZd9+7d8/jjj1d7/r777rvSOcOGDfPHkAAAAAAAQK2SGwEAAAAAANQPciEAAAAAAABgfaaxGiXTpk0rXd9zzz2rtcfMmTOXG3CXlZVlxIgR2XHHHfPRRx+Vnvfu3TvHHXfcap0HAAAAAADAmpMTAQAAAAAAkMiNAAAAAAAA6gu5EAAAAAAAALA+01iNklmzZq3xHosWLVrh582bN0+LFi2qBNy9evVa43MBAAAAAABYfXKiVderV6906tRphXMmTJiQiRMnJkkOP/zwbLPNNiuc//Wvf73G6gMAAAAAAFgdciMAAAAAAID6QS4EAAAAAAAArM80VqOkRYsWpZB77Nix+fa3v13jZ5x22mmpqKio8uz000/Pt771rWy55ZY1fh4AAAAAAAArJydadT/60Y9WOufiiy8uNVb70Y9+lO7du6/lqgAAAAAAANaM3AgAAAAAAKB+kAutuiFDhuT111+v0T179+6dPfbYo0b3BAAAAAAAgLpAYzVKttpqq1LA/f7779f4/sOHD8+tt96aJCkvL0/btm3zxhtv5P33388JJ5yQBx98sMbPBAAAAAAAYOXkRAAAAAAAACRyIwAAAAAAgPpCLrTq7r333jz++OM1umfXrl01VgMAAAAAAIBlKKvtAlh7CoXCKs3v1q1b6XrcuHE1WsuUKVNyxhlnlO5/8Ytf5M9//nOaNm2aJBk1alSGDBmy0n1W9TsBAAAAAADUR3IiAAAAAAAAErkRAAAAAABAfSEXAgAAAAAA4EuFQqE0YEOlsVod1qRJk9L1woULVzr/0EMPLV3ffPPN+fTTT2ukjoULF+bYY4/NnDlzkiT77rtvzj///Px//9//l9/+9releeeff35eeOGFFe61qt8JAAAAAACgPpITAQAAAAAAkMiNAAAAAAAA6gu50Nr32GOPpVgs1ujo169fbX8tAAAAAACA9V6/fv2qNItr0qRJ3n777WqtnTRpkkZzGyiN1eqwzTbbrHT9r3/9a6Xze/XqlY4dOyZJZsyYkRNOOCGLFy+u1lmVlZWZPn36Mj/7r//6r0ycOLFU04gRI1JW9sU/vdNPPz2HHXZYkmTBggU55phjMm/evBr7TgAAAAAAAPWRnAgAA
AAAAIBEbgQAAAAAAFBfyIUAAAAAAACoLxYsWJBLLrmktstgLdNYrQ7r3Llz6frOO+9c6fwGDRrk2muvTcOGDZMkd999d3r06JFnnnlmuWumTp2awYMHp2PHjhk5cuRSn48ePTq//e1vS/fXXXddttlmmypzbrjhhmy99dZJkldeeSVnn312tb7Tn//853z++ecr/V5J1c6R7dq1q9YaAAAAAACADZWc6P/IiQAAAAAAgPpMbvR/5EYAAAAAAEBdJhf6P3IhAAAAAACAuu+mm27Ka6+9VttlsBaV13YBrD1HHnlk/vjHPyZJrrrqqkyaNCm77LJLmjdvXprzgx/8IF27di3d77PPPrnuuuty8sknZ+HChRk7dmy6du2aDh06ZOedd86mm26a+fPnZ/r06Zk8eXLeeeed5Z7/0UcfpU+fPqmsrEyS9O/fP4cffvhS8zbbbLMMHz48BxxwQIrFYq699tocdNBB+d73vrfU3IMPPjjNmjXLvHnz8vzzz2eHHXZI9+7d06pVqxQKhSTJbrvtlqOPPnq1fjMAAAAAAIC6QE4EAAAAAABAIjdaF6ZNm5bvfOc7Sz1/++23S9f33XdfunTpstSc559/fi1WBgAAAAAA1CdyIQAAAAAAAOqTxYsX56KLLsrIkSNruxTWEo3V6rD99tsvvXv3zogRI5IkEyZMyIQJE6rM6dSpU5WAO0n69OmTDh06pH///nnppZeSJFOmTMmUKVOWe1br1q3TsWPHKs9OPPHETJs2LUny9a9/Pb/73e9WWOt5552X3/zmN0mSU045Jbvvvnu23XbbKvM22mijXHnllTn11FNTWVmZt956K2+99VaVOX379hVwAwAAAAAA9ZqcCAAAAAAAgERutC58/vnneeGFF1Y4Z+bMmZk5c+Y6qQcAAAAAAKif5EIAAAAAAADUB3vssUeefvrpJMmdd96Zn/70p+nSpUvtFsVaUVbbBbB2DR8+PLfffnsOO+ywtGnTJk2bNq3Wuj333DMvvvhiHnjggfTv3z+dO3fO5ptvnvLy8jRv3jxt2rTJ/vvvnwsuuCCjR4/Ou+++mwMPPLC0fujQobn//vuTJI0bN85tt9220rN/9atflcL2GTNm5Pjjj09lZeVS80455ZQ8+eST6devX3bYYYe0aNEihUKhuj8JAAAAAABAvSAnAgAAAAAAIJEbAQAAAAAA1BdyIQAAAAAAAOq6rl27plevXkmSYrGYCy+8sJYrYm0pFIvF2q6hRhUKhWJd+05Q3xUKhRSLRf9XDAAAAAAA1Ar5E6yYLAcAAAAAgNoky4G6SQYFAAAAAEBtkkHBislyAAAAAACoaXUln2nXrl2mTp2aJHnrrbdK98OGDcsDDzyQd955J9OnT89GG22UWbNmLbV+2rRpuf766/PXv/41U6ZMyYwZM7LRRhulTZs22W+//dK3b9906tRppXUUCv/3v8avzu+6YMGC3HrrrRk1alSeeeaZTJ8+PYsXL86WW26Z3XffPd/73vdy1FFHpaysrPo/Rg3q169fbrrppiTJGWeckdNPPz2dO3dOZWVlkuSJJ57IXnvttcy1kyZNym677Va6r87vMWvWrFx//fUZNWpUXn311Xz00Udp3rx5tt5663Tv3j3HHXdcvvnNb650n2X9e3j//ffzpz/9Kffee2+mTp2aefPmpXXr1tl7770zYMCAdO3adaX7Lmn8+PG54447MmbMmEybNi2zZ89Oq1atsv322+eggw7Kqaeemi222GKV9lxfaKwGrPeE5wAAAAAA1Cb5E6yYLAcAAAAAgNoky4G6SQYFAAAAAEBtkkHBislyAAAAAACoaXUln/n3RlqPPvpoBg4cmLlz51aZt/HGGy/VWG3w4MEZNGhQ5s2bt9z9y8rKcuqpp+bKK69Mw4YNlztvVRqrjRo1KqeddlrefvvtFc7r3Llz7r777my//fYrnLc2/HtjtaFDh6Zv374ZPnx4kmTvvffO448/vsy1q9pY7ZZbb
snAgQMzY8aMFc478sgjc8MNN6Rly5bLnfPv/x5eeuml9O3bd7l7FwqFXHbZZbngggtWeHaSfPDBBznppJPy4IMPrnBeixYt8rvf/S4nn3zySvdc35TXdgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHXB3XffnXPPPTdJstVWW2WvvfbKpptumg8//DDPPfdclbnnnntuBg8eXLpv0qRJunfvnm233TYzZszIY489lhkzZqSysjJXXXVV3nzzzTzwwANp0KDBGtV47bXX5rTTTktlZWWSpFmzZunWrVvatm2bsrKyvPnmm3nyySfz+eefZ/LkyenWrVv+/ve/L7e5WkVFRdq3b1+6HzZsWPr167dGNS7PxRdfnNtuuy0LFy7M2LFj89BDD6Vnz55rtOfQoUMzYMCA0n15eXm+/e1vZ7vttsucOXMyduzYvPfee0mSu+66K2+88UYef/zxFTZX+9KYMWPywx/+MIsWLUrr1q3zrW99K5tttlnee++9jBkzJnPmzEmxWMxPf/rT7LDDDjn88MOXu9cbb7yR/fffPxUVFaVnnTt3TqdOndKyZct8/PHHGTduXD744IPMmTMnp5xySj755JOcc845q/3b1AaN1QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAasAFF1yQhg0b5ne/+11OO+20lJWVlT6bP39+6fp///d/qzRVO+KII3LNNddks802Kz1bsGBBBg0alN/85jdJkoceeii/+MUvcskll6x2fePGjcsZZ5yRysrKlJeX57/+679y9tlnZ6ONNqoyb9q0aTn11FNz//33Z8aMGTnqqKPyzDPPVPk+taF9+/Y55ZRTctVVVyVJfvazn+Wggw5KoVBYrf0mTpyYs88+u3S/9957Z/jw4Wnbtm3pWWVlZYYMGZJzzz03lZWVee6553LmmWfmpptuWun+p512Who0aJChQ4fm5JNPrtIUb/r06TnyyCMzduzYJMn555+/3MZq8+fPz5FHHllqqrbvvvtm6NCh+cY3vlFl3qJFi3LVVVflvPPOy+eff57zzz8/3/72t7PbbrtV9yepdbX7LwwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAoI5YtGhR/vSnP+WMM85YqglZkyZNknzRqOu8884rPT/ooIMycuTIKk3VkqRx48b59a9/nXPOOaf07Ne//nU+/PDD1aqtWCymf//+WbRoUZJk2LBh+fnPf75UU7Uk2XrrrXPvvfeme/fuSZLnn38+99xzz2qdW9MuuuiiNGvWLEny7LPP5q677lrtvc4///zS77HTTjvlL3/5S5WmaklSVlaWs88+O1deeWXp2fDhw/PCCy+sdP8FCxbktttuS//+/as0VUuSLbbYIiNHjix9l9deey0TJ05c5j5DhgzJ888/nyTp2bNnHn744aWaqiVJeXl5Bg4cWGo8t2jRojVqxFcbNFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKgBXbt2Td++fVc455FHHsmUKVOSfNG06+qrr16q6daSfvWrX6V169ZJvmjUdf31169WbaNGjcrLL7+cJDnggANy/PHHr3B+gwYNctlll5XuR4wYsVrn1rTWrVtnwIABpftBgwZl8eLFq7zPP//5z4wZM6Z0P2TIkFKTs2U588wz06VLl9L9l83LVqRnz57p1avXcj9v3bp1DjnkkNL9hAkTlpqzaNGiDBkyJMkX7+Saa65JeXn5Cs898cQT87WvfS3JF+/9448/Xmmt6wuN1QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAasAxxxyz0jmPPvpo6XrfffdN+/btVzi/adOmOe6445a5flU88MADpetjjz22Wmu6deuW5s2bJ0meeOKJZc5p165disViafTr12+16lsV559/fjbeeOMkyauvvprhw4ev8h5L/o5f/epXs88++6xwfqFQyEknnbTM9cvzgx/8YKVzdtlll9J1RUXFUp8/88wze
e+995Ike+21V9q0abPSPQuFQnr06JEkKRaLefLJJ1e6Zn2x4pZxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABUy6677rrSOc8991zp+lvf+la19t1rr70yePDgJMmzzz67WrWNHz++dH3//fdn0qRJq7R+5syZmTt3bqnRWm1q1apVzj333Fx00UVJkksuuSTHHXdcGjVqVO09Vvc9fGnKlCn55JNPstFGGy13/o477rjSPTfbbLPS9ezZs5f6fMn39uGHH+bMM8+sVq0TJ04sXb/zzjvVWrM+0FgNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKAGbLnlliudM3369NJ127Ztq7Vv+/btS9czZ87M4sWL06BBg1Wqbdq0aaXre+65Z5XWLnn2+tBYLUnOOuus/OEPf8iHH36YqVOn5pprrsmAAQOqvX5N30OSfPTRRytsrLbJJpusdM+GDRuWrj///POlPl/yvb3yyit55ZVXqlFpVTNnzlzlNbWlrLYLAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACoC5o2bbrSOZ9++mnpurpNypacVywWM2fOnFWubdasWau85t8tWrRojfeoKS1atMiFF15Yur/00kszd+7caq9f0/fw73ssS6FQqHY9y1PX3tvKaKwGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALCOtGzZsnRd3UZgS84rFApp0aLFKp+75JqxY8emWCyu8mjXrt0qn7s2nXrqqWnTpk2S5IMPPsiQIUOqvXZN38O/77G2LPneTjzxxNV6bxdffPFar7OmaKwGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALCObLHFFqXrqVOnVmtNRUVF6bpVq1Zp0KDBKp+71VZbla7ff//9VV6/PmrcuHEGDRpUur/88ssza9asaq1d0/fw73usLXXxva2Ixmr1TKFQKA0AAAAAAADqDzkRAAAAAABA/SQnAgAAAAAAqJ/kRAAAAAAAAOu3XXbZpXQ9fvz4aq158sknS9e77rrrap3brVu30vW4ceNWa4/1Ub9+/dKxY8ckyaxZs/Kb3/ymWuvW9D1sv/32admy5SpUunqWfG9PPfVUFi9evNbPrE0aq7FB69evX5XQvkmTJnn77bertXbSpEkCfwAAAAAAgA2UnAgAAAAAAKB+khMBAAAAAADUT3IiAAAAAACgrunRo0fpesyYMamoqFjh/Pnz5+eWW25Z5vpVceihh5aub7755nz66aertc/6pkGDBvnFL35Ruh8yZEg++OCDla5b8necMmXKSpvNFYvFXH/99ctcvzbtueeeadWqVZJk5syZuf3229fJubVFYzXqlAULFuSSSy6p7TIAAAAAAABYx+REAAAAAAAA9ZOcCAAAAAAAoH6SEwEAAAAAABu6/fffPx06dEiSLF68OGeeeWYqKyuXO3/QoEGZNm1akqRx48Y56aSTVuvcXr16pWPHjkmSGTNm5IQTTsjixYurtbaysjLTp09frXPXhaOOOipdunRJksybNy+XXnrpStd07NixSnO0gQMHZv78+cudf/XVV+fZZ58t3Z9++umrX/AqaNSoUc4555zS/VlnnZU33nij2uvff//9tVHWWqOxGnXOTTfdlNdee622ywAAAAAAAGAdkxMBAAAAAADUT3IiAAAAAACA+klOBAAAAAAAbMjKyspy+eWXl+4ffPDBHHvssZkxY0aVeQsWLMiFF15YZe4FF1yQLbbYYrXObdCgQa699to0bNgwSXL33XenR48eeeaZZ5a7ZurUqRk8eHA6duyYkSNHLnNORUVFCoVCadx4442rVd+aKBQKVZqpPfXUU9Va99///d8pLy9Pkjz33HM55JBD8u6771aZU1lZmd///vcZOHBg6Vnfvn2z44471kDl1XPOOedkp512SpJ89NFH2X333TNs2LAsXLhwmfPnzZuXu+++O9/97ndz2GGHrbM6a0J5bRcANW3x4sW56KKLlvsfU
QAAAAAAAOomOREAAAAAAED9JCcCAAAAAACon+REAAAAAADAhu7www/Pj3/84wwePDhJMnLkyNx///3p3r17ttlmm8ycOTOPPvpolWZrPXv2zEUXXbRG5+6zzz657rrrcvLJJ2fhwoUZO3Zsunbtmg4dOmTnnXfOpptumvnz52f69OmZPHly3nnnnTU6b136zne+k7322ivjxo2r9prddtstV1xxRalp2qOPPprtttsue++9d9q3b585c+Zk7NixmTZtWmnNzjvvnD/84Q81Xv+KNGvWLPfdd1/233//vP7665kxY0ZOPPHEnH322fnmN7+Zr3zlKykvL8+sWbPy2muv5eWXXy41Xdt1113Xaa1rSmM16ow99tgjTz/9dJLkzjvvzE9/+tN06dKldosCAAAAAABgrZMTAQAAAAAA1E9yIgAAAAAAgPpJTgQAAAAAANQlv/3tb9O6desMGjQon332WebNm5dRo0YtNa+srCz9+/fPkCFD0qBBgzU+t0+fPunQoUP69++fl156KUkyZcqUTJkyZblrWrdunY4dO67x2WvbpZdemn322WeV1gwYMCCtWrXKwIEDM3PmzCxcuDCjR49e5twjjjgiw4YNS8uWLWui3FXSpk2bTJw4MQMGDMitt96axYsXZ/bs2XnooYeWu6Zhw4bp1q3bOqxyzZXVdgFQU7p27ZpevXolSYrFYi688MJarggAAAAAAIB1QU4EAAAAAABQP8mJAAAAAAAA6ic5EQAAAAAAUNece+65ef3113PJJZfkm9/8Zrbccss0bNgwm266aXbaaaecc845eeGFF3LVVVelYcOGNXbunnvumRdffDEPPPBA+vfvn86dO2fzzTdPeXl5mjdvnjZt2mT//ffPBRdckNGjR+fdd9/NgQceWGPnry177713evbsucrrjj/++Lzxxhu5/PLLs+++++YrX/lKGjVqlE022STf+MY3ctppp+XJJ5/MXXfdVStN1b608cYbZ/jw4Xn11Vdz8cUXp3v37tlmm23SpEmTNGrUKJtvvnl22223nHDCCRk+fHimTZuWoUOH1lq9q0NjtXWkXbt2KRQKKRQKqaioSJJMnTo1F198cbp27ZqtttoqZWVl2WSTTZa5ftq0afnlL3+ZvfbaK61bty79A9xll11y3nnnlbo21rQFCxZk2LBh+f73v5/tttsuLVu2TLNmzdKuXbscddRRuf3221NZWblWzl4dv/rVr1JW9sU/67/85S8ZN25cje4/a9asDB48OPvtt1+22WabNG7cOJtuumk6deqUM888M0899VS19lnWv4f3338/v/zlL7PLLrtks802S9OmTdO+ffv07ds3kyZNWuVax48fn7POOis77bRTtthiizRq1ChbbbVV9tprr/zyl7/M9OnTV3lPAAAAAABg1cmJ1g050f+REwEAAAAAwPpBTrRuyIn+j5wIAAAAAADWD3KidUNO9H/kRAAAAAAAsH6oqKhIsVhMsVhMu3btVnn9Nttsk0GDBmX8+PH54IMP8vnnn+fjjz/O888/n8GDB6dTp07V2ufLGorFYrXmFwqFHHLIIfnjH/+YF198MdOnT8/ChQszZ86cTJ06NY888kj+3//7f+nRo0caNGiw3H3atWtX5ex+/fpV6/yVufHGG0t7rkpzsL/85S9V6qnu79GqVauce+65efTRRzNt2rQsWLAgM2fOzD/+8Y9cddVV2XPPPau1z6r+e+jXr19p/o033litMzp06JCf//znGTNmTN5999189tlnWbBgQaZPn54JEybkhhtuSO/evbP55ptXa7/1SaG6L2xDUSgUiuvjd2rXrl2mTp2aJHnrrbfy6KOPZuDAgZk7d26VeRtvvHFmzZpV5dngwYMzaNCgzJs3b7n7l5WV5dRTT82VV165wq6QhUKhdL2y32nUqFE57bTT8vbbb69wXufOnXP33Xdn++23X+G8taFfv3656aabkiRnnHFGhg4dmr59+2b48OFJvuj++
Pjjjy9z7aRJk7LbbruV7lf2e9xyyy0ZOHBgZsyYscJ5Rx55ZG644YYVdoX8938PL730Uvr27bvcvQuFQi677LJccMEFKzw7ST744IOcdNJJefDBB1c4r0WLFvnd736Xk08+eaV71rZCoZBisVhY+UwAAAAAAKh5a5o/yYnWDjnR0morJ5LlAAAAAABQm9bXvyVK5ERri5xoaf6eCAAAAAAAapa/J1o+OVFV9TUnkuUAAAAAAFDT1ue/EQJqR3ltF1Af3X333Tn33HOTJFtttVX22muvbLrppvnwww/z3HPPVZl77rnnZvDgwaX7Jk2apHv37tl2220zY8aMPPbYY5kxY0YqKytz1VVX5c0338wDDzywwu6M1XHttdfmtNNOS2VlZZKkWbNm6datW9q2bZuysrK8+eabefLJJ/P5559n8uTJ6datW/7+978vN+SuqKhI+/btS/fDhg2rsa6Q/+7iiy/ObbfdloULF2bs2LF56KGH0rNnzzXac+jQoRkwYEDpvry8PN/+9rez3XbbZc6cORk7dmzee++9JMldd92VN954I48//vgKQ+4vjRkzJj/84Q+zaNGitG7dOt/61rey2Wab5b333suYMWMyZ86cFIvF/PSnP80OO+yQww8/fLl7vfHGG9l///1TUVFReta5c+d06tQpLVu2zMcff5xx48blgw8+yJw5c3LKKafkk08+yTnnnLPavw0AAAAAAFB9ciI50ZLkRAAAAAAAUH/IieRES5ITAQAAAABA/SEnkhMtSU4EAAAAAAAA1VQsFuvU+OIrrX/atm1bTFJMUiwvLy82bNiwOHTo0OLixYurzPvss89K1/fee29pTZLiEUccUfzoo4+qzJ8/f37xJz/5SZV5gwYNWm4dS85bnieeeKJYXl5eqvXiiy8uzp49e6l5//rXv4rf/e53S/t16dJlqe/zpbfeeqvK2cOGDVvu+auib9++pT3POOOM0vPTTz+99HyXXXYpVlZWLrV24sSJ1fo9JkyYUPo9khT33nvvYkVFRZU5ixcvLl5xxRXFsrKy0rw+ffosd88l/z00bty42Lhx4+If//jH4qJFi6rM+/DDD4t77713aW7Hjh2Xu+dnn31W7NKlS2nuvvvuW/zHP/6x1LyFCxcWhwwZUmzUqFHpHU+YMGG5+64P/v/vp9b/+2IYhmEYhmEYhmEYhmEYhmHUz7Gm+ZOc6C050RLqYk4kyzEMwzAMwzAMwzAMwzAMwzBqc6yvf0tULMqJ5ERV1cWcaG2SQRmGYRiGYRiGYRiGYRiGYRi1Ofw9kZxITrRishzDMAzDMAzDMAzDMAzDMAyjpsf6/DdCQO0oC+vcokWL8qc//SlnnHFGysqqvoImTZokSSorK3PeeeeVnh900EEZOXJkNttssyrzGzdunF//+tc555xzSs9+/etf58MPP1yt2orFYvr3759FixYlSYYNG5af//zn2WijjZaau/XWW+fee+9N9+7dkyTPP/987rnnntU6t6ZddNFFadasWZLk2WefzV133bXae51//vml32OnnXbKX/7yl7Rt27bKnLKyspx99tm58sorS8+GDx+eF154YaX7L1iwILfddlv69++fBg0aVPlsiy22yMiRI0vf5bXXXsvEiROXuc+QIUPy/PPPJ0l69uyZhx9+ON/4xjeWmldeXp6BAwfmqquuSvLFv8dLLrlkpXUCAAAAAABrTk609smJ5EQAAAAAALA+khOtfXIiOREAAAAAAKyP5ERrn5xITgQAAAAAAEDdo7FaLejatWv69u27wjmPPPJIpkyZkuSL8PTqq69eKvxc0q9+9au0bt06yReB6fXXX79atY0aNSovv/xykuSAAw7I8ccfv8L5DRo0yGWXXVa6HzFixGqdW9Nat26dAQMGlO4HDRqUxYsXr/I+//znPzNmzJjS/ZAhQ0ph87KceeaZ6dKlS+n+yxB5RXr27JlevXot9/PWrVvnkEMOKd1PmDBhqTmLFi3KkCFDk
nzxTq655pqUl5ev8NwTTzwxX/va15J88d4//vjjldYKAAAAAACsGTnR2icnkhMBAAAAAMD6SE609smJ5EQAAAAAALA+khOtfXIiOREAAAAAAAB1j8ZqteCYY45Z6ZxHH320dL3vvvumffv2K5zftGnTHHfccctcvyoeeOCB0vWxxx5brTXdunVL8+bNkyRPPPHEMue0a9cuxWKxNPr167da9a2K888/PxtvvHGS5NVXX83w4cNXeY8lf8evfvWr2WeffVY4v1Ao5KSTTlrm+uX5wQ9+sNI5u+yyS+m6oqJiqc+feeaZvPfee0mSvfbaK23atFnpnoVCIT169EiSFIvFPPnkkytdAwAAAAAArBk5kZxoSXIiAAAAAACoP+REcqIlyYkAAAAAAKD+kBPJiZYkJwIAAAAAAIDqKa/tAuqjXXfddaVznnvuudL1t771rWrtu9dee2Xw4MFJkmeffXa1ahs/fnzp+v7778+kSZNWaf3MmTMzd+7cUuBdm1q1apVzzz03F110UZLkkksuyXHHHZdGjRpVe4/VfQ9fmjJlSj755JNstNFGy52/4447rnTPzTbbrHQ9e/bspT5f8r19+OGHOfPMM6tV68SJE0vX77zzTrXWAAAAAAAAq09OtG7IiVZOTgQAAAAAAOuWnGjdkBOtnJwIAAAAAADWLTnRuiEnWjk5EQAAAAAAABsSjdVqwZZbbrnSOdOnTy9dt23btlr7tm/fvnQ9c+bMLF68OA0aNFil2qZNm1a6vueee1Zp7ZJnrw8Bd5KcddZZ+cMf/pAPP/wwU6dOzTXXXJMBAwZUe/2avock+eijj1YYcG+yySYr3bNhw4al688//3ypz5d8b6+88kpeeeWValRa1cyZM1d5DQAAAAAAsGrkROuOnKj65EQAAAAAALD2yYnWHTlR9cmJAAAAAABg7ZMTrTtyouqTEwEAAAAAALC+K6vtAuqjpk2brnTOp59+Wrqubli85LxisZg5c+ascm2zZs1a5TX/btGiRWu8R01p0aJFLrzwwtL9pZdemrlz51Z7/Zq+h3/fY1kKhUK161meuvbeAAAAAACgrpITrTtyoupbn94bAAAAAADUVXKidUdOVH3r03sDAAAAAIC6Sk607siJqm99em8AAAAAAACwLBqrradatmxZuq5uILvkvEKhkBYtWqzyuUuuGTt2bIrF4iqPdu3arfK5a9Opp56aNm3aJEk++OCDDBkypNpr1/Q9/Psea8uS7+3EE09crfd28cUXr/U6AQAAAACAlZMT1Rw5kZwIAAAAAAA2JHKimiMnkhMBAAAAAMCGRE5Uc+REciIAAAAAAADqBo3V1lNbbLFF6Xrq1KnVWlNRUVG6btWqVRo0aLDK52611Val6/fff3+V16+PGjdunEGDBpXuL7/88syaNataa9f0Pfz7HmtLXXxvAAAAAABQX8mJao6cCAAAAAAA2JDIiWqOnAgAAAAAANiQyIlqjpwIAAAAAAAA6gaN1dZTu+yyS+l6/Pjx1Vrz5JNPlq533XXX1Tq3W7dupetx48at1h7ro379+qVjx45JklmzZuU3v/lNtdat6XvYfvvt07Jly1WodPUs+d6eeuqpLF68eK2fCQAAAAAArB1yopolJwIAAAAAADYUcqKaJScCAAAAAAA2FHKimiUnAgAAAAAAgA2fxmrrqR49epSux4wZk4qKihXOnz9/fm655ZZlrl8Vhx56aOn65ptvzqeffrpa+6xvGjRokF/84hel+yFDhuSDDz5Y6bolf8cpU6asNPQvFou5/vrrl7l+bdpzzz3TqlWrJMnMmTNz++23r5NzAQAAAACAmicnqllyIgAAAAAAYEMhJ6pZciIAAAAAAGBDISeqWXIiAAAAAAAAasrHH3+chx9+OJdddlmOOOKItG3bNoVCoTRuvPHG2i6xztJYbT21//77p0OHDkmSxYsX58wzz0xlZeVy5w8aNCjTpk1LkjRu3DgnnXTSap3bq1evdOzYM
UkyY8aMnHDCCVm8eHG11lZWVmb69Omrde66cNRRR6VLly5Jknnz5uXSSy9d6ZqOHTtWCakHDhyY+fPnL3f+1VdfnWeffbZ0f/rpp69+waugUaNGOeecc0r3Z511Vt54441qr3///ffXRlkAAAAAAMBqkBPVPDnR8smJAAAAAABg/SEnqnlyouWTEwEAAAAAwPpDTlTz5ETLJycCAAAAAAConj333DObb755DjrooPzsZz/LPffck7fffru2y6o3NFZbT5WVleXyyy8v3T/44IM59thjM2PGjCrzFixYkAsvvLDK3AsuuCBbbLHFap3boEGDXHvttWnYsGGS5O67706PHj3yzDPPLHfN1KlTM3jw4HTs2DEjR45c5pyKiopa75ZYKBSqhNpPPfVUtdb993//d8rLy5Mkzz33XA455JC8++67VeZUVlbm97//fQYOHFh61rdv3+y44441UHn1nHPOOdlpp52SJB999FF23333DBs2LAsXLlzm/Hnz5uXuu+/Od7/73Rx22GHrrE4AAAAAAGDF5EQ1T05UlZwIAAAAAADWT3KimicnqkpOBAAAAAAA6yc5Uc2TE1UlJwIAAAAAAFh106ZNq+0S6rXy2i6A5Tv88MPz4x//OIMHD06SjBw5Mvfff3+6d++ebbbZJjNnzsyjjz5aJfTu2bNnLrroojU6d5999sl1112Xk08+OQsXLszYsWPTtWvXdOjQITvvvHM23XTTzJ8/P9OnT8/kyZPzzjvvrNF569J3vvOd7LXXXhk3bly11+y222654oorSuH1o48+mu222y5777132rdvnzlz5mTs2LFV/mO288475w9/+EON178izZo1y3333Zf9998/r7/+embMmJETTzwxZ599dr75zW/mK1/5SsrLyzNr1qy89tprefnll0vh96677rpOawUAAAAAAFZMTlTz5ERyIgAAAAAA2BDIiWqenEhOBAAAAAAAGwI5Uc2TE8mJAAAAAAAA1lR5eXm+8Y1vpGvXrqVxxBFHbFC52YZKY7X13G9/+9u0bt06gwYNymeffZZ58+Zl1KhRS80rKytL//79M2TIkDRo0GCNz+3Tp086dOiQ/v3756WXXkqSTJkyJVOmTFnumtatW6djx45rfPbadumll2afffZZpTUDBgxIq1atMnDgwMycOTMLFy7M6NGjlzn3iCOOyLBhw9KyZcuaKHeVtGnTJhMnTsyAAQNy6623ZvHixZk9e3Yeeuih5a5p2LBhunXrtg6rBAAAAAAAqkNOVPPkRFXJiQAAAAAAYP0kJ6p5cqKq5EQAAAAAALB+khPVPDlRVXIiAAAAAACA6rvvvvuy/fbbp2nTplWel5WV1VJF9YvGahuAc889N8ccc0yuv/76PPTQQ3njjTcyc+bMtGzZMv/xH/+R/fbbLyeccEI6depUo+fuueeeefHFFzNq1Kjcf//9GT9+fN57773MmjUrjRs3zmabbZaOHTuma9euOeCAA7LPPvvUSLi+tu29997p2bPnCkPfZTn++ONzyCGH5Prrr8+oUaPy6quv5uOPP06zZs2y9dZbZ5999snxxx+fPffccy1VXj0bb7xxhg8fnkGDBuWWW27JY489ltdffz0ff/xxKisrs9FGG6V9+/bp1KlT9t133xx88MHZfPPNa7VmAAAAAABg2eRENUtOJCcCAAAAAIANhZyoZsmJ5EQAAAAAALChkBPVLDmRnAgAAAAAAGB17bjjjrVdQr1WKBaLtV1DjSoUCsW69p2gvisUCikWi4XargMAAAAAgPpJ/gQrJssBAAAAAKA2yXKgbpJBAQAAAABQm2RQsGKyHAAAAAAAapp8Zu3Zaaed8uKLLyZJrrnmmvzwhz+s1rrf//73+dGPfpQk2X333fP0008vNeedd97JqFGj8sQTT2Ty5Ml5++23M2fOnDRv3jxbbbVVunXrliOPPDKHHnpoCoUVRwsVFRVp3759kqRt27apqKhIkjz99NO56aab8thjj
+W9997LrFmz8p//+Z/53//932r+AjWvXbt2mTp1apJk2LBh6devX63VUpeV13YBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADUnN69e+e8885Lktx8883Vbqw2YsSI0nWfPn2W+vy8887L4MGDs6yGeLNnz87s2bPz2muvZfjw4dljjz1y1113Zdttt6123QsXLsyPf/zj/OEPf6j2GuoWjdUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOqQY489Nueff34qKyszbty4VFRUpF27ditc8+qrr2bSpElJkoYNG+boo49eas4777yTYrGYQqGQDh065Gtf+1o233zzNGnSJLNnz87LL7+cF154IUny9NNP59vf/naee+65bLLJJtWq+5xzzsnQoUOTJDvssEN22WWXNG3aNG+99VbKy6u23GrXrl2mTp2aJOnbt29uvPHGap3B+k1jNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAOmTrrbfOfvvtl0ceeSTFYjG33HJLfvazn61wzc0331y6Pvjgg7P55psvNWeXXXbJAQcckO9+97vZcsstl7nP66+/ntNOOy2jR49ORUVFLrjggvzxj39cac3vvvtuhg4dmq233jojRoxIjx49qnw+f/78le7Bhq+stgsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKBm9enTp3S9ZNO0Zfmy+dqy1i7pJz/5SU466aTlNlVLku233z6jRo1Kp06dkiTDhw/PrFmzVlrv4sWL07Rp04wePXqppmpJ0qRJk5XuwYZPYzUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgDqmV69ead68eZLk1VdfzaRJk5Y7d9y4camoqEiSbLLJJjn00EPX6OxGjRrl+OOPT5J89tlnGTduXLXWnXHGGdlhhx3W6Gw2bOW1XQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADWrefPm+d73vpcRI0YkSUaMGJGuXbsuc+6Xc5LkqKOOSuPGjVe6//vvv5+nn346r776ambOnJm5c+emWCyWPn/11VdL188991y1mrUdc8wxK53zpS8bwVG3aKwGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFAH9e7du9Q07fbbb8/gwYNTXl61ddWCBQty5513VlmzIs8++2x++tOf5m9/+1sqKyurVcdHH3200jkNGzZM586dq7UfdVdZbRcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEDN22+//bLNNtskST788MM8/PDDS8154IEHMmvWrCTJdtttl7322mu5+916663Zfffd8/DDD1e7qVqSfPrppyud06pVqzRs2LDae1I3aawGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFAHlZWV5dhjjy3d33zzzUvNWfLZ8ccfv9y9XnvttfTr1y+LFy9OknzjG9/IFVdckaeeeirvvfde5s2bl8rKyhSLxRSLxQwbNqy0tjpN2Jo2bVqt70TdVl7bBQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAsHb07t07l19+eZLkz3/+cz799NO0bNkySTJjxoyMGjWqNHdFjdWuuOKKLFy4MEly0EEH5b777kujRo2WO/+TTz6pifKpZ8pquwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANaOzp07Z6eddkqSzJs3L/fcc0/pszvuuCOff/55kqRbt27Zfvvtl7vP3/72t9L1pZdeusKmakkyderUNSmbekpjNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAOqxPnz6l6xEjRizzesk5yzJt2rTSdadOnVZ65mOPPbYKFcIXNFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKjDjj322DRo0CBJMmbMmPzrX//Km2++mfHjxydJGjVqlKOPPnqFexQKhdL13LlzVzj3qaeeyrPPPruGVVMfaawGAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAFCHtW7dOvvvv3+SpLKyMrfeemtuvvnm0uff+c53summm65wj69+9aul63vvvXe58z755JOccsopa1gx9ZXGagAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAdVzv3r1L1zfffHNuueWWZX62PIcddljp+sc//nHuu+++peZMnjw5++67b/7xj3+kRYsWa1jxirVr1y6FQiGFQiH9+vVbq2ex7pTXdgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKxdvXr1SosWLTJnzpy8+OKLpeebbrppDj300JWuP+uss3Ldddflgw8+yOzZs/Of//mf6dy5czp16pRGjRrllVdeycSJE1MsFtOmTZsMGDAg55133tr8SmvFfffdl0GDBi31fNq0aaXrQYMG5corr6zyedeuXXPdddet7fLqPI3VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADquGbNmuWII47ITTfdVOX5UUcdlUaNGq10/eabb54HHngghx12WN57770kyeTJkzN58uQq87p06ZKRI0dm/PjxNVf8OjRjxoy88MILK5zzzjvv5J133qnybJNNNlmLVdUfZbVdAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAa1/v3r2r9Wx5unbtmsmTJ+fnP/95dt5557Rs2TKNGzdO27Ztc/DBB+fGG2/M008/nY4dO9Zk2dQjhWKxWNs11KimTZu+P3/+/K1quw6g5jRp0uSDzz77rHVt1wEAAAAAQP0kf4IVk+UAAAAAAFCbZDlQN8mgAAAAAACoTTIoWDFZDgAAAAAANa1QKBTrWg8lYM3UucZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMD6T2M14N+V1XYBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGqsBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC1TmM1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACg1mmsBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA1DqN1QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgFqnsRoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFDrNFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABqncZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABArdNYDQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAqHUaqwEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALVOYzUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKDWaawGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADUOo3VAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAWqexGgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUOs0VgMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGqdxmoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAECt01gNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACodeW1XQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUP80adLkg0KhsFVt1wGsPwrFYrG2awAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOq5stouAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQGM1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACg1mmsBgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA1DqN1QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgFqnsRoAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFDrNFYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABqncZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABArdNYDQAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAqHX/P9NaFuhdwYi0AAAAAElFTkSuQmCC\n" + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAADnCAYAAAC9roUQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWAElEQVR4nO3dW3BT94HH8e9fV0u2ZRswGEPC/ZaEhBAIuW2apM10uk3KtLPdzs4+93ln9mH7ug/70vdt+7Az2Z3pdHZ220mbaTZt2iYQciOhBBICOKHcwTeChWTdjnR0zj7IdoFgY8vS/xzC7/NEIkv/3zmSf4j//38k4/s+IiJiRyToACIidxOVroiIRSpdERGLVLoiIhapdEVELFLpiohYpNIVEbFIpSsiYpFKV0TEIpWuiIhFKl0REYtUuiIiFql0RUQsUumKiFik0hURsUilKyJikUpXRMQila6IiEUqXRERi1S6IiIWqXRFRCxS6YqIWKTSFRGxSKUrImKRSldExCKVroiIRSpdERGLVLoiIhapdEVELIrZHCwST476bnWFzTFnY2KJMa/mDLTr8W0f662OJ0zne9rNOYPIGPZzNdtrM+jXVJjO0fXa/bvcasb3fXuDGeOv+dGr1saby/kfv4Dv+6Zdj2/7WG91PGE639NuzhlExrCfq9lem0G/psJ0jq7X7t/lVgv19EL57Eczf8598PINt/l1l+z+/+Tau/9tO1bLzHl8bpX8oVfI7nspNJkAnNG/cPX1n9iMdNtM4y//G4Xj+2xGAm6fK3/4t+Q/fBnfq4cik1erkD/0CuO//Fc8pxSKTL7vk93/X0z88WfUy3lrmYIU6tKtjp/DzY2TPfBznOGTeLXKzBNYHT9Lav0uYpnld+yTNdfxmViC+LJ7qZcnQ5PJr7vUrpwj1rsyNJkAIh3d+DXHaqbb5fIqBaojn4OJhiZTJN5BZvdeEoNbiCTTochkjMGvlfHrLpGEvUxBCnXpAjiXT9L14PMklq0hEu8gtW5n0JFaaq7jS617mOTgFvy6G4pM1fEz1AsTOJeOUy/lQpEJYNnf/hNepYAXQPHOlsv3PWI9A8SXrqY6djoUmQDc3BixHvvTsrNl8qoVkqvvJ735CWpfXLCeKwihL93kqq2UPnuX2tWLjb8hzxwGILF8HeUzh3Hz40RTmYBTNm+243MLE+Te/9/GL2zE7rul2TIlV26m5/G/J7n6fqLpnlBkqpfz5A7+knphgkg8aTXTXLmiqQwYqJz/mHjfYCgyAZROHSS98VGreebKZCJRKhc/pXzuCLGe5dZzBUELaW0S9KJHEBnmIwyLM2E/V1pIWxgtpImIyKwCKd1brfR+lcZrZYbr73f94lG7x52LmxujOPQOlQuf4IycmvXnpv8VtZAMQZ2nVo6/kNubOUfzHXMuN/8L18ZzNJd2vqbCxurFEdPc3CilUwdxs8N073yRyY9/jzM8RHJgMx1rH6L8lw/xvTrpzY/jZodJb34CgNzBXxFNZ8BESG3YTeHYG0TTGbxKEfw6sd4B6qUcHWsfpvTZeyRXbsTEUziXPiVbyZN5ZC/Rrr4gDrnpY3azw+Q//DXx/jVUx89RL0zgVcv4TomeJ37QtnGv/uGnRFMZTDyJm7/Ckud+yOTR1/DKk3Ss3YFz8RjJex+kevYIxRP7yezaS+Hj1yEaI5bpb2xJ8lxMLIHnFEJ/nhY7fu3KWcrnjuJmh/GqZRIrNuBcPE60q4/U+l1UR0/h5saZPPIaAB3rHsa5cIxY70o8p0g9PzbvjIvNevUPPyW1YTfOpRNgImQeecHauEG8psImkHe6sZ4B0pseI9Y3yORHr5LasJt470q6HnyeyoVjVMfOEO1aSj3/xcwTBhDrHcBzysDU9I3vAZDatAe/7hJfck/j9ql9kb7XuD3Rv47ObU/jjM7+N2i7NXvM0XQPmUe/S/W6v/07t/4NRONtHTeW6af7kReJpDIkl
q/Hr1cBcPNXiGX6Sd6znWiqm/TGPUS7luBcPknn9m+AaTw3nduexq+7dO98AWPm/zIL6jwtdvx4/zpSa3fgjHxONN2DV8qRXH0ftYnLxHqWkxjYRL0wQcfaHcT6VuKVcqQ27KZeuEpm13eIdHQvKOdissZ6lpMc2Ei9kCXa2Ys7edXOuAG9psImkOTRrj6KJ94itX4XzuWTxHsHqGVHyB/6DR33PEBixXp81yHWt5LiZ+/O3M+vlvDKeeL9ayh88kdqE5cAKJ08AL5PvVIgkujAzY3jla5ROXcEgOqVsxRPHCAxsDGIwwWaP+Z6KUf+w1/fmN3Mf82g2XGn95eaSBSMwasU8etuYz9lR/fUtrH81M4KQ3LVNorH/gTT/2w1hmjXUoon9uN789/yFtR5Wuz4kXiS8ulDJAc24TlFYktW4QwPkehfS+3aKPX8OCaeoHLuKG52hEi6B0yE+LJ7KRzf19R+7KafWyJE0r1Eu3oxxhDrWmpn3IBeU2ET6O6F8rmjeE6Rzi1PkvvgZXr2fG/Bj1k49idSG3YveAtTULsXWnHMt3K7Ffl2jbvYnLbP060y3CpHkOdrobsXbL2mwnSO5soZdoHM6U5Lrd0x8+dmn7Cu7d9oURo7WnHMd9K4zQo6b9DjL4ReU3eWUO5euPl2Nz9O+eyRto1nU7Or3LUvLrYjzm3Hne/trfZVOk/FE/vJHvh5uyKF7rmbz5hh+p20LdDdC37NoV6YINq9FDc3TrSrj1jPCqqjp6heOY+bvUx68xPEMsspnz1C5eKnVMfO0HHvdspnj9C59UmckVOUTr0/s3JaPHmAxPL1uLlxevZ8r/Hkei75w7/FGEP3zoWt1AZ97NNKpw/Rs+yeUOW69vYvGqvzmx4j1rUk8DwQzvPUed8zuG0smWYyFYfeoT61gJbe+hSx7oXN67Y6k+/Vmfzo/6Yue6/Rcc8DLc0TJoHuXnCGh4ikuvGcEunNj1P6/P3GRPzAJhL9a274ZQIaH7TiufhuY9VzenfC9Sun8d4B/Op1n6Dke/hene4d32psLQtYM8dePPEWzqXj5A+9suCV5nbmmlmdb3HhNpsnrOep3b4KmUwkCvg4l46TXH2/tZxBCHT3QnJwC155kviSQUpD79C1/Xmcy0N4lUmq42duWPmMpntwr41gEmnc3Dh+rULp8/cArls5vR+v5lAvXiO+dBWTR39HffILTCTK5NHfYSx+stJsmjn2zvu+RnL1/WR27235O5Jmc/m+d8PqfNB5IJznCaB85jDOpePUJi6HJlPn1qfI7N7btnPVTKbk6sbvr2li18mdJJDpha77n/3S/+u498GZPydXbQUgsXz9DbfXS7lb/rNjeirh5vuEUTPHDu1fqFhoLmMi9D75D6HJMy1s5wkgtf4RUusfCVWmdmsmk3PhGJ33fa394QIW6O6FhYgk07N+BqhWTkXufJlHvxt0BCtCcVnHQq4DLxz7E7kPXsYZOUXlwiftjtYSYTw+ZbpzM4U1VxgzhVHg73Sz+14iMbARNzuC51bpfuibQGNRZHrVM5JI07HmIUyscUmn5xRmLiHMvfc/M/fL7nuJWM9yMo/9HfkPfz1zXXm0M5jPWwjr8SnTnZsprLnCmCmsAn+nG0ln6Nz2NABd27+OMzwEcMOqZ2rDrpknChrXYJdOHpj57+n7JVZuJLV+F252pOnrylstjMenTHduprDmCmOmsAq8dK+PUDj2BsnBxgT7Dauepw/NbBMDvvQ33l/vF2l8Fo4xTV9X3nphPD5lunMzhTVXGDOFk745ok2C/pT/IDLMRxi+jSDs50rfHLEwd9pnL4Tgna6IyN3D6jvdSDw56rtV+19Fegsmlhjzas5Aux7f9rHe6njCdL6n3ZwziIxhP1ezvTaDfk2F6Rxdr92/y61mtXQXyhgTBa4A9/m+/6XLnowx3wb+2ff9r1sPJyLShLBPL+wARm9VuFPeBvYYYzrsRRIRaV7YS/c54M3ZbvR9Pw98C
jxuLZGIyCKEvXSfZY7SnfLm1M+JiIReaEvXGBMHngLeus2P7qPxjlhEJPRCW7rAbuC07/u3uxTlXWCHMabLQiYRkUUJc+nOZ2oB3/dLwGHgybYnEhFZpDCX7nM0pg7mQ1MMInJHCGXpTm0B2wMcuN3PTnkTla6I3AFCWbrAY8CnU1vC5uMDYKsxprd9kUREFi+spbuQqQV833eAg8DTbUskItICYS7d2y6i3URTDCISeqErXWNMJ43Lf9+9zY/eTBdJiEjoha50aVwQ8dHUVrCFOAysNcb0tyGTiEhLhLF0FzSfO833fZfGB+A80+pAIiKtEsbSfRZ4o8n7aopBREItVKU7teVrG40tYM3QYpqIhFqoSpfGlq+DU1vAmvEJ0G+MGWxhJhGRlglb6c7r8xZm4/u+B+xHUwwiElJhK91m9ufeTFMMIhJaoSndqa1ea2ls/VoMffiNiIRWaEqXxlavt6e2fi3GSSBljFm3+EgiIq0VptJd1HzuNL/x9cbaOiYioRSm0m3qoohZaIpBREIpFKU7tcWrH/i4RQ/5JvCcMca06PFERFoiFKVLYypg/9SWr1Y4A9SAzS16PBGRlghL6bZiq9iM6+Z1NcUgIqESptJt1XzuNM3rikjoBF66U1u7UjS2erXSPuAZY0zgxygiMi0MhfQs8ObUlEDL+L5/EcgCD7TycUVEFiMMpduOqYVpmmIQkVAJtHSntnS15KKIWegiCREJlaDf6W4GXBpbvNphP/C0MSbWpscXEVmQoEv3OWBfq+dzp/m+PwZcBh5ux+OLiCxUGEq3XVML07RfV0RCI7DSndrK9QztW0SbpnldEQmNIN/pPgBcm9ra1U5vAU8aYxJtHkdE5LaCLF0bUwv4vp8FPgcebfdYIiK3E0jpGmOitHer2M3eBJ6dGldEJDBBvdN9j0bpbmr3QMaYFNAH/AD4WbvHExGZS1ClmwfSNMqw3ao09gPfD5yzMJ6IyKyCKt0hGh9w8y/tHsj3/TqwF5gAPm33eCIiczFtui5BRERuIeiLI0RE7ioqXRERi+b8IJhIPDnqu9UVtsLMl4klxgDCkM3EEmNezRkIOoeI3BnmnNM1xvhrfvSqxTjzc/7HLwAQhmznf/wCvu/rW4dFZF40vSAiYlFbSrd89qOZP+c+ePlLt2f3vURx6J12DD2ruTLVSzlyB3/JxBv/YTWTiNx92lK61fFzuLlxsgd+jjN8Eq9WuaH0und+ux3DNp0pmu6h57HvE0l2Ws8lIneXtk0vOJdP0vXg8ySWrSES7yC1bme7hmpJpvLZIyRXtv2qZBG5y7WtdJOrtlL67F1qVy823lWeOTxzW3HoHcqn/4xXq7Rr+AVlqhez5D/4FbWJYXSxiIi0k3YvLJJ2L4jIQmj3goiIRfMq3VvtQFgsNzdGcegdKhc+wRk5NevPTb8Tv1WGZnPd7n6z3T5XFhGR+ZjXV5O7uVFKpw7iZofp3vkikx//Hmd4iOTAZjrWPkT5Lx/ie3XSmx/HzQ6T3vwEAFf/8FOiqQwmnsTNX2HJcz9k8uhreOVJOtbuwLl4jOS9D1I9e4Tiif1kdu2l8PHrEI0Ry/TjOSXwXEwsgecUWparduUs5XNHcbPDeNUyiRUbcC4eJ9rVR2r9Lqqjp3Bz40weeQ2AjnUP41w4Rqx3JZ5TpJ4fa9X5F5G7zLze6cZ6BkhveoxY3yCTH71KasNu4r0r6XrweSoXjlEdO0O0ayn1/BczxQYQy/TT/ciLRFIZEsvX49erALj5K8Qy/STv2U401U164x6iXUtwLp+kc/s3wDSmSDu3PY1fd+ne+QKN77FsTa54/zpSa3fgjHxONN2DV8qRXH0ftYnLxHqWkxjYRL0wQcfaHcT6VuKVcqQ27KZeuEpm13eIdHQv6qSLyN1rXqUb7eqjeOItUut34Vw+Sbx3gFp2hPyh39BxzwMkVqzHdx1ifSspfvbuX+849e04JhIFY/AqRfy6i193iXR041w6Tr2Uh
0gUMCRXbaN47E8wvbhnDNGupRRP7Mf33JblisSTlE8fIjmwCc8pEluyCmd4iET/WmrXRqnnxzHxBJVzR3GzI0TSPWAixJfdS+H4PurlyebPuIjc1Ra0e6F87iieU6Rzy5PkPniZnj3fs5HxS27evRBkLu1eEJGFWNDuhdTaHXRueRLgS8XW7OJUK8yWq5lM+T+/wtXXf0K9eK2lGUVEYJ4LafMxvajl1xzqhQmi3Utxc+NEu/qI9aygOnqK6pXzuNnLM/Or197+RWPxatNjxLqWtCrKojJldu2leOItvEqBaGdvyzOJyN2tZft0pxe1nOEhIqluPKdEevPjlD5/vzG/OrCJRP+aGxa0Zhav2lC4zWZy81eoF7PEl65uSyYRubu1rHSnF7WSg1vwypPElwxSGnqHru3P41wewqtMUh0/M7Og5fveDYtX7bDQTABXX/938H3c/JW2ZBKRu5suA14kLaSJyELoMmAREYsWVbo3v0uea7eA7UtnF5JNRMSWpncvZPe9RGJgI252BM+t0v3QNwEonnhrZqdAJJGmY81DmFgcPJf84d9ijCExuJXK+aPE+1ZRL1wFEyG1fheTR1/DROL0/s0/LuqgFpqt8Okb+DWHxMrNuNdG8WsV4svWUPj490Q7l9Dz+PcxscSiMomIwCLe6UbSGTq3PQ1A1/av4wwPAdywUyC1YVejcAHfq9O941uNq9KqZSIdGapfnCfWN4jnlBr3S3Y1ftatLe6gFpgtvmQ1nlMimspMZfUASA5uITGwkVp2eFF5RESmLWJ64a93LRx7g+TgVoAbdwqcPoTvNj5vwUSiTB79HSaZxs1faRRevY7nlPDrNZKrtjX2xnb1zZShrWxepQAmgucUcIaHcC6fAKBy8QSVi8eI9w0uMo+ISIN2L8zCzY3hjJyic+tTt82i3QsiMl8tuyLtqybWs4JYz4qgY4jIV8yc73Qj8eSo71ZD1zwmlhgDCEM2E0uMeTVnIOgcInJnmLN0RUSktXRxhIiIRSpdERGLVLoiIhapdEVELFLpiohYpNIVEbFIpSsiYpFKV0TEIpWuiIhFKl0REYtUuiIiFql0RUQsUumKiFik0hURsUilKyJikUpXRMQila6IiEUqXRERi1S6IiIWqXRFRCxS6YqIWKTSFRGxSKUrImKRSldExCKVroiIRSpdERGLVLoiIhapdEVELFLpiohYpNIVEbFIpSsiYpFKV0TEIpWuiIhFKl0REYtUuiIiFql0RUQsUumKiFik0hURsUilKyJikUpXRMQila6IiEUqXRERi1S6IiIWqXRFRCxS6YqIWKTSFRGxSKUrImKRSldExCKVroiIRSpdERGLVLoiIhapdEVELFLpiohYpNIVEbFIpSsiYpFKV0TEIpWuiIhFKl0REYtUuiIiFql0RUQsUumKiFik0hURsUilKyJikUpXRMQila6IiEUqXRERi1S6IiIWqXRFRCxS6YqIWKTSFRGxSKUrImKRSldExCKVroiIRSpdERGLVLoiIhapdEVELFLpiohYpNIVEbFIpSsiYpFKV0TEIpWuiIhFKl0REYtUuiIiFql0RUQsUumKiFik0hURsUilKyJikUpXRMQila6IiEUqXRERi1S6IiIWqXRFRCxS6YqIWKTSFRGxSKUrImKRSldExCKVroiIRSpdERGLVLoiIhapdEVELFLpiohY9P8QdEv7a4+pVQAAAABJRU5ErkJggg==" }, "metadata": { "needs_background": "light" - }, - "output_type": "display_data" + } } ], - "source": [ - "ForestPlotter().export(\n", - " f.ast, root_list=[node[\"val\"][\"id\"] for node in f.element if node[\"structure\"][\"father\"] is None],\n", - ")\n", - 
"plt.show()" - ], "metadata": { "collapsed": false, "pycharm": { @@ -100,22 +199,42 @@ } } }, + { + "cell_type": "markdown", + "source": [ + "## 变量标准化\n", + "\n", + "下面这个例子中,`var` 为变量编号。同一变量拥有相同的变量编号。 \n", + "如:`x` 变量的编号为 `0`, `y` 变量的编号为 `1`。" + ], + "metadata": {} + }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, + "source": [ + "f.variable_standardization().elements" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "OutEdgeView([(1, 0), (2, 0)])" + "text/plain": [ + "[{'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base', 'var': 0},\n", + " {'id': 2, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " {'id': 3, 'type': 'bin', 'text': '+', 'role': None},\n", + " {'id': 4, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0},\n", + " {'id': 5, 'type': 'bin', 'text': '+', 'role': None},\n", + " {'id': 6, 'type': 'textord', 'text': '1', 'role': None},\n", + " {'id': 7, 'type': 'rel', 'text': '=', 'role': None},\n", + " {'id': 8, 'type': 'mathord', 'text': 'y', 'role': None, 'var': 1}]" + ] }, - "execution_count": 13, "metadata": {}, - "output_type": "execute_result" + "execution_count": 20 } ], - "source": [ - "f.ast.edges" - ], "metadata": { "collapsed": false, "pycharm": { @@ -123,22 +242,38 @@ } } }, + { + "cell_type": "markdown", + "source": [ + "## 方程组结构解析\n", + "\n", + "调用 `FormulaGroup` 类解析公式方程组,相关的属性和函数方法同上。" + ], + "metadata": {} + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 21, + "source": [ + "fs = FormulaGroup([\n", + " \"x^2 = y\",\n", + " \"x^3 = y^2\",\n", + " \"x + y = \\pi\"\n", + "])\n", + "fs" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[{'structure': {'bro': [None, 3],\n 'child': [1, 2],\n 'father': None,\n 'forest': None},\n 'val': {'id': 0, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 2], 'child': None, 
'father': 0, 'forest': None},\n 'val': {'id': 1, 'role': 'base', 'text': 'x', 'type': 'mathord', 'var': 0}},\n {'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None},\n 'val': {'id': 2, 'role': 'sup', 'text': '2', 'type': 'textord'}},\n {'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 3, 'role': None, 'text': '+', 'type': 'bin'}},\n {'structure': {'bro': [3, 5], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 4, 'role': None, 'text': '1', 'type': 'textord'}},\n {'structure': {'bro': [4, 6], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 5, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [5, None],\n 'child': None,\n 'father': None,\n 'forest': None},\n 'val': {'id': 6, 'role': None, 'text': 'y', 'type': 'mathord', 'var': 1}}]" + "text/plain": [ + ";;>" + ] }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" + "execution_count": 21 } ], - "source": [ - "f.variable_standardization()" - ], "metadata": { "collapsed": false, "pycharm": { @@ -148,47 +283,199 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, + "source": [ + "fs.elements" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[[{'structure': {'bro': [None, 3],\n 'child': [1, 2],\n 'father': None,\n 'forest': None},\n 'val': {'id': 0, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 2],\n 'child': None,\n 'father': 0,\n 'forest': [6, 12]},\n 'val': {'id': 1, 'role': 'base', 'text': 'x', 'type': 'mathord'}},\n {'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None},\n 'val': {'id': 2, 'role': 'sup', 'text': '2', 'type': 'textord'}},\n {'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 3, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [3, None],\n 'child': None,\n 'father': None,\n 'forest': [10, 
14]},\n 'val': {'id': 4, 'role': None, 'text': 'y', 'type': 'mathord'}}],\n [{'structure': {'bro': [None, 8],\n 'child': [6, 7],\n 'father': None,\n 'forest': None},\n 'val': {'id': 5, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 7],\n 'child': None,\n 'father': 5,\n 'forest': [1, 12]},\n 'val': {'id': 6, 'role': 'base', 'text': 'x', 'type': 'mathord'}},\n {'structure': {'bro': [6, None], 'child': None, 'father': 5, 'forest': None},\n 'val': {'id': 7, 'role': 'sup', 'text': '3', 'type': 'textord'}},\n {'structure': {'bro': [5, 9], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 8, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [8, None],\n 'child': [10, 11],\n 'father': None,\n 'forest': None},\n 'val': {'id': 9, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 11],\n 'child': None,\n 'father': 9,\n 'forest': [4, 14]},\n 'val': {'id': 10, 'role': 'base', 'text': 'y', 'type': 'mathord'}},\n {'structure': {'bro': [10, None], 'child': None, 'father': 9, 'forest': None},\n 'val': {'id': 11, 'role': 'sup', 'text': '2', 'type': 'textord'}}],\n [{'structure': {'bro': [None, 13],\n 'child': None,\n 'father': None,\n 'forest': [1, 6]},\n 'val': {'id': 12, 'role': None, 'text': 'x', 'type': 'mathord'}},\n {'structure': {'bro': [12, 14], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 13, 'role': None, 'text': '+', 'type': 'bin'}},\n {'structure': {'bro': [13, 15],\n 'child': None,\n 'father': None,\n 'forest': [4, 10]},\n 'val': {'id': 14, 'role': None, 'text': 'y', 'type': 'mathord'}},\n {'structure': {'bro': [14, 16], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 15, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [15, None],\n 'child': None,\n 'father': None,\n 'forest': None},\n 'val': {'id': 16, 'role': None, 'text': '\\\\pi', 'type': 'mathord'}}]]" + "text/plain": [ + "[{'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 
'role': None},\n", + " {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " {'id': 2, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " {'id': 3, 'type': 'rel', 'text': '=', 'role': None},\n", + " {'id': 4, 'type': 'mathord', 'text': 'y', 'role': None},\n", + " {'id': 5, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " {'id': 6, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " {'id': 7, 'type': 'textord', 'text': '3', 'role': 'sup'},\n", + " {'id': 8, 'type': 'rel', 'text': '=', 'role': None},\n", + " {'id': 9, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " {'id': 10, 'type': 'mathord', 'text': 'y', 'role': 'base'},\n", + " {'id': 11, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " {'id': 12, 'type': 'mathord', 'text': 'x', 'role': None},\n", + " {'id': 13, 'type': 'bin', 'text': '+', 'role': None},\n", + " {'id': 14, 'type': 'mathord', 'text': 'y', 'role': None},\n", + " {'id': 15, 'type': 'rel', 'text': '=', 'role': None},\n", + " {'id': 16, 'type': 'mathord', 'text': '\\\\pi', 'role': None}]" + ] }, - "execution_count": 15, "metadata": {}, - "output_type": "execute_result" + "execution_count": 22 } ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 23, "source": [ - "fs = FormulaGroup([\n", - " \"x^2 = y\",\n", - " \"x^3 = y^2\",\n", - " \"x + y = \\pi\"\n", - "])\n", - "fs" + "fs.ast" ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'val': {'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " 'structure': {'bro': [None, 3],\n", + " 'child': [1, 2],\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " 'structure': {'bro': [None, 2],\n", + " 'child': None,\n", + " 'father': 0,\n", + " 'forest': [6, 12]}},\n", + " {'val': {'id': 2, 'type': 'textord', 'text': '2', 
'role': 'sup'},\n", + " 'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None}},\n", + " {'val': {'id': 3, 'type': 'rel', 'text': '=', 'role': None},\n", + " 'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 4, 'type': 'mathord', 'text': 'y', 'role': None},\n", + " 'structure': {'bro': [3, None],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': [10, 14]}},\n", + " {'val': {'id': 5, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " 'structure': {'bro': [None, 8],\n", + " 'child': [6, 7],\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 6, 'type': 'mathord', 'text': 'x', 'role': 'base'},\n", + " 'structure': {'bro': [None, 7],\n", + " 'child': None,\n", + " 'father': 5,\n", + " 'forest': [1, 12]}},\n", + " {'val': {'id': 7, 'type': 'textord', 'text': '3', 'role': 'sup'},\n", + " 'structure': {'bro': [6, None], 'child': None, 'father': 5, 'forest': None}},\n", + " {'val': {'id': 8, 'type': 'rel', 'text': '=', 'role': None},\n", + " 'structure': {'bro': [5, 9], 'child': None, 'father': None, 'forest': None}},\n", + " {'val': {'id': 9, 'type': 'supsub', 'text': '\\\\supsub', 'role': None},\n", + " 'structure': {'bro': [8, None],\n", + " 'child': [10, 11],\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 10, 'type': 'mathord', 'text': 'y', 'role': 'base'},\n", + " 'structure': {'bro': [None, 11],\n", + " 'child': None,\n", + " 'father': 9,\n", + " 'forest': [4, 14]}},\n", + " {'val': {'id': 11, 'type': 'textord', 'text': '2', 'role': 'sup'},\n", + " 'structure': {'bro': [10, None],\n", + " 'child': None,\n", + " 'father': 9,\n", + " 'forest': None}},\n", + " {'val': {'id': 12, 'type': 'mathord', 'text': 'x', 'role': None},\n", + " 'structure': {'bro': [None, 13],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': [1, 6]}},\n", + " {'val': {'id': 13, 'type': 'bin', 'text': '+', 'role': None},\n", + " 
'structure': {'bro': [12, 14],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 14, 'type': 'mathord', 'text': 'y', 'role': None},\n", + " 'structure': {'bro': [13, 15],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': [4, 10]}},\n", + " {'val': {'id': 15, 'type': 'rel', 'text': '=', 'role': None},\n", + " 'structure': {'bro': [14, 16],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': None}},\n", + " {'val': {'id': 16, 'type': 'mathord', 'text': '\\\\pi', 'role': None},\n", + " 'structure': {'bro': [15, None],\n", + " 'child': None,\n", + " 'father': None,\n", + " 'forest': None}}]" + ] + }, + "metadata": {}, + "execution_count": 23 } - } + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 25, + "source": [ + "ForestPlotter().export(\n", + " fs.ast_graph, root_list=[node[\"val\"][\"id\"] for node in fs.ast if node[\"structure\"][\"father\"] is None],\n", + ")" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[[{'structure': {'bro': [None, 3],\n 'child': [1, 2],\n 'father': None,\n 'forest': None},\n 'val': {'id': 0, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 2],\n 'child': None,\n 'father': 0,\n 'forest': [6, 12]},\n 'val': {'id': 1, 'role': 'base', 'text': 'x', 'type': 'mathord', 'var': 0}},\n {'structure': {'bro': [1, None], 'child': None, 'father': 0, 'forest': None},\n 'val': {'id': 2, 'role': 'sup', 'text': '2', 'type': 'textord'}},\n {'structure': {'bro': [0, 4], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 3, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [3, None],\n 'child': None,\n 'father': None,\n 'forest': [10, 14]},\n 'val': {'id': 4, 'role': None, 'text': 'y', 'type': 'mathord', 'var': 1}}],\n [{'structure': {'bro': [None, 8],\n 'child': [6, 7],\n 'father': None,\n 'forest': None},\n 'val': {'id': 5, 'role': None, 'text': 
'^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 7],\n 'child': None,\n 'father': 5,\n 'forest': [1, 12]},\n 'val': {'id': 6, 'role': 'base', 'text': 'x', 'type': 'mathord', 'var': 0}},\n {'structure': {'bro': [6, None], 'child': None, 'father': 5, 'forest': None},\n 'val': {'id': 7, 'role': 'sup', 'text': '3', 'type': 'textord'}},\n {'structure': {'bro': [5, 9], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 8, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [8, None],\n 'child': [10, 11],\n 'father': None,\n 'forest': None},\n 'val': {'id': 9, 'role': None, 'text': '^', 'type': 'supsub'}},\n {'structure': {'bro': [None, 11],\n 'child': None,\n 'father': 9,\n 'forest': [4, 14]},\n 'val': {'id': 10, 'role': 'base', 'text': 'y', 'type': 'mathord', 'var': 1}},\n {'structure': {'bro': [10, None], 'child': None, 'father': 9, 'forest': None},\n 'val': {'id': 11, 'role': 'sup', 'text': '2', 'type': 'textord'}}],\n [{'structure': {'bro': [None, 13],\n 'child': None,\n 'father': None,\n 'forest': [1, 6]},\n 'val': {'id': 12, 'role': None, 'text': 'x', 'type': 'mathord', 'var': 0}},\n {'structure': {'bro': [12, 14], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 13, 'role': None, 'text': '+', 'type': 'bin'}},\n {'structure': {'bro': [13, 15],\n 'child': None,\n 'father': None,\n 'forest': [4, 10]},\n 'val': {'id': 14, 'role': None, 'text': 'y', 'type': 'mathord', 'var': 1}},\n {'structure': {'bro': [14, 16], 'child': None, 'father': None, 'forest': None},\n 'val': {'id': 15, 'role': None, 'text': '=', 'type': 'rel'}},\n {'structure': {'bro': [15, None],\n 'child': None,\n 'father': None,\n 'forest': None},\n 'val': {'id': 16, 'role': None, 'text': '\\\\pi', 'type': 'mathord'}}]]" + "text/plain": [ + "[Text(22.32, 181.2, 'id: 0\\ntype: supsub\\ntext: \\\\supsub\\nrole: None'),\n", + " Text(11.16, 108.72, 'id: 1\\ntype: mathord\\ntext: x\\nrole: base'),\n", + " Text(33.480000000000004, 108.72, 'id: 2\\ntype: 
textord\\ntext: 2\\nrole: sup'),\n", + " Text(55.8, 181.2, 'id: 3\\ntype: rel\\ntext: =\\nrole: None'),\n", + " Text(78.12, 181.2, 'id: 4\\ntype: mathord\\ntext: y\\nrole: None'),\n", + " Text(111.6, 181.2, 'id: 5\\ntype: supsub\\ntext: \\\\supsub\\nrole: None'),\n", + " Text(100.44, 108.72, 'id: 6\\ntype: mathord\\ntext: x\\nrole: base'),\n", + " Text(122.76, 108.72, 'id: 7\\ntype: textord\\ntext: 3\\nrole: sup'),\n", + " Text(145.08, 181.2, 'id: 8\\ntype: rel\\ntext: =\\nrole: None'),\n", + " Text(178.56, 181.2, 'id: 9\\ntype: supsub\\ntext: \\\\supsub\\nrole: None'),\n", + " Text(167.4, 108.72, 'id: 10\\ntype: mathord\\ntext: y\\nrole: base'),\n", + " Text(189.72, 108.72, 'id: 11\\ntype: textord\\ntext: 2\\nrole: sup'),\n", + " Text(212.04, 181.2, 'id: 12\\ntype: mathord\\ntext: x\\nrole: None'),\n", + " Text(234.36, 181.2, 'id: 13\\ntype: bin\\ntext: +\\nrole: None'),\n", + " Text(256.68, 181.2, 'id: 14\\ntype: mathord\\ntext: y\\nrole: None'),\n", + " Text(279.0, 181.2, 'id: 15\\ntype: rel\\ntext: =\\nrole: None'),\n", + " Text(301.32, 181.2, 'id: 16\\ntype: mathord\\ntext: \\\\pi\\nrole: None')]" + ] }, - "execution_count": 16, "metadata": {}, - "output_type": "execute_result" + "execution_count": 25 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAADnCAYAAAC9roUQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAU1klEQVR4nO3dWW+c133H8d95ZuNwJyVZFLVYtixLju3Ure0UKeJe1AHaAgZStEDQi170BfS2yAvoReEG6HLRl5CroldxgCBwbbR20sULHDu2KTm0ZUkWqZWSOFxmPb2YkTgcDWd7znPmmWe+H8CwuJ3nP3+e+fHZ5oyx1goA4Ecw7AIAYJwQugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgEaELAB4RugDgk7XW2X8mnV2XZF3+Z9LZ9WHV07ztKB7bII8xaT0Os/049CKqeeF67vmYy1HVHJfnnrOctNbKFWOMffxHbzgbT5K+fv01WWvNMOpp3nYUj63ddrpJWo/DbD8OvYhqXrieez7mclQ1x+W554q30wuVzVuSpJ1LH0mSSje+0uavf+Fr8wfWUdm8rc2Pfu58XEm69z//FnrcsHXsrL6v4rULQ62h+M1KLH7XO19+oO0Lvxra9quFDRU+fVulm5e0tfKu83HLG9dU+PTtSGq2tqZ7//vvzsfduvBLlW5ecj5u8dqFfc/FOEn72lDp+pfaWX1fQX5GteKWqtv3lJlf8rX5A+tITS/KVkrOx63ubCo1teig4nB1BJOzTh5fmBokK1vc8lpDuzpSM4e1s/p/w9v+9IJSUwvKHjmt8p1vnI+bml6ULRcjqbm0vqrMoZPOxzWpdKj5edC4lbvrCnJTA48bJc8X0vZOZaQmZ1W+u+53823qqG7eVJCbdD5ubee+qlt3ZKsVR2MPVkeQm1J18/ZQa7BWMpncEGrYX0eQyys17fsPYdOc2C2otHZRWyvvyqSzzset3r/laC4/OratFFW+fcX5uEFu0sH8fHTc9OJxVTZvhhw3Gt72dCef+s6+j7OPPansY0/62vyBdQS5KU0//33n4+aOPa3csadDjxu2jiA3pczi8aHWMHHiGU2ceMZrDe3qCHJTmn7u1aFtX5LmvvvDSMfNHDoR2dgTJ5+LZNwwOdBp3NzSUwOPGyXne7ou9l5dn4vpp6ZO2x7ksZXvrrcdc/fyx7K1at/jhamllcs+u+px1Ns+SJiafM2LbtvZu05xq6ft9yuKmpuNynMvLOd7ujur76myeFwygco3vlTQOL8y/dyrSs0e0fbFXymzsKzq9j1V7l2XLW7LZPPKHDrx8FC8ePUzBblJZ3uKXWv6/L+Umj2i0vpvlTl8avBxVt5RtbCh1Mwh1XYLSs0cUrWwIVvalmrV+uGZMcounVXx6ufKnXg2usfUR59dcNXjSLYdcS98zYtu26nt3FfhN28pNb2orU/+QxOPf1u54+2PMOJS8yg+98JyHrqpyXnJBDKpvaHTc489/HdmfknlO9+otluQrRQlWU2cel7Fq59JQSCTytS/0eGtbN1qCibnpFql66FZ13Hyswrys/UPalXVdjYVZHIK5h6TalVJRiadc3A+L359dtXjKLYddS98zYtu22k8CElSdvmcah0uqsWl5lF87oXlPHSnnnlFO6vvK3v8vGy1ovzpF/Z9PXv0jLJHz0hSfa+jVlVmYTnS847damr9OKpx8k+++PDfYc83xa3Prnocxbaj7oWvedFtO1PP/OHI1TyKz72wIrmQlj/zUv3/XRqRnjsaxebb6rUmX+O4ELc+D7M3w+6Fr8fucjujVnOcnnthRH4hrbZbUHVnU5JUvHahp1uoo
ripuVNdcR673+0ltceDbHOYvZCi60e3cWvF7UjGHdQwxm3+3fdyYdGnSC6kFSemFWQnVd26o9zyOVW37ys9v6TMkdMqrl1U+dZlpRsvjKhsXFNq5rDKNy8pu/SUZAJVN29pa+VdTZ3/npe60rNHtHvpIwX5WeWOnXU6tqxV+fYV5ZbOKjW9EPljSWqPB9nmMHvRrbbU5Jw2P/ipZl76gYI+72Hu1ufCr3+u3Mnn++5zVPM4qnnR7XdfunlJxcufyGTzmnnhT/oaO0rO93RTk/MyQUqpmUNSrVb/q9u4QFGfXFa14pZspaTKxrX6D9maFASytarKN75yXVLXuuolVOt1OB47s3BMpbUvnAVut+0ltceDbHOYvehWm62W92pxOK4JUsounxvoD1tU8ziqedHtd//gomncRHIh7YF2J6wnTjyriabbNXYufbT/HM2Zl12X1FNd7W6ydjF2rVxUbtntiyTGsceDbnNYveiltkFfKNHLY3Y9bph5HNW86DZueuawUmcXlFlYHmj8qHh5GXDrObO77/5E26vvSfJ7UrxTHVGNG2Ryykf4xO6ljiT02FUdSehFlD0+aOyw83gYvUjPHY1d4EoRvgx488OfyWQnlMrX11gorV3U5PlXlFk4Vr9B+fInMiZQrbyr6v2byhw+peKVTzX9O3/c5t7D4dSR9Mfnqo441BCXOqKqIcrHNmo1x+H3HEZkoZtePC5b3pVkZCtFpRePS6a+bKVJZ6VUWjJGRkaZI6elWlWpmUN7N6rHvI6kP75RqyEudYzifBu1muPwew4jstBtPox7cH9d89d8HeZFVUfSH9+o1RCXOkZxvo1azXH4PYfBe6QBgE9O3/snBu9Z5bIe3iON90gbRg1RzD3eI8393B/4OWOb7peLmjHmY0l/ba390BhzXtIb1tp4Lno5oowxP5Z0x1r7942Pv5b0R9ba1eFW5p8x5l8lXbTW/kvj45uSvm2tXRtuZRhn3k4vGGNSkp6SdLHxqVVJJ4wxw3pLgaQ6J2ml6eOVxufGEb1A7Pg8p3tK0m1rbUGSrLVlSV+rHsRw57yk5neivND43DiiF4gdn6F7TvufAGp8zJ6HI8aYrOp/3JpPJYxlj40x05IWJV1u+vRY9gLx4jt0V1o+x+GeW09KumKtbV69elx7/LSkL6zd96L+ce0FYsRn6LYe6kkc7rlGj/fQC8QSe7rJ0q7H30iaMsbM+y9nqNr14itJy8aYiSHUA0iKyZ6uMY3X8CGsR3ps6/cEXtT4/XFr14uy6sHrbkFfoE9eQtcYMytpTtLV5s9ba29Jqkga/ioUydBu704azyMKeoFY8rWne071m9TbrVTMeTYHGkcL7Y4mpDHrsTEmUP1C2sU2Xx6rXiB+fIZuuzCQuI3HlcOSjKSbbb42bj0+KWnDWtvujbjGrReIGZ+h2+5QT+Jwz5VzklZs+9d1j1uPmW+ILV+he9Bhr8ThniudevyFpDONl2KPg67zjYu3GBb2dJPjwB5ba7clXZd02mdBQ9SpF3ck7Upa8loR0BB56Db2rs6q/UUNSfpSLHzjQqe9O2m8jijoBWLLx57uKUm3rLVb7b7IwjfOdLpYKY3XBSR6gdjyEbqdTi08wCmGEA5Y6KbVWPTYGDOjRxe6aTUWvUA8+Qjdbod6Eod7YZ3RowvdtBqXHrdb6KbVuPQCMcSebjLQ4z30ArHGnm4y9NLjaxqPhW966cWDhW/yHuoB9onVni73Tg6sa48bL5oYhwtIvfSiovpdM1y8hXeRhm5joZtZ1ZcXPJC19rbqC98cjbKeBOtl704ajyMKeoFYi3pP95y6X9R4YBz2wpxrHB10u0XqgUT3uLHQTad7wpsluheIr6hD97y6n1p4gIsbgznc+H+7hW5arSjZe3cnVX/7+XYL3bRivmEofOzp9rIHJnG4N6jzki4csNBNq6Tv3fV6akFivmFIfIQue7rR6qfHX0h60hiTjrCeYeqnFxfExVsMgY/TC+x5R
KvnHo/Bwjf99OLBwjfHIq0IaBFZ6DYWunlKvV3UkOq38Bxn4Zu+9XMKR0r2EUW/vUj66RbEUJR7uo9LunnQQjetGgvfXBL3Tvarn0NqKdlHFP32Isl/gBBTUYZuv08AKdmB4FyPC920SmTQNC10c6WPH2O+wbsoQ7ef87kPJDIQInRG0mVrbamPn0lq0Dytg9/89CDMN3jHnu5o4w/bnkF6wXyDd3Hb0+XCRn/6vXAkSWuS8saYhQjqGaZBevGVpGMsfAOfot7THWgvjHsne9b30UTjRRQXlbw/boP0goVv4F0koWuMmZM0oy4L3bRi4Zu+DXI0ISXzFMOgveAUA7yKak/3nPq/qPFAEgPBuaaFbvo9by4lLGiaFrrhDxBiL8rQHSQMpIQFQoSONP5/a4CfTVrQnFJ9oZvCAD/LfINXUYXuoId6UvICISrnJK30uNBNq6QFTZg/8sw3eMWe7ugK84ftC0lPJGjhmzC9YOEbeBXHPV1uG+vNIHeHSJKstTuS1iU94bSi4QnTiw2x8A08ch66jYVuzqj3hW5aPVj4ZsJdVYkU5mhCStZhNb3AyIhiT/e0pBuNZQT7xsI3PQtzNCEl6zQOvcDIiCJ0Bz7Ua8KeRweN5S9Pqr+Fblol4jROY6GbefW30E0r5hu8iSp0wxzqSex5dDPIQjetkhI0/bz56UGYb/AmitANe6gnJWQvLEIujiaSEjSuesF8gxdOQ9cYMynpWbl5EnzLGDMdvqpkadza9JLCH02sSZowxnwrfFXD0Xgl2osKP9++krRkjDkduiigC9d7uq9J+gNJPww5zp9L+j1JfxG6ouSZlfS3Ct/j70nKS/rnsAUN0VFJfyPpL0OO86qkjKR/DF0R0IXr0H1L0o6kvws5zo8lbUv6z9AVJYy19p6ka5L+KeRQv1T9Qty7oYsaEmvtmqSbkv4h5FBvqn4h7p3QRQFdmMFeRQoAGETUb8EOAGhC6AKARx0XPAkyuXVbKUWyoLhJZWu2WnJ790Q6e71WLi65HDNqkfY4nb0uSS7Hj7LH9ALjoOM5XWOMffxHb0Sy4a9ff02ux/769ddkrR2p1aKi7rEkp32Ossf0AuMg1J5mZbO+fvbOpY8kSdXChgqfvh26qHZjlzfWVPjNW07GHjWP9OL2VW2thL/poHXcyuZt3X3nJ6HHjcpB82336mfa+nzwGw8OGre8cU3bF/87VM1Aq1DrqZauf6md1fcV5GdUK24pNb2g1JSbN5ltHTuzcEyl62GWGhhdrb0o37kqE4Q/M9M6bnrmkLLHzjqoOBoHzTdbKSmYnHU+bmZhWZV7Nxw+AsDJhbS90xO13YJKa4Ou6Nh57N2rn6pWHGjhsoTY60V6YVmV+7dka1Wn41prFf+1vB+dbyadVW37vvNxK5u3Vb55KeS4wH6h9nQnn/rOI5+b+27YF0q1H3vixLOaOPGsk7FHTWsvsrkpZQ+fcj6uJOXPvBx63KhENd86jTv78p+FHh9oxi1jAOBR19At313ve9Dy3fWHFyWa7V7+eN8hcbex23293efabWvU0OeDt9vLz/TSh17Gj1MvkExdTy/srL6nyuJxyQQq3/hSwdSCKnfXNf3cq0rNHtH2yjuqFjaUmjmk2m5BqZlDqhY2ZEvbUq2q8u0rkjHKLp1V8ernyjWdIug2dvHqZ6oW7qi0/ltlFpYlY1S5u6bSxLRstSKTyqhW2lF6fvRvlaTP0fdh1HqBZOoauqnJeckEMqm9b03PPfbw30F+VkG+ceW4VlVtZ1NBJqdg7jGpVpVkZNI5mXS277FTUwuqFu4oc+iEbKUsEwQKJmYa33dUQX5GxW8+7/MhxxN97q3WMH3oZfw49QLJ1DV0p555RTur7yt7/LxstaL86Rf2fb3141b5J198+O/c0v63Pes69hO/2608ZY+c7vo9o4A+10XZh57Gj1EvkEw9XUjLn3lJQWZi3wRtPc9V2y2ourPZdwGtY3cat1bc1uavf6Facavv7
YyCfnoRZuxO49pqRYVP3lRl8/ZA23EhyvnWOv44zzcMx8C3jO2svqfixLSC7KSqW3eUWz6n6vZ9peeXlJqc0+YHP9XMSz9QkMk5Gzczv6T09IIUpAYte6QMq8cmlXHy4guXOtUsa1W+fUW5pbNKTff34hzmG3wb+JmVmpyXCVJKzRySarX6Cxca6zjYarn+TQO8V2CncWu7BRWvXZBqYd6DcHQMpcflXSlI7Y0fE51qziwcU2nti74Dt9u44zbf4MfAe7pTz7zy8N/tzp0NetN6t3HnX/mrgcYdRcPq8dT57w00bpQ61VwrF5Vbftr5uNJ4zTf4Ea9jSGAAQSYX61fSAc0GDt3Wm8Nd3yzOzed1UfZ51HpML5AEfZ1e2PzwZzLZCaXysyrfXVdp7aImz7+izMIxVTdvaWvlXVXv31B2+bzKty7LZHIKspOaPPv7occvfPKmJk49L1spqbq1ofKtK5p6/lUFmYmBHnicRdnnUesxvUDS9LWnm148riA3JcnIVopKLx6XWlalCiamVdu6q1pxSyaVqV+gcDB+en5JpRtfaffKp43vTu4bakbZ51HrMb1A0vS1p9t832T+zEv7vjb9/PdDF9Np/ImTzz389+7ljzX9wp/KJPRWnij7PGo9phdImo5v18N7pEWP9wXbQy8wDjqGLgDALW4ZAwCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8IjQBQCPCF0A8Oj/AWdihSPDqhKLAAAAAElFTkSuQmCC" + }, + "metadata": { + "needs_background": "light" + } } ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, "source": [ - "fs.variable_standardization()" + "for ft in fs.variable_standardization():\n", + " print(ft.elements)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[{'id': 0, 'type': 'supsub', 'text': '\\\\supsub', 'role': None}, {'id': 1, 'type': 'mathord', 'text': 'x', 'role': 'base', 'var': 0}, {'id': 2, 'type': 'textord', 'text': '2', 'role': 'sup'}, {'id': 3, 'type': 'rel', 'text': '=', 'role': None}, {'id': 4, 'type': 'mathord', 'text': 'y', 'role': None, 'var': 1}]\n", + "[{'id': 
5, 'type': 'supsub', 'text': '\\\\supsub', 'role': None}, {'id': 6, 'type': 'mathord', 'text': 'x', 'role': 'base', 'var': 0}, {'id': 7, 'type': 'textord', 'text': '3', 'role': 'sup'}, {'id': 8, 'type': 'rel', 'text': '=', 'role': None}, {'id': 9, 'type': 'supsub', 'text': '\\\\supsub', 'role': None}, {'id': 10, 'type': 'mathord', 'text': 'y', 'role': 'base', 'var': 1}, {'id': 11, 'type': 'textord', 'text': '2', 'role': 'sup'}]\n", + "[{'id': 12, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}, {'id': 13, 'type': 'bin', 'text': '+', 'role': None}, {'id': 14, 'type': 'mathord', 'text': 'y', 'role': None, 'var': 1}, {'id': 15, 'type': 'rel', 'text': '=', 'role': None}, {'id': 16, 'type': 'mathord', 'text': '\\\\pi', 'role': None}]\n" + ] + } ], "metadata": { "collapsed": false, @@ -200,23 +487,25 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.5 64-bit" }, "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", + "pygments_lexer": "ipython3", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "file_extension": ".py" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 2 } \ No newline at end of file diff --git a/examples/formula/tree.ipynb b/examples/formula/tree.ipynb index b5f0fd10..ce8bb972 100644 --- a/examples/formula/tree.ipynb +++ b/examples/formula/tree.ipynb @@ -2,20 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "data": { - "text/plain": "{'value': 1, 'id': 0}" - }, - "execution_count": 9, - "metadata": {}, - "output_type": 
"execute_result" - } - ], + "execution_count": 1, "source": [ "import networkx as nx\n", "\n", @@ -26,28 +13,45 @@ "g.add_edge(0, 1)\n", "g.add_edge(0, 2)\n", "g.nodes[0]" - ] + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'value': 1, 'id': 0}" + ] + }, + "metadata": {}, + "execution_count": 1 + } + ], + "metadata": { + "collapsed": true + } } ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.5 64-bit" }, "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", + "pygments_lexer": "ipython3", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "file_extension": ".py" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 2 } \ No newline at end of file diff --git a/examples/parse/parse.ipynb b/examples/parse/parse.ipynb new file mode 100644 index 00000000..94272b65 --- /dev/null +++ b/examples/parse/parse.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# parse" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 导入类" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "from EduNLP.Formula.ast import str2ast, katex_parse\r\n", + "from EduNLP.SIF.parser import Parser" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输入\n", + "\n", + "类型:str \n", + "内容:题目文本 (text)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "text1 = '生产某种零件的A工厂25名工人的日加工零件数_ _'\r\n", + "text2 = 'X的分布列为( )'\r\n", + "text3 = '① 
AB是⊙O的直径,AC是⊙O的切线,BC交⊙O于点E.AC的中点为D'\r\n", + "text4 = '支持公式如$\\\\frac{y}{x}$,$\\\\SIFBlank$,$\\\\FigureID{1}$,不支持公式如$\\\\frac{ \\\\dddot y}{x}$'" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输出" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### 尝试转换为标准形式" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "text_parser1 = Parser(text1)\r\n", + "text_parser1.description_list()\r\n", + "print('text_parser1.text:',text_parser1.text)\r\n", + "\r\n", + "\r\n", + "text_parser2 = Parser(text2)\r\n", + "text_parser2.description_list()\r\n", + "print('text_parser2.text:',text_parser2.text)\r\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "text_parser1.text: 生产某种零件的$A$工厂$25$名工人的日加工零件数$\\SIFBlank$\n", + "text_parser2.text: $X$的分布列为$\\SIFChoice$\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### 判断是否有语法问题" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "text_parser3 = Parser(text3)\r\n", + "text_parser3.description_list()\r\n", + "print('text_parser3.error_flag: ',text_parser3.error_flag)\r\n", + "\r\n", + "\r\n", + "text_parser4 = Parser(text4)\r\n", + "text_parser4.description_list()\r\n", + "print('text_parser4.fomula_illegal_flag: ',text_parser4.fomula_illegal_flag)\r\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "text_parser3.error_flag: 1\n", + "text_parser4.fomula_illegal_flag: 1\n" + ] + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.6.3", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.6.3 64-bit" 
+ }, + "interpreter": { + "hash": "6f23ddf1f0697a8f0c43dd2435bdb82528077c79e9967f824fba6a3b52b05faf" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pretrain/gensim/d2v_bow_tfidf.ipynb b/examples/pretrain/gensim/d2v_bow_tfidf.ipynb index 66a77fa6..154279dc 100644 --- a/examples/pretrain/gensim/d2v_bow_tfidf.ipynb +++ b/examples/pretrain/gensim/d2v_bow_tfidf.ipynb @@ -3,7 +3,14 @@ { "cell_type": "markdown", "source": [ - "# 1. load and tokenize test_items" + "# d2v_bow_tfidf" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 1. load and tokenize test_items" ], "metadata": {} }, @@ -158,7 +165,7 @@ { "cell_type": "markdown", "source": [ - "# 2. train and test model by 'bow'" + "## 2. train and test model by 'bow'" ], "metadata": { "pycharm": { @@ -226,7 +233,7 @@ { "cell_type": "markdown", "source": [ - "# 3. train and test model by 'tfidf'" + "## 3. train and test model by 'tfidf'" ], "metadata": {} }, @@ -321,4 +328,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/pretrain/gensim/d2v_general.ipynb b/examples/pretrain/gensim/d2v_general.ipynb index 67ac5a8e..d1d8605a 100644 --- a/examples/pretrain/gensim/d2v_general.ipynb +++ b/examples/pretrain/gensim/d2v_general.ipynb @@ -3,7 +3,14 @@ { "cell_type": "markdown", "source": [ - "# 1. Get token example from item\n", + "# d2v_general" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 1. Get token example from item\r\n", "> Notes: use geneal('linear') tokenizition method, which means do not parse formulas" ], "metadata": {} @@ -109,7 +116,7 @@ { "cell_type": "markdown", "source": [ - "# 2. Load Model and test item" + "## 2. 
Load Model and test item" ], "metadata": {} }, diff --git a/examples/pretrain/gensim/d2v_stem_tf.ipynb b/examples/pretrain/gensim/d2v_stem_tf.ipynb index 1a602795..f9d76a1c 100644 --- a/examples/pretrain/gensim/d2v_stem_tf.ipynb +++ b/examples/pretrain/gensim/d2v_stem_tf.ipynb @@ -1,15 +1,42 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# d2v_stem_tf" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "import json\r\n", + "from tqdm import tqdm\r\n", + "\r\n", + "def load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "\r\n", + "from EduNLP.Pretrain import GensimWordTokenizer\r\n", + "\r\n", + "tokenizer = GensimWordTokenizer(symbol=\"gm\")\r\n", + "sif_items = []\r\n", + "for item in tqdm(load_items(), \"sifing\"):\r\n", + " sif_item = tokenizer(\r\n", + " item[\"stem\"]\r\n", + " )\r\n", + " if sif_item:\r\n", + " sif_items.append(sif_item.tokens)\r\n", + "\r\n", + "sif_items[0]" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,8 +44,8 @@ ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"①\" (9312) [unknownSymbol]'\n", "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"②\" (9313) [unknownSymbol]'\n", @@ -158,6 +185,7 @@ ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "['已知',\n", @@ -198,87 +226,64 @@ " '=']" ] }, - "execution_count": 1, "metadata": {}, - "output_type": "execute_result" + "execution_count": 1 } ], - "source": [ - "import json\r\n", - "from tqdm import tqdm\r\n", - "\r\n", - "def load_items():\r\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", - " for line in f:\r\n", - " yield json.loads(line)\r\n", - "\r\n", - "\r\n", - "from EduNLP.Pretrain import GensimWordTokenizer\r\n", - "\r\n", - "tokenizer = GensimWordTokenizer(symbol=\"gm\")\r\n", - "sif_items = []\r\n", - "for item in tqdm(load_items(), \"sifing\"):\r\n", - " sif_item = tokenizer(\r\n", - " item[\"stem\"]\r\n", - " )\r\n", - " if sif_item:\r\n", - " sif_items.append(sif_item.tokens)\r\n", - "\r\n", - "sif_items[0]" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, + "source": [ + "len(sif_items)" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "788" ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], - "source": [ - "len(sif_items)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - }, - "outputs": [], - "source": [ - "from EduNLP.Pretrain import train_vector" - ] + } }, { "cell_type": "code", - 
"execution_count": 4, + "execution_count": 3, + "source": [ + "from EduNLP.Pretrain import train_vector" + ], + "outputs": [], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - }, + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# 10 dimension with fasstext method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 10, method=\"d2v\")" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -294,32 +299,35 @@ ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "'../../../data/w2v/gensim_luna_stem_tf_d2v_10.bin'" ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], - "source": [ - "# 10 dimension with fasstext method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 10, method=\"d2v\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - }, + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "from EduNLP.Vector import D2V\r\n", + "\r\n", + "d2v = D2V(\"../../../data/w2v/gensim_luna_stem_tf_d2v_10.bin\")\r\n", + "d2v(sif_items[0])" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "array([-0.16680606, -0.04633714, 0.05006265, 0.2665265 , -0.04968905,\n", @@ -327,17 +335,16 @@ " dtype=float32)" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], - "source": [ - "from EduNLP.Vector import D2V\n", - "\n", - "d2v = D2V(\"../../../data/w2v/gensim_luna_stem_tf_d2v_10.bin\")\n", - "d2v(sif_items[0])" - ] + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } } ], "metadata": { @@ -354,5 +361,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + 
"nbformat_minor": 2 +} diff --git a/examples/pretrain/gensim/w2v_stem_text.ipynb b/examples/pretrain/gensim/w2v_stem_text.ipynb index 01a38b20..3c9b6ca9 100644 --- a/examples/pretrain/gensim/w2v_stem_text.ipynb +++ b/examples/pretrain/gensim/w2v_stem_text.ipynb @@ -1,15 +1,38 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# w2v_stem_text" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "import json\r\n", + "from tqdm import tqdm\r\n", + "\r\n", + "def load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "from EduNLP.Pretrain import train_vector, GensimWordTokenizer\r\n", + "\r\n", + "tokenizer = GensimWordTokenizer(symbol=\"fgm\")\r\n", + "\r\n", + "sif_items = [\r\n", + " tokenizer(item[\"stem\"]).tokens for item in tqdm(load_items(), \"sifing\")\r\n", + "]\r\n", + "\r\n", + "sif_items[0]" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,50 +40,38 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "['已知', '集合', '[FORMULA]', '[FORMULA]']" + "text/plain": [ + "['已知', '集合', '[FORMULA]', '[FORMULA]']" + ] }, - "execution_count": 1, "metadata": {}, - "output_type": "execute_result" + "execution_count": 1 } ], - "source": [ - "import json\n", - "from tqdm import tqdm\n", - "\n", - "def load_items():\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\n", - " for line in f:\n", - " yield json.loads(line)\n", - "\n", - "from EduNLP.Pretrain import train_vector, GensimWordTokenizer\n", - "\n", - "tokenizer = GensimWordTokenizer(symbol=\"fgm\")\n", - "\n", - "sif_items = [\n", - " tokenizer(item[\"stem\"]).tokens for item in tqdm(load_items(), \"sifing\")\n", - "]\n", - "\n", - "sif_items[0]" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 2, + "source": [ + "len(sif_items)" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "792" + "text/plain": [ + "792" + ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], - "source": [ - "len(sif_items)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -71,10 +82,14 @@ { "cell_type": "code", "execution_count": 3, + "source": [ + "# 100 dimension with skipgram method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_t_\", 100)" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -85,18 +100,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_t_sg_100.kv'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_t_sg_100.kv'" + ] }, - "execution_count": 3, "metadata": {}, - "output_type": 
"execute_result" + "execution_count": 3 } ], - "source": [ - "# 100 dimension with skipgram method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_t_\", 100)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -107,10 +120,14 @@ { "cell_type": "code", "execution_count": 4, + "source": [ + "# 50 dimension with cbow method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_t_\", 50, method=\"cbow\")" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -121,18 +138,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_t_cbow_50.kv'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_t_cbow_50.kv'" + ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], - "source": [ - "# 50 dimension with cbow method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_t_\", 50, method=\"cbow\")" - ], "metadata": { "collapsed": false, "pycharm": { @@ -143,10 +158,14 @@ { "cell_type": "code", "execution_count": 5, + "source": [ + "# 10 dimension with fasstext method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_t_\", 10, method=\"fasttext\")" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -157,18 +176,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_t_fasttext_10.bin'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_t_fasttext_10.bin'" + ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], - "source": [ - "# 10 dimension with fasstext method\n", - "train_vector(sif_items, 
\"../../../data/w2v/gensim_luna_stem_t_\", 10, method=\"fasttext\")" - ], "metadata": { "collapsed": false, "pycharm": { @@ -179,22 +196,44 @@ { "cell_type": "code", "execution_count": 6, + "source": [ + "from EduNLP.Vector import W2V\r\n", + "\r\n", + "w2v = W2V(\"../../../data/w2v/gensim_luna_stem_t_sg_100.kv\")\r\n", + "w2v[\"[FORMULA]\"]" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "array([-0.16754825, 0.2707899 , 0.01005908, -0.03040857, 0.10938002,\n -0.28348687, 0.19054936, 0.41737646, -0.3885515 , -0.14650987,\n 0.1157743 , -0.2406684 , -0.11294927, 0.12082661, 0.1759571 ,\n 0.17807944, 0.07178611, -0.16182491, -0.18266837, -0.52223957,\n -0.05876796, 0.0450548 , 0.26906556, 0.02253102, 0.1025768 ,\n 0.29827935, -0.441235 , -0.06949052, -0.22638813, -0.10846554,\n -0.05917242, 0.12802479, 0.21151058, -0.4611071 , -0.16157094,\n 0.32488874, 0.36630565, -0.36908495, 0.24223483, -0.3510737 ,\n -0.15079798, 0.10832163, 0.00392658, -0.20019084, 0.18827583,\n -0.17247967, -0.27385622, 0.17878376, 0.05156241, 0.30575123,\n -0.16626868, 0.01431947, 0.05540735, 0.03373449, 0.36685058,\n -0.05511234, 0.09583379, -0.09495933, 0.01121055, 0.18113017,\n 0.29060405, 0.06472825, 0.20568778, -0.02780204, -0.17310621,\n 0.23243082, 0.2480153 , 0.07856195, -0.03825858, 0.10257348,\n -0.02105796, 0.4248383 , 0.03114873, -0.09995517, 0.16022007,\n 0.08843125, 0.06128069, -0.03922344, 0.02587396, 0.03067247,\n 0.1209543 , -0.05948736, -0.25567266, 0.53167033, -0.4149 ,\n 0.08551055, 0.42399153, 0.18317291, 0.12455773, -0.10759205,\n 0.17496923, 0.2781072 , 0.25744784, 0.1921185 , 0.43071204,\n 0.09138201, -0.37603223, -0.07436363, 0.2961049 , 0.02517671],\n dtype=float32)" + "text/plain": [ + "array([-0.16754825, 0.2707899 , 0.01005908, -0.03040857, 0.10938002,\n", + " -0.28348687, 0.19054936, 0.41737646, -0.3885515 , -0.14650987,\n", + " 0.1157743 , -0.2406684 , -0.11294927, 0.12082661, 0.1759571 ,\n", + " 0.17807944, 0.07178611, 
-0.16182491, -0.18266837, -0.52223957,\n", + " -0.05876796, 0.0450548 , 0.26906556, 0.02253102, 0.1025768 ,\n", + " 0.29827935, -0.441235 , -0.06949052, -0.22638813, -0.10846554,\n", + " -0.05917242, 0.12802479, 0.21151058, -0.4611071 , -0.16157094,\n", + " 0.32488874, 0.36630565, -0.36908495, 0.24223483, -0.3510737 ,\n", + " -0.15079798, 0.10832163, 0.00392658, -0.20019084, 0.18827583,\n", + " -0.17247967, -0.27385622, 0.17878376, 0.05156241, 0.30575123,\n", + " -0.16626868, 0.01431947, 0.05540735, 0.03373449, 0.36685058,\n", + " -0.05511234, 0.09583379, -0.09495933, 0.01121055, 0.18113017,\n", + " 0.29060405, 0.06472825, 0.20568778, -0.02780204, -0.17310621,\n", + " 0.23243082, 0.2480153 , 0.07856195, -0.03825858, 0.10257348,\n", + " -0.02105796, 0.4248383 , 0.03114873, -0.09995517, 0.16022007,\n", + " 0.08843125, 0.06128069, -0.03922344, 0.02587396, 0.03067247,\n", + " 0.1209543 , -0.05948736, -0.25567266, 0.53167033, -0.4149 ,\n", + " 0.08551055, 0.42399153, 0.18317291, 0.12455773, -0.10759205,\n", + " 0.17496923, 0.2781072 , 0.25744784, 0.1921185 , 0.43071204,\n", + " 0.09138201, -0.37603223, -0.07436363, 0.2961049 , 0.02517671],\n", + " dtype=float32)" + ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], - "source": [ - "from EduNLP.Vector import W2V\n", - "\n", - "w2v = W2V(\"../../../data/w2v/gensim_luna_stem_t_sg_100.kv\")\n", - "w2v[\"[FORMULA]\"]" - ], "metadata": { "collapsed": false, "pycharm": { @@ -223,5 +262,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/pretrain/gensim/w2v_stem_tf.ipynb b/examples/pretrain/gensim/w2v_stem_tf.ipynb index 0a549870..4dd90cff 100644 --- a/examples/pretrain/gensim/w2v_stem_tf.ipynb +++ b/examples/pretrain/gensim/w2v_stem_tf.ipynb @@ -1,15 +1,42 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# w2v_stem_tf" + ], + "metadata": {} + }, { "cell_type": "code", 
"execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "import json\r\n", + "from tqdm import tqdm\r\n", + "\r\n", + "def load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "\r\n", + "from EduNLP.Pretrain import GensimWordTokenizer\r\n", + "\r\n", + "tokenizer = GensimWordTokenizer(symbol=\"gm\")\r\n", + "sif_items = []\r\n", + "for item in tqdm(load_items(), \"sifing\"):\r\n", + " sif_item = tokenizer(\r\n", + " item[\"stem\"]\r\n", + " )\r\n", + " if sif_item:\r\n", + " sif_items.append(sif_item.tokens)\r\n", + "\r\n", + "sif_items[0]" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,8 +44,8 @@ ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"①\" (9312) [unknownSymbol]'\n", "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"②\" (9313) [unknownSymbol]'\n", @@ -158,54 +185,74 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "['埃及',\n '胡夫',\n '金字塔',\n '古代',\n '世界',\n '建筑',\n '奇迹',\n '形状',\n '视为',\n '正四',\n '棱锥',\n '以该',\n '四',\n '棱锥',\n '高为',\n '边长',\n '正方形',\n '面积',\n '等于',\n '四',\n '棱锥',\n '侧面',\n '三角形',\n '面积',\n '侧面',\n '三角形',\n '底边',\n '高',\n '底面',\n '正方形',\n '边长',\n '比值',\n '[FIGURE]',\n '[FIGURE]',\n '[FIGURE]',\n '[FIGURE]',\n '[FIGURE]']" + "text/plain": [ + "['埃及',\n", + " '胡夫',\n", + " '金字塔',\n", + " '古代',\n", + " '世界',\n", + " '建筑',\n", + " '奇迹',\n", + " '形状',\n", + " '视为',\n", + " '正四',\n", + " '棱锥',\n", + " '以该',\n", + " '四',\n", + " '棱锥',\n", + " '高为',\n", + " '边长',\n", + " '正方形',\n", + " '面积',\n", + " '等于',\n", + " '四',\n", + " '棱锥',\n", + " '侧面',\n", + " '三角形',\n", + " '面积',\n", + " '侧面',\n", + " '三角形',\n", + " '底边',\n", + " '高',\n", + " '底面',\n", + " '正方形',\n", + " '边长',\n", + " '比值',\n", + " '[FIGURE]',\n", + " '[FIGURE]',\n", + " '[FIGURE]',\n", + " '[FIGURE]',\n", + " '[FIGURE]']" + ] }, - "execution_count": 1, "metadata": {}, - "output_type": "execute_result" + "execution_count": 1 } ], - "source": [ - "import json\n", - "from tqdm import tqdm\n", - "\n", - "def load_items():\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\n", - " for line in f:\n", - " yield json.loads(line)\n", - "\n", - "\n", - "from EduNLP.Pretrain import GensimWordTokenizer\n", - "\n", - "tokenizer = GensimWordTokenizer(symbol=\"gm\")\n", - "sif_items = []\n", - "for item in tqdm(load_items(), \"sifing\"):\n", - " sif_item = 
tokenizer(\n", - " item[\"stem\"]\n", - " )\n", - " if sif_item:\n", - " sif_items.append(sif_item.tokens)\n", - "\n", - "sif_items[0]" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 3, + "source": [ + "len(sif_items)" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "792" + "text/plain": [ + "792" + ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], - "source": [ - "len(sif_items)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -216,10 +263,10 @@ { "cell_type": "code", "execution_count": 4, - "outputs": [], "source": [ "from EduNLP.Pretrain import train_vector" ], + "outputs": [], "metadata": { "collapsed": false, "pycharm": { @@ -230,10 +277,14 @@ { "cell_type": "code", "execution_count": 5, + "source": [ + "# 100 dimension with skipgram method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 100)" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -244,18 +295,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_tf_sg_100.kv'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_tf_sg_100.kv'" + ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "execution_count": 5 } ], - "source": [ - "# 100 dimension with skipgram method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 100)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -266,10 +315,14 @@ { "cell_type": "code", "execution_count": 6, + "source": [ + "# 50 dimension with cbow method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 50, method=\"cbow\")" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: 
loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -280,18 +333,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_tf_cbow_50.kv'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_tf_cbow_50.kv'" + ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], - "source": [ - "# 50 dimension with cbow method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 50, method=\"cbow\")" - ], "metadata": { "collapsed": false, "pycharm": { @@ -302,10 +353,14 @@ { "cell_type": "code", "execution_count": 7, + "source": [ + "# 10 dimension with fasstext method\r\n", + "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 10, method=\"fasttext\")" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "EduNLP, INFO Epoch #0: loss-0.0000 \n", "EduNLP, INFO Epoch #1: loss-0.0000 \n", @@ -316,18 +371,16 @@ ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../../data/w2v/gensim_luna_stem_tf_fasttext_10.bin'" + "text/plain": [ + "'../../../data/w2v/gensim_luna_stem_tf_fasttext_10.bin'" + ] }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 7 } ], - "source": [ - "# 10 dimension with fasstext method\n", - "train_vector(sif_items, \"../../../data/w2v/gensim_luna_stem_tf_\", 10, method=\"fasttext\")" - ], "metadata": { "collapsed": false, "pycharm": { @@ -338,22 +391,26 @@ { "cell_type": "code", "execution_count": 8, + "source": [ + "from EduNLP.Vector import W2V\n", + "\n", + "w2v = W2V(\"../../../data/w2v/gensim_luna_stem_tf_fasttext_10.bin\", method=\"fasttext\")\n", + "w2v[\"[FIGURE]\"]" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "array([ 0.3322667 , -0.701586 , -0.6528301 , -0.02556002, 0.44070247,\n 0.44261315, 0.54466563, 0.8991576 , -1.0600986 , 0.19438864],\n 
dtype=float32)" + "text/plain": [ + "array([ 0.3322667 , -0.701586 , -0.6528301 , -0.02556002, 0.44070247,\n", + " 0.44261315, 0.54466563, 0.8991576 , -1.0600986 , 0.19438864],\n", + " dtype=float32)" + ] }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], - "source": [ - "from EduNLP.Vector import W2V\n", - "\n", - "w2v = W2V(\"../../../data/w2v/gensim_luna_stem_tf_fasttext_10.bin\", method=\"fasttext\")\n", - "w2v[\"[FIGURE]\"]" - ], "metadata": { "collapsed": false, "pycharm": { @@ -382,5 +439,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/pretrain/prepare_dataset.ipynb b/examples/pretrain/prepare_dataset.ipynb index 0f1acb0e..d33f39b3 100644 --- a/examples/pretrain/prepare_dataset.ipynb +++ b/examples/pretrain/prepare_dataset.ipynb @@ -1,40 +1,49 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# prepare_dataset" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "from EduData import get_data\r\n", + "\r\n", + "get_data(\"open-luna\", \"../../data/\")\r\n" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "downloader, INFO http://base.ustc.edu.cn/data/OpenLUNA/OpenLUNA.json is saved as ..\\..\\data\\OpenLUNA.json\n" ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Downloading ..\\..\\data\\OpenLUNA.json 100.00%: 275142 | 275142\n" ] }, { + "output_type": "execute_result", "data": { - "text/plain": "'../../data/'" + "text/plain": [ + "'../../data/'" + ] }, - "execution_count": 1, "metadata": {}, - "output_type": "execute_result" + "execution_count": 1 } ], - "source": [ - "from EduData import get_data\n", - "\n", - "get_data(\"open-luna\", \"../../data/\")\n" - ] + "metadata": { + "collapsed": true + } } ], "metadata": { @@ -57,5 +66,5 @@ } }, "nbformat": 4, - 
"nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/pretrain/seg_token/d2v.ipynb b/examples/pretrain/seg_token/d2v.ipynb index 12ee5a99..909d7846 100644 --- a/examples/pretrain/seg_token/d2v.ipynb +++ b/examples/pretrain/seg_token/d2v.ipynb @@ -1,15 +1,55 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# d2v" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "import warnings\r\n", + "from tqdm import tqdm\r\n", + "import json\r\n", + "from EduNLP.utils import dict2str4sif\r\n", + "\r\n", + "def load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "from EduNLP.Pretrain import GensimSegTokenizer\r\n", + "\r\n", + "tokenizer = GensimSegTokenizer(depth=None)\r\n", + "sif_items = []\r\n", + "for item in tqdm(load_items(), \"sifing\"):\r\n", + " keys = [\"stem\"]\r\n", + " item[\"options\"] = eval(item[\"options\"])\r\n", + " if item[\"options\"]:\r\n", + " keys.append(\"options\")\r\n", + " try:\r\n", + " item_str = dict2str4sif(\r\n", + " item,\r\n", + " key_as_tag=True,\r\n", + " add_list_no_tag=False,\r\n", + " keys=keys,\r\n", + " tag_mode=\"head\"\r\n", + " )\r\n", + " except TypeError:\r\n", + " continue\r\n", + " sif_item = tokenizer(\r\n", + " item_str\r\n", + " )\r\n", + " if sif_item:\r\n", + " sif_items.append(sif_item)" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,8 +57,8 @@ ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"①\" (9312) [unknownSymbol]'\n", "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"②\" (9313) [unknownSymbol]'\n", @@ -190,59 +230,65 @@ ] } ], - "source": [ - "import warnings\n", - "from tqdm import tqdm\n", - "import json\n", - "from EduNLP.utils import dict2str4sif\n", - "\n", - "def load_items():\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\n", - " for line in f:\n", - " yield json.loads(line)\n", - "\n", - "from EduNLP.Pretrain import GensimSegTokenizer\n", - "\n", - "tokenizer = GensimSegTokenizer(depth=None)\n", - "sif_items = []\n", - "for item in tqdm(load_items(), \"sifing\"):\n", - " keys = [\"stem\"]\n", - " item[\"options\"] = eval(item[\"options\"])\n", - " if item[\"options\"]:\n", - " keys.append(\"options\")\n", - " try:\n", - " item_str = dict2str4sif(\n", - " item,\n", - " key_as_tag=True,\n", - " add_list_no_tag=False,\n", - " keys=keys,\n", - " tag_mode=\"head\"\n", - " )\n", - " except TypeError:\n", - " continue\n", - " sif_item = tokenizer(\n", - " item_str\n", - " )\n", - " if sif_item:\n", - " sif_items.append(sif_item)" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 2, + "source": [ + "sif_items[0]" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[['\\\\SIFTag{stem}'],\n ['已知', '集合'],\n ['mathord',\n '=',\n 'mathord',\n '\\\\mid',\n 'mathord',\n 'textord',\n '{ }',\n '^',\n '-',\n 'textord',\n 'mathord',\n '-',\n 'textord',\n '<',\n 'textord',\n '\\\\{',\n ',',\n 'mathord',\n '=',\n '\\\\{',\n '-',\n 'textord',\n ',',\n 'textord',\n ',',\n 'textord',\n ',',\n 'textord',\n '\\\\}',\n ','],\n ['mathord', 
'\\\\cap', 'mathord', '='],\n ['\\\\SIFTag{options}'],\n ['\\\\', '{', '\\\\'],\n ['\\\\', '{', '\\\\'],\n ['\\\\', '{', '\\\\'],\n ['\\\\', '{', '\\\\']]" + "text/plain": [ + "[['\\\\SIFTag{stem}'],\n", + " ['已知', '集合'],\n", + " ['mathord',\n", + " '=',\n", + " 'mathord',\n", + " '\\\\mid',\n", + " 'mathord',\n", + " 'textord',\n", + " '{ }',\n", + " '^',\n", + " '-',\n", + " 'textord',\n", + " 'mathord',\n", + " '-',\n", + " 'textord',\n", + " '<',\n", + " 'textord',\n", + " '\\\\{',\n", + " ',',\n", + " 'mathord',\n", + " '=',\n", + " '\\\\{',\n", + " '-',\n", + " 'textord',\n", + " ',',\n", + " 'textord',\n", + " ',',\n", + " 'textord',\n", + " ',',\n", + " 'textord',\n", + " '\\\\}',\n", + " ','],\n", + " ['mathord', '\\\\cap', 'mathord', '='],\n", + " ['\\\\SIFTag{options}'],\n", + " ['\\\\', '{', '\\\\'],\n", + " ['\\\\', '{', '\\\\'],\n", + " ['\\\\', '{', '\\\\'],\n", + " ['\\\\', '{', '\\\\']]" + ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], - "source": [ - "sif_items[0]" - ], "metadata": { "collapsed": false, "pycharm": { @@ -253,19 +299,21 @@ { "cell_type": "code", "execution_count": 3, + "source": [ + "len(sif_items)" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "770" + "text/plain": [ + "770" + ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], - "source": [ - "len(sif_items)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -276,17 +324,17 @@ { "cell_type": "code", "execution_count": null, - "outputs": [], "source": [ - "from EduNLP.Pretrain import train_vector\n", - "from gensim.models.doc2vec import TaggedDocument\n", - "\n", - "train_vector(\n", - " sif_items,\n", - " \"../../../data/w2v/gensim_luna_stem_tf_\",\n", - " 10\n", + "from EduNLP.Pretrain import train_vector\r\n", + "from gensim.models.doc2vec import TaggedDocument\r\n", + "\r\n", + "train_vector(\r\n", + " sif_items,\r\n", + " 
\"../../../data/w2v/gensim_luna_stem_tf_\",\r\n", + " 10\r\n", ")" ], + "outputs": [], "metadata": { "collapsed": false, "pycharm": { @@ -315,5 +363,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/pretrain/seg_token/d2v_d1.ipynb b/examples/pretrain/seg_token/d2v_d1.ipynb index 0e7047b2..49711e88 100644 --- a/examples/pretrain/seg_token/d2v_d1.ipynb +++ b/examples/pretrain/seg_token/d2v_d1.ipynb @@ -1,15 +1,54 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# d2v_d1" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": true - }, + "source": [ + "from tqdm import tqdm\r\n", + "import json\r\n", + "from EduNLP.utils import dict2str4sif\r\n", + "\r\n", + "def load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "from EduNLP.Pretrain import GensimSegTokenizer\r\n", + "\r\n", + "tokenizer = GensimSegTokenizer(depth=1)\r\n", + "sif_items = []\r\n", + "for item in tqdm(load_items(), \"sifing\"):\r\n", + " keys = [\"stem\"]\r\n", + " item[\"options\"] = eval(item[\"options\"])\r\n", + " if item[\"options\"]:\r\n", + " keys.append(\"options\")\r\n", + " try:\r\n", + " item_str = dict2str4sif(\r\n", + " item,\r\n", + " key_as_tag=True,\r\n", + " add_list_no_tag=False,\r\n", + " keys=keys,\r\n", + " tag_mode=\"head\"\r\n", + " )\r\n", + " except TypeError:\r\n", + " continue\r\n", + " sif_item = tokenizer(\r\n", + " item_str\r\n", + " )\r\n", + " if sif_item:\r\n", + " sif_items.append(sif_item)" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,8 +56,8 @@ ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"①\" (9312) [unknownSymbol]'\n", "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"②\" (9313) [unknownSymbol]'\n", @@ -190,58 +229,51 @@ ] } ], - "source": [ - "from tqdm import tqdm\n", - "import json\n", - "from EduNLP.utils import dict2str4sif\n", - "\n", - "def load_items():\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\n", - " for line in f:\n", - " yield json.loads(line)\n", - "\n", - "from EduNLP.Pretrain import GensimSegTokenizer\n", - "\n", - "tokenizer = GensimSegTokenizer(depth=1)\n", - "sif_items = []\n", - "for item in tqdm(load_items(), \"sifing\"):\n", - " keys = [\"stem\"]\n", - " item[\"options\"] = eval(item[\"options\"])\n", - " if item[\"options\"]:\n", - " keys.append(\"options\")\n", - " try:\n", - " item_str = dict2str4sif(\n", - " item,\n", - " key_as_tag=True,\n", - " add_list_no_tag=False,\n", - " keys=keys,\n", - " tag_mode=\"head\"\n", - " )\n", - " except TypeError:\n", - " continue\n", - " sif_item = tokenizer(\n", - " item_str\n", - " )\n", - " if sif_item:\n", - " sif_items.append(sif_item)" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 3, + "source": [ + "sif_items[1]" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[['\\\\SIFTag{stem}'],\n ['[TEXT_BEGIN]', '复数'],\n ['[FORMULA_BEGIN]',\n 'mathord',\n '=',\n 'textord',\n '+',\n 'textord',\n 'mathord',\n '+',\n 'mathord',\n 'textord',\n '{ }',\n '^'],\n ['[TEXT_BEGIN]'],\n ['[FORMULA_BEGIN]', 'textord', 'mathord', 'textord', '='],\n ['\\\\SIFTag{options}'],\n ['[TEXT_BEGIN]'],\n ['\\\\SIFSep'],\n ['[TEXT_BEGIN]'],\n ['\\\\SIFSep'],\n 
['[FORMULA_BEGIN]', 'textord', '{ }', '\\\\sqrt'],\n ['\\\\SIFSep'],\n ['[TEXT_BEGIN]']]" + "text/plain": [ + "[['\\\\SIFTag{stem}'],\n", + " ['[TEXT_BEGIN]', '复数'],\n", + " ['[FORMULA_BEGIN]',\n", + " 'mathord',\n", + " '=',\n", + " 'textord',\n", + " '+',\n", + " 'textord',\n", + " 'mathord',\n", + " '+',\n", + " 'mathord',\n", + " 'textord',\n", + " '{ }',\n", + " '^'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['[FORMULA_BEGIN]', 'textord', 'mathord', 'textord', '='],\n", + " ['\\\\SIFTag{options}'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['\\\\SIFSep'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['\\\\SIFSep'],\n", + " ['[FORMULA_BEGIN]', 'textord', '{ }', '\\\\sqrt'],\n", + " ['\\\\SIFSep'],\n", + " ['[TEXT_BEGIN]']]" + ] }, - "execution_count": 3, "metadata": {}, - "output_type": "execute_result" + "execution_count": 3 } ], - "source": [ - "sif_items[1]" - ], "metadata": { "collapsed": false, "pycharm": { @@ -252,19 +284,21 @@ { "cell_type": "code", "execution_count": 4, + "source": [ + "len(sif_items)" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "770" + "text/plain": [ + "770" + ] }, - "execution_count": 4, "metadata": {}, - "output_type": "execute_result" + "execution_count": 4 } ], - "source": [ - "len(sif_items)" - ], "metadata": { "collapsed": false, "pycharm": { @@ -293,5 +327,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/pretrain/seg_token/d2v_d2.ipynb b/examples/pretrain/seg_token/d2v_d2.ipynb index 6ecc5216..076c1e95 100644 --- a/examples/pretrain/seg_token/d2v_d2.ipynb +++ b/examples/pretrain/seg_token/d2v_d2.ipynb @@ -1,15 +1,54 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# d2v_d2" + ], + "metadata": {} + }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "source": [ + "from tqdm import tqdm\r\n", + "import json\r\n", + "from EduNLP.utils import dict2str4sif\r\n", + "\r\n", + "def 
load_items():\r\n", + " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\r\n", + " for line in f:\r\n", + " yield json.loads(line)\r\n", + "\r\n", + "from EduNLP.Pretrain import GensimSegTokenizer\r\n", + "\r\n", + "tokenizer = GensimSegTokenizer(depth=2)\r\n", + "sif_items = []\r\n", + "for item in tqdm(load_items(), \"sifing\"):\r\n", + " keys = [\"stem\"]\r\n", + " item[\"options\"] = eval(item[\"options\"])\r\n", + " if item[\"options\"]:\r\n", + " keys.append(\"options\")\r\n", + " try:\r\n", + " item_str = dict2str4sif(\r\n", + " item,\r\n", + " key_as_tag=True,\r\n", + " add_list_no_tag=False,\r\n", + " keys=keys,\r\n", + " tag_mode=\"head\"\r\n", + " )\r\n", + " except TypeError:\r\n", + " continue\r\n", + " sif_item = tokenizer(\r\n", + " item_str\r\n", + " )\r\n", + " if sif_item:\r\n", + " sif_items.append(sif_item)" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "d:\\env\\python3.8\\lib\\site-packages\\gensim\\similarities\\__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", " warnings.warn(msg)\n", @@ -17,8 +56,8 @@ ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"①\" (9312) [unknownSymbol]'\n", "'LaTeX-incompatible input and strict mode is set to \\'warn\\': Unrecognized Unicode character \"②\" (9313) [unknownSymbol]'\n", @@ -190,58 +229,51 @@ ] } ], - "source": [ - "from tqdm import tqdm\n", - "import json\n", - "from EduNLP.utils import dict2str4sif\n", - "\n", - "def load_items():\n", - " with open(\"../../../data/OpenLUNA.json\", encoding=\"utf-8\") as f:\n", - " for line in f:\n", - " yield json.loads(line)\n", - "\n", - "from EduNLP.Pretrain import GensimSegTokenizer\n", - "\n", - "tokenizer = GensimSegTokenizer(depth=2)\n", - "sif_items = []\n", - "for item in tqdm(load_items(), \"sifing\"):\n", - " keys = [\"stem\"]\n", - " item[\"options\"] = eval(item[\"options\"])\n", - " if item[\"options\"]:\n", - " keys.append(\"options\")\n", - " try:\n", - " item_str = dict2str4sif(\n", - " item,\n", - " key_as_tag=True,\n", - " add_list_no_tag=False,\n", - " keys=keys,\n", - " tag_mode=\"head\"\n", - " )\n", - " except TypeError:\n", - " continue\n", - " sif_item = tokenizer(\n", - " item_str\n", - " )\n", - " if sif_item:\n", - " sif_items.append(sif_item)" - ] + "metadata": { + "collapsed": true + } }, { "cell_type": "code", "execution_count": 2, + "source": [ + "sif_items[1]" + ], "outputs": [ { + "output_type": "execute_result", "data": { - "text/plain": "[['\\\\SIFTag{stem}'],\n ['[TEXT_BEGIN]', '复数'],\n ['[FORMULA_BEGIN]',\n 'mathord',\n '=',\n 'textord',\n '+',\n 'textord',\n 'mathord',\n '+',\n 'mathord',\n 'textord',\n '{ }',\n '^'],\n ['[TEXT_BEGIN]'],\n ['[FORMULA_BEGIN]', 'textord', 'mathord', 'textord', '='],\n ['\\\\SIFTag{options}'],\n ['[TEXT_BEGIN]'],\n ['\\\\SIFSep'],\n ['[TEXT_BEGIN]'],\n ['\\\\SIFSep'],\n 
['[FORMULA_BEGIN]', 'textord', '{ }', '\\\\sqrt'],\n ['\\\\SIFSep'],\n ['[TEXT_BEGIN]']]" + "text/plain": [ + "[['\\\\SIFTag{stem}'],\n", + " ['[TEXT_BEGIN]', '复数'],\n", + " ['[FORMULA_BEGIN]',\n", + " 'mathord',\n", + " '=',\n", + " 'textord',\n", + " '+',\n", + " 'textord',\n", + " 'mathord',\n", + " '+',\n", + " 'mathord',\n", + " 'textord',\n", + " '{ }',\n", + " '^'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['[FORMULA_BEGIN]', 'textord', 'mathord', 'textord', '='],\n", + " ['\\\\SIFTag{options}'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['\\\\SIFSep'],\n", + " ['[TEXT_BEGIN]'],\n", + " ['\\\\SIFSep'],\n", + " ['[FORMULA_BEGIN]', 'textord', '{ }', '\\\\sqrt'],\n", + " ['\\\\SIFSep'],\n", + " ['[TEXT_BEGIN]']]" + ] }, - "execution_count": 2, "metadata": {}, - "output_type": "execute_result" + "execution_count": 2 } ], - "source": [ - "sif_items[1]" - ], "metadata": { "collapsed": false, "pycharm": { @@ -270,5 +302,5 @@ } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 2 +} diff --git a/examples/seg/seg.ipynb b/examples/seg/seg.ipynb new file mode 100644 index 00000000..751d1439 --- /dev/null +++ b/examples/seg/seg.ipynb @@ -0,0 +1,287 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Code for beginner to learn how to use seg\r\n", + "\r\n", + "In this notebook, we will show you the basic usage to apply SIF to prepare data for conducting scientific experiments.\r\n", + "\r\n", + "We use the demo item (an exercise from LUNA) shown in the following Figure.\r\n", + "![Figure](../../asset/_static/item.png).\r\n", + "The SIF expression of this item can be written as follows:" + ], + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "item = {\r\n", + " \"stem\": r\"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 
其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\",\r\n", + " \"options\": [\"$p_1=p_2$\", \"$p_1=p_3$\", \"$p_2=p_3$\", \"$p_1=p_2+p_3$\"]\r\n", + "}\r\n", + "item[\"stem\"]" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "'如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\\\SIFChoice$$\\\\FigureID{1}$'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "from PIL import Image\r\n", + "img = Image.open(\"../../asset/_static/item_figure.png\")\r\n", + "figures = {\"1\": img}\r\n", + "img" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlU
SvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT08PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDKR9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3BnxPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDcVqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+
86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0dGBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9kctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v96sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZVXYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6
XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kDsLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dSo0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUzi6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJ
xBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBBUVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLRsdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQ
n5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmgOZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9erVIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTGDAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8oXkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Segment" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "sif4sci(item[\"stem\"], figures=figures, tokenization=False, symbol=\"tfgm\")\r\n" + 
], + "outputs": [ + { + "data": { + "text/plain": [ + "['[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[FIGURE]']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "segments = sif4sci(item[\"stem\"], figures=figures, tokenization=False)\r\n", + "segments" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "['如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形', 'ABC', '的斜边', 'BC', ', 直角边', 'AB', ', ', 'AC', '.', '\\\\bigtriangleup ABC', '的三边所围成的区域记为', 'I', ',黑色部分记为', 'II', ', 其余部分记为', 'III', '.在整个图形中随机取一点,此点取自', 'I,II,III', '的概率分别记为', 'p_1,p_2,p_3', ',则', '\\\\SIFChoice', \\FigureID{1}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "segments.text_segments" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "['如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形',\n", + " '的斜边',\n", + " ', 直角边',\n", + " ', ',\n", + " '.',\n", + " '的三边所围成的区域记为',\n", + " ',黑色部分记为',\n", + " ', 其余部分记为',\n", + " '.在整个图形中随机取一点,此点取自',\n", + " '的概率分别记为',\n", + " ',则']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "segments.figure_segments" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "[\\FigureID{1}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + 
"cell_type": "code", + "execution_count": 8, + "source": [ + "segments.figure_segments[0].figure" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOYAAACICAYAAADzlJeRAAAe3klEQVR4nO2df1RT9/3/nyE/SWJyBdSUOgs72mKP1XhK24j2CGon4FagbtTMtpZt6rDzR7rjKt+2C+rnHGxdBbr2xB9zgc4j1NMJbpXg2hlsp2DnjrSHVaqcgquWbFqNLUgQkvf3DwgiSSA/7uUm8f04530O3Ny87zu5ed7X+8fr/XoJCCEEFAolrIjhuwEUCsUTKkwKJQyhwqRQwhAR3w2IBJqamuBwOHy+rtFokJKSMo4tokQ7VJgAGhoa0NzcDLvdjn//+99oa2uDw+FAa2srAIBhmFHf73K58O233wIAYmNj8cADDwAAZs+ejblz50Kr1UKn00Emk3H6OSjRg+Bum5Vtbm5Gc3MzrFYrPvnkE7S2toJhGPT09KC3t5f160mlUsTGxqKrqwsMw2D69OlYunQpdDodMjMzWb8eJTqIemHa7XbU19fjT3/6E/72t79BoVDA6XSiq6uL13YxDIOuri48/vjjyM/PR2ZmJpKSknhtEyV8iEph2mw21NbW4tChQzh16hSkUulQVzMckcvliImJgUwmQ3Z2NgwGA7RaLd/NovBIVAmzuroar732Gs6fPw8AuHnzJs8tCg61Wg2FQoFXX30VK1asGHOMS4k+Il6YNpsNf/jDH7Bz504ACGvLGChyuRxOpxOLFy/G+vXr6Zj0LiJihdnU1IQ33ngDdXV1cLlcoy5nRAMMw0AkEmHnzp14/vnn+W4OhWMiTpjNzc0oLCxEa2sr7HY7380Zd5RKJWQyGRVolBMxwrTZbFi/fj0++OAD3Lhxg+/m8A4VaHQT9sK02+347W9/i8rKyqgaP7KFUqmESqXCn//8Z+h0Or6bQ2GJsPaV3bdvH5KTk7Fv3z4qSh90dXXh66+/RlZWFjZs2HBXdu+jEhKGdHZ2Ep1OR9RqNQFAi59FJpORSZMmEYvFwuLdqCNlZRdYrI/iD2FnMfft24dZs2ahqamJjiUDxOFw4MqVK9Dr9XjqqadYsJ5tKJ+fjUNsNI4SGHw/Gdxcv36d5OXlUSvJUhGLxWTSpEmksbEx6HtyoSyNACBr6li80RS/CAuL2dDQgPvvvx/vv/8+tZIs0dfXhytXruBHP/oRSkpKAq+grRw78QrK0thvG2VseJ+VLS0txbZt2+ikBYeoVCrMmzcP1dXVfrr3WbB2/nlsPnk/dgr+Dw9eOImN07lrX0NDA4Db+17/+c9/4uuvv/Z67uzZs5GcnAwASEpKuqNEFXya65/97GeEYRjeu313QxGLxSQ5OZmcO3duzPtSt2YNqSOEkAtlJA2Df7NAT08PsVqtxGg0Ep1OR2JjYwkAwjAMYRiGiESigD6TUqkkDMMQpVJJABCtVkuys7OJ0WgMqQsfDvAizOvXr5NHH3106MbQMn4lLi6OHDlyxOe9cY8rb5fQhHnu3Dny8ssvk+TkZCISicbtQcwwDImNjSUZGRnEZDKR9vb2ED7F+DPuwmxvbycpKSlEIpHw/iO9W8uUKVPIO++843lzLpSRNcOXRurWEKSVkUAXSxobG0lhYSFJSEggKpUqYEvIdpHL5USpVJLExERiMpnI9evXQ/kJjwvjKsyzZ8+SxMRE3n+YtIBMnDiRvP3227dvzoUykjZChBfK0vwWZk9PDzGbzSQxMTGshydyuZwoFAqSnZ3N8novu4ybMM+ePUs0Gg3vN4aW2yUuLm5InAPH0siAwbxAytKGn+u7O9vZ2Uk2bdpEJkyYMDTWi5TCMAxJSEggZrN5nFTgP+MizPb2dmopw7TExcWRjRs3Bn
xPOzs7ycqVK4lcLue9qxpqUSqVYSdQzoXZ3t5OkpKSeP/yafFdGIbxW5w9PT3EaDSS+Ph4IhaLeW87m8Ut0JqaGk414Q+cCpOKMnIKwzBk165do97PqqoqotFoiEKh4L29XBa1Wk10Oh2vM7mcCfP69eskJSWF9y+ZFv/LlClTvC6luDcVqFQq3ts4noVhGGI0GklPTw9XMvEJJ54/DocDjzzyCFpaWtiuOuqRy+WQSCQecW6Tk5OhVqvvODcnJwcA8NFHH+G///0vrl27ht7eXvT19QUdnnPKlCloaGgYiixfX1+PZ555Bt98802QnyiyUSgUiI+Px5EjR8Y1ciEnwnzqqafw/vvvo6+vj+2qowaGYYYiuLtFl5OTM5RuQavVshodr6GhAXa7Hc3Nzaivr4fNZsNXX30FAIiJiUF/f//QuSqVCrt27cJf/vIXfPTRR9RdEkB8fDxKSkqwevXqcbke68Kkvq+eiEQiKJVKdHV1ITU1FUuXLkV6enpY5Dxxi7WjowN//etfceLECXz33XdwuVx3iJUyEFZ00aJFOHjwIPfpLtjsFzc2NpKEhATexwZ8F7frmUgkIjqdjpSUlESU7+bhw4eJRqMharWayOVyIpVKSUxMDO/fazgUsVhMZsyY4ZfPcSiwJszOzk4ybdo03r84vopMJiNKpZJotdqIE+JwTCYTSU9PJ+3t7aSqqoqsWLGCnDt3jphMJpKZmUkkEknULZMEU6ZMmULOnj3L2X1gRZg9PT0kNTWV9y+Lj6JWq0liYiLZvn17xDlKD6enp4esWLGC/PKXvyQ9PT3k448/Jh9//DHZtGkTKS0tveM8s9k8tDtEKBTyfg/4KhqNhrMHMCvCNBqNUb+2NVKMKpWKPPfcc5w+NceLc+fOEa1WS6qqqoYE6S49PT0kMzOTWK1Wj/e1t7eT7du3k8TERCKXy3m/L3yUhISEUXfrBEvIwmxvbw9rp2U2i1KpJNnZ2WHhGcIWVVVVRKfTkXPnznmI0l06OzuJVqslnZ2dPus5e/Ysee6554hSqSRSqZT3ezWexdf6byiEPCs7b948NDU1hVJFWCMSiSCXy5Gbm4utW7dGzU55h8OBoqIi2Gw2mEymMdecRSIRioqKYLFYRp2RdDgceOutt1BSUoLu7m5Oco6GI1OmTMHevXvx5JNPslJfSMLct28fNm/eHJVxemQyGWQyGX7yk59g27Zt0Gg0fDeJNWw2G/R6PXJycpCamur3+86cOYOLFy+itLR0zHMdDgcqKirw6quvoru7Gz09PaE0OSKIj4/Hhx9+yIojQtDCtNlsmDt3Lmw2W8iNCCcUCgViYmKwbt06bNmyJepS4NXX18NgMMBsNge1Trl//34sXLgwoLQMFRUV2Lx5M3p6etDd3R3wNSOJpKQkWK3WkHtWQUfJW7duXVS5aYnFYjAMg+LiYvzvf//Djh07ok6UxcXFKC8vR2NjY9DOAyaTCZWVlQENX55//nlcuXIFBw4cQHJyMmJjY4O6diTQ0dGBH/zgByFnnwvKYjY3NyM9PT1qurAMw2DZsmV46623ok6MwIB3T15eHhYuXIglS5aEXN/UqVORl5cHq9Ua1Pf1u9/9Djt27IDdbofT6Qy5PeGGRCKBVqvF6dOng64jKGEuWrQIVqs16IuGCxMmTMC9994Ls9kctQl5mpqaUFBQAJPJBJFIxFq9/f392Lp1a9C/A5vNhnXr1uH48eNR84AfjkqlwpYtW1BUVBTU+wMWZjRYS7FYDIVCgW3btmH9+vV8N4czysrKcOTIEVRVVaGtrY31+v/xj3/gxo0bwQWUHqShoQErV67E9evXo26CKJTJoIDHmC+++GJEi5JhGOTn56O9vT1qRWm326HX63Hx4kVYLBZORAkACxYsQGtrK2pra4OuIz09HZcvX8ZvfvMbxMfHs9e4MOCbb75BTk5OUOPNgCxmJFtLsViMyZMn47333ovabisAtLa2oqCgABs3bsTUqVM5v9
6sWbOQlZUFs9kc8k4Zm82GvLw8fPbZZ7h58yZLLeQXsViM/Px8HDhwIKD3BWQxI9VaMgyDRYsWoaWlJapFWVFRAb1eD7PZPC6iBICWlhaYzWbo9fqQZyI1Gg0aGxuxefPmqJmE6+vrw9GjR1FfXx/Q+/y2mE1NTcjKyoq4fZZxcXF45ZVXYDAY+G4KZzgcDhgMBjgcDphMJpw5c2bc23D16lVUVlaipqaGlfqamprw5JNP4sqVK6zUxzfJycn48ssv/T7fb4tpMpkiSpRisRjTpk3DyZMno1qUHR0dyMjIwJw5c/Dzn/+cF1ECQEJCAlJSUrBjxw5W6tPpdDh//jx0Oh3kcjkrdfLJ1atX8fvf/97v8/0SpsPhwKFDkZO+dOLEifjhD3+IL774gvcIAVxSW1uLvLw8mEwmzJo1i+/mYNmyZTh27NhQ9q5QYRgmarq23333HYqLi/3u7vslzOrqalbXwLgkISEB5eXlOHz4MPfhH3ikqKgIlZWVsFqtQQfe4oKamhoYDAZ0dHSwVmdxcTEsFgsSExNZq5MPent7/e5R+DXGnDlzJlpbW0NuGNdoNBpUVVUhPT2d76ZwhtsBfenSpViwYAHfzfGKvztRAqWjowNZWVkR8Vv0hUqlwhdffDHmpogxLWZra6vPJKLhRGJiIiwWS1SL0j0BZzQaw1aUwIBX0KpVq1BYWMhqvUlJSWhsbERqamrE9OBG4nQ6/Vo6GVOYe/bsCfs1pXvvvRcnT54c17if482OHTtgMBhgsVgi4kc5ffp0yGQy7N69m9V6GYbBxx9/jLS0tIh0hu/u7kZJScmYY80xhXngwIGwDWMokUiQkpKClpaWqNnAPBK3A/qNGzewc+dOzrx4uKC0tBTvvvsu6xvpZTIZTpw4gSeeeMIjCHYk4HQ6x1zXHHWM2dHRgYceeiisJhfcyOVyzJkzB3V1dRE/Y+eL5uZmFBQUwGg0IiEhge/mBMXUqVOh1+tRU1PDyWbzTZs2oaKiIuIcX8Za1xzVYtbX18PlcrHeqFCRSCSYM2cOjh8/HrWi3L17NwoLC1FTUxOxogSAS5cuoaSkBAUFBSF7BnmjrKwMRqMREydOZL1uLrl27Rqam5t9vj6qMA8dOhSW48v7778fx48fj8rlEIfDgYKCAnz66aewWq24dOkS300KGZFIhKVLlwa9BWosDAYDnnvuuYjq1nZ3d6OystLn6z67sg6HAxMnTuTkKRcKKSkpaGxsjEpL6XZAX7t2LaZPn853c1jn7bffRk5ODlasWMFJ/Xq9HrW1tWH3m/VFQkKCT5dDnxazqakp7CzStGnTgt41H+7U1tYObWiORlECgNlsxmuvvcbZOmRVVRUeffTRiJi1BgaWlXxNjPkU5ocffhhWkz6JiYk4ceJEVEWrc2MwGFBZWQmLxRJW3znbnDlzBjU1NdDr9Zz5XR87dgyPPPIIxGIxJ/WzSVdXF959912vr/nsyoZT+BCNRgOLxRJ165Tu/YdPP/10QGEkIx22d6KMxG63Y968eRHhIeSrO+vTYv7rX//itEH+kpCQgJqamqgTZUNDAzIyMlBaWnpXiRIYuKdz5sxBcXExJ/UzDAOr1Ypp06ZxUj+bOBwOr37FXoVpt9vDIoK2SqXCiy++GHWbm4uLi7F169aQwkhGOkuWLMHp06cD3kDsLxqNBseOHUNcXBwn9bOJt904XoXZ3NzMu7uTWCyGVqvlbIqdD+x2O7KysgAA27dvHzMtQbRTVVWFoqIiVneiDMe9P1SlUnFSPxt0dXXhk08+8TjuU5h8RyybPHkyjhw5wmsb2KSpqQkZGRnYuHEjK7Fdo4GWlhZUVVUhLy+PsyWO1atXY/HixWE9U+ut1+BVmGfPnuW1K5uQkID33nsvapZFdu/ejaKiItTU1ECpVPLdnLDi6tWreOmll1BQUMDZNQ4ePBjWezm/+uorjweTV2F+9tln49Igb0TTuNLhcECv1+PTTz+FxWKJCi8eLpg6dS
o0Gg3Kyso4qV8mk4X1eFOpVHq453kVJl8/ILFYjMWLF0fFuLK1tRXz5s1DTk4OVq5cyVssnkihpKSE1bAkIwnn8abL5fIYZ/ucleWD+Ph4HDx4kJdrs0l1dTX0ej2qqqrGLYxkpHPmzBmYzWYYDAbOMsi5x5tCoZCT+oOlt7fX4zPfIUzi6se3Xd9BoIoF1ApgHD3yGIaB2WwOOzfAQHA4HCgsLMSRI0fQ2NiIq1ev8t2kiKKtrQ0mk4mVGLW++OMf/xh2Xdre3l6cP3/+jmN3CFMQI8K1K/+DTCwGbnYD4+QLLBaL8dhjjyEzM3N8LsgB7ng0c+bMwQsvvEC7rkHS39+PnJwczoYzDMPgjTfeCLsu7UgvJY+ubMfFi4iJCTptZlCoVCpUVFSM6zXZpL6+Hnl5eSgpKQmLMJKRTmpqKux2O2e/iWeffRbf//73Oak7WDo7O+/432Nxx27/dvAvMaCWDPzpugV81wfIZIBUCPQOWlOxGJAPntM7zMIOPw4ncGOY6R3x2gSXGEaj0dM5nThxy+kCAQBBDCRCIQQjj0MAsUiEGBD09/fDCcHA+MHZDycAxIggFbhunx8jgjRGENAXNhbFxcU4ffo0rFbrXe8wwCYmkwlZWVlISUnhZIbebDZj8eLFuHbtGut1B8PIYY+HabTbrw9GLegDbt4C4BwQJQA4nED/rUEBigfGoDe6gRu3AKkMEA8elwsHusI3uoF+IaBwe/p7vjbpnslesm4R9DsJYoRiSEUiCOGCk4w8LoY4hqDPRQAIIBLGQAACp9OJGJEYUmEMBK5+9LoA8dD/TrAVj8FutyMjIwMA8PLLL1NRsox7MqiwsJCTyUitVouVK1eG7S6U0fusfX1AL24LSyYEnH23/46RDEwSqSUAhIBw8LjLCQyehlu3AJHw9nuGvaaWSlFd9a6nWIgTzhghRAJgQHTigb+JE06BAMJBoxcjGCk2AYRC0bAPJYAwZtDSDuIKOE2vJ01NTZg3bx6MRiP14uGQS5cuobS0FHl5eZzU//rrr2PChAmc1B0qvoXpFpbDOSAsMQCh884Jof5bgxZzsLhfixllOnrwNZFIhPzlyzFn9hzv5xF3d3XkceL9+DhRVlYGg8EAq9Ua1m5e0QKXYUlkMhn2798flt5YQ8J0ufrQ6yIDyxXEOeyUQaspV9y2lsCgYCXDllQGu7YOJwDh7eNCIdDvvP2ewddUKhXeLC8FiRF6eToIICDu7isAkAFLJxBCCIL+IbNHfLyffdxhJC9evBhxYSQjHTYS5PoiNzc3LCfshn7TAggAVz/iJ09GTP+ICRKHE8AIa4k+oNcJSBWD3Vnh4OuDY1P3cbET6O67/Z6bt6BQxWHz/9sCgUQGydBkDEF/fx/6BwUoEQrgdPaht78Pvf1OYKhbGwO4+geOuwSD7yfod7pAQOB09sM15v+B0dzcjKysLKxatQrLly8P8N0UNuAyLInJZAo7q+kRwaChoQF5eXkjBtxiQNbH2rpmXFwcLl++HBHOBBUVFdizZw/MZjN1GOCZhIQE6PV6NDY2sv7b4Ttix8hIBv71AmVCwDn2af6gUChQVFQU9qJ0e/GcOHECVquVijIMuHr1KoxGI/R6Pet179q1i1erOdJ100OYdwhGMdgdheP2ZFCISKVS/OpXv2KnMo4Il2SwFE/YTpDrRqvVYtKkSazWGQgjDZWHMDUaze1wF90jZltDJBKsZbglg6V4wnaCXDe7du3iLXv1mBYzKSmJsxCK4W4twzUZLMUTLhLk5ubmYsqUKazVFwgjrbXXMSYXDr7hbC1tNhvmzZsHtVqNX//619SLJwJoaWmByWRiPScKH1ZTLpdj9uzZdxzzKsz4+HjWLy4UCvHMM8+wXm+oNDQ0ICsrC6WlpWGdDJbiCRcJcnNzczF58mTW6vMHiUTiEZ7VqzC5MOcPP/xw2EVR37FjB7Zu3QqLxXLXhpGMdLhIkPvSSy+Nq1dXT08PUlJS7jjmVZhs77
pXq9XYsGEDq3WGwvBksNu3b6dePBEO2wlyc3NzIZFIxj6RJaRSqUfgOa/CfOCBB1h9YgiFwrDZBN3c3IyMjAysWrUKy5Yt47s5FBZgOyyJRqPBY489xkLL/OPBBx/0OOZVmAsWLGBtsVUkEmH58uVhMekTLclgKZ6wnSB3w4YNkEqlLLRsdEQiERYvXuxx3KswdTodawGfFQoF1q1bx0pdwRKNyWApnrC5EyUzM3NcjIlSqfS6ddCrMBmGYW1m9p577uE1IZA7jOTChQtpGMm7gNTUVNhsNlRXV4dUj0wmQ05ODkut8o3D4fAaocGnrywbYpLL5di4cWPI9QRLdXU1CgoKUFVVFbXJYCmesLUTpbCwkHOrqdVqvV7DpzCfeOKJkPvYTqeTs7Teo+FwOGAwGHDkyBFYLBbqgH6XwVaCXJ1Ox2mEA6lUiqefftrraz6FqdPpQs74NXPmzHHPP2Kz2ZCVlYX77rsPL7zwAvXiuUu5dOkSjEZjyDlR1qxZw1KLPJFKpT5XK0btyjqdwe/1EolE474c4U4GW1JSctclg6V4wkaC3CVLlnAWsCs+Pt7DscCNT2HKZDI8/vjjQV/U12wTV9BksBRvhJogV6fTcZJSYazVilE3Sj/77LNBO7Q7nc5xydhFk8FSxiKUBLkymQxz585lv1EAfvGLX/h8bVRhZmZmBt2dTU1N5XxGyx1GkiaDpYxGqAly8/PzIRCwGyh84cKFo86/jCpMhmHw0EMPBXxRqVSKJ598MuD3BUJZWRmKiopgtVrDLpASJfwIJUFueno6q+NMhmG8BDm/kzFj/qxatSrg/WmjzTaFit1uh16vx8WLF2GxWKgDOsVvgk2Qy7aDzMSJE8fUx5jCzM3NDfjCEonE52xTKLS2tiIrKws5OTlYvnw59eKhBEywCXLZGmeq1Wrs2rVrzPPGFGYwnvaPPvpoQOf7gzsZrNlspslgKUET7E6UH//4x6xcPy4uzi9j51f4yg0bNkCtVvt1YZFIFNIyy0hoMlgK2wSTIHfJkiUhp6f011oCXgI+++Lee+/F119/7dfFKyoqguoCj6SjowMFBQV4+umnacQ6CuucOXMGFy9eRGlp6ZjnOhwOyOVy+CkXryQnJ+PLL7/061y/HwEGgwEKhWLM8wghrIwvaTJYCtcEkiBXJpOFtPynVqtx+PBhv8/3W5ijLYYO5+bNmyELs6ioCOXl5bBardSLh8IpJpMJlZWVfoUlCTZmlUgkQn5+fkCzu34Lk2EY/PSnPx0z5Mj3vvc9vy8+EpvNhoyMDKjVapoMljIuBJIgN9iZWZVKhTfffDOg9wQ0ml23bt2Y3dmZM2cG1AA3TU1NyMrKgtFopGEkKeOKvwlytVptwB5AarUa+/fvD7gbHJAwtVot8vPzfVrNYGdk3clgLRYLTQZL4QV/wpI88sgjAdUpFouxaNGioCZCA57/3bZtm09PIIVCEdD4kiaDpYQTYyXITUlJCchi3nPPPTh48GBQbQlYmBqNBqtWrfJqmgOZkaXJYCnhyGhhSZKSkvxeLomLi8OxY8eCnskNasX09ddf92o1XS6XXw2pqKhAYWEhzGYzDSNJCStaWlpgNpt9Oh/4EwharVZjx44dIa1OBCVMmUyGoqIij4kgh8Mx6pSyO4wkTQZLCWdGS5A7Vhws97hy9erVIbXBb8+fkTgcDsyePRsXLly447iv6lpbW1FQUIC1a9fSiHWUiODo0aNQq9XYsmXL0LH4+Hhcu3bN6/kSiQQPP/wwjh8/HvJe5KCd/2QyGd555507Nnv6inZQW1uLgoICmEwmKkpKxOAtQe5oET1mz57NiiiBEIQJDMRDefbZZ4ca4q3/bTAYUFlZCYvFQpPBUiKOkQlyfW2YTklJwQcffMBa1I7Q3OUxMBHk9vYZvh3LnQz2vvvuo8lgKRHLyAS53nZZpaSkoLGxkdVQrSELUyaT4dChQ3eY+OHJYGkYSUqkMzxB7vAJT3dAALZFCbAgTG
DAIygtLQ0zZsygyWApUYk7Qa5UKoVQKERsbCy0Wi0nogQCEKZlrQACwYgyvxxuX50lS5bAarWitbUVf//736kXDyXqKC0txX/+8x/I5XLk5OTg9OnTnGUaCGi5pK18PmZ8/grIniwAFqwVZGPvmjqQPVk4cOAAtmzZgsuXL7PeSK1Wy8oXkJ6ePvR3RUVFUHFGKRSRSIS+vj5OrxGAMNtQPn8GPn+FYM9AfGVY1gqQjbpBoVIo0U9HRwc6OjrueMgDACxrIcjeO/TvmjqCPViLtdiDYOTh/xiz7SgOnVqD3MGLtJXPR/beNJRtpqKk3D0kJSWNEGUbyucLIMgG6ggBGSy5tQIIslvw4IzgruO3MNuOHsIp7EX24PhyFSpByElspP4ClLuWgV7kJpThAtmD4SYqa08d1mAW7g9SH34Ksw1HD53CmrrBJ8KFMmDTDMwvpxM8lLuXtvJV2HQqDWWVG+Gpvxl4sCwXwfYn/RtjtpVj/ozP8cqwp4JlrQDZe9egbsSTgkK5Oxi0lrO4mWPxy2K2HT2EU2uGq78N51sApD2IILvQFEqEcwGfnwLSgh1EjoEfcTwGu7Gv3JalZe0MbDoFrKnzZsIplLuAtvPg0sl0TIspEAyIcG/2bceC7L1pKLtAgpoGplCigun3YxaAU59f8Pqypfy2800wBL0fk0K56xlcu0wru4CTQ8sTnuv9wUCFSaGEQls55s/YhFNDB9JQdiH0ZUQqTAolDGFldwmFQmEXKkwKJQz5/6GFNae7ZI7lAAAAAElFTkSuQmCC" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "segments.formula_segments" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "['ABC',\n", + " 'BC',\n", + " 'AB',\n", + " 'AC',\n", + " '\\\\bigtriangleup ABC',\n", + " 'I',\n", + " 'II',\n", + " 'III',\n", + " 'I,II,III',\n", + " 'p_1,p_2,p_3']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "segments.ques_mark_segments" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "['\\\\SIFChoice']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/sif/sif.ipynb b/examples/sif/sif.ipynb index 8bd80a7e..3376cd6d 100644 --- 
a/examples/sif/sif.ipynb +++ b/examples/sif/sif.ipynb @@ -627,4 +627,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/examples/sif/sif_addition.ipynb b/examples/sif/sif_addition.ipynb new file mode 100644 index 00000000..57830c43 --- /dev/null +++ b/examples/sif/sif_addition.ipynb @@ -0,0 +1,740 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# sif_addition" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "from EduNLP.SIF import is_sif, to_sif,sif4sci" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## is_sif" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + " text = '若$x,y$满足约束条件' \\\r\n", + " '$\\\\left\\\\{\\\\begin{array}{c}2 x+y-2 \\\\leq 0 \\\\\\\\ x-y-1 \\\\geq 0 \\\\\\\\ y+1 \\\\geq 0\\\\end{array}\\\\right.$,' \\\r\n", + " '则$z=x+7 y$的最大值$\\\\SIFUnderline$'\r\n", + " \r\n", + "is_sif(text)\r\n" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", + "is_sif(text)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "False" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## to_sif" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "text = '某校一个课外学习小组为研究某作物的发芽率y和温度x(单位...'\r\n", + "to_sif(text)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'某校一个课外学习小组为研究某作物的发芽率$y$和温度$x$(单位...'" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## sif4sci\n", + " 
to_symbolize:\n", + " - \"t\": text\n", + " - \"f\": formula\n", + " - \"g\": figure\n", + " - \"m\": question mark" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "source": [ + " test_item = r\"如图所示,则$\\bigtriangleup ABC$的面积是$\\SIFBlank$。$\\FigureID{1}$\"\r\n", + " t1 = sif4sci(test_item)\r\n", + " t1" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['如图所示', '\\\\bigtriangleup', 'ABC', '面积', '\\\\SIFBlank', \\FigureID{1}]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "source": [ + "t1.describe()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'t': 2, 'f': 2, 'g': 1, 'm': 1}" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "source": [ + "with t1.filter('fgm'):\n", + " print(t1)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['如图所示', '面积']\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "source": [ + "with t1.filter(keep='t'):\n", + " print(t1)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['如图所示', '面积']\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "source": [ + "with t1.filter():\n", + " print(t1)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['如图所示', '\\\\bigtriangleup', 'ABC', '面积', '\\\\SIFBlank', \\FigureID{1}]\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "source": [ + "t1.text_tokens" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['如图所示', '面积']" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + 
"execution_count": 23, + "source": [ + "t1.formula_tokens" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\bigtriangleup', 'ABC']" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 24, + "source": [ + "t1.figure_tokens" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[\\FigureID{1}]" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 25, + "source": [ + "t1.ques_mark_tokens" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\SIFBlank']" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 26, + "source": [ + "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\"}})" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['如图所示', , '面积', '[MARK]', '[FIGURE]']" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "sif4sci(test_item, symbol=\"tfgm\")" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['[TEXT]', '[FORMULA]', '[TEXT]', '[MARK]', '[TEXT]', '[FIGURE]']" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 28, + "source": [ + "sif4sci(test_item, symbol=\"gm\", tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['如图所示', '\\\\bigtriangleup', 'A', 'B', 'C', '面积', '[MARK]', '[FIGURE]']" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ], + "metadata": {} + }, + 
{ + "cell_type": "code", + "execution_count": 29, + "source": [ + " test_item_1 = {\n", + " \"stem\": r\"若$x=2$, $y=\\sqrt{x}$,则下列说法正确的是$\\SIFChoice$\",\n", + " \"options\": [r\"$x < y$\", r\"$y = x$\", r\"$y < x$\"]\n", + " }" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 30, + "source": [ + " tls = [\n", + " sif4sci(e, symbol=\"gm\",\n", + " tokenization_params={\n", + " \"formula_params\": {\n", + " \"method\": \"ast\", \"return_type\": \"list\", \"ord2token\": True, \"var_numbering\": True,\n", + " \"link_variable\": False}\n", + " })\n", + " for e in ([test_item_1[\"stem\"]] + test_item_1[\"options\"])\n", + " ]" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 33, + "source": [ + "tls" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['mathord_0', '=', 'textord', 'mathord_1', '=', 'mathord_0', '{ }', '\\\\sqrt', '说法', '正确', '[MARK]'],\n", + " ['mathord_0', '<', 'mathord_1'],\n", + " ['mathord_0', '=', 'mathord_1'],\n", + " ['mathord_0', '<', 'mathord_1']]" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "tls[1:]" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['mathord_0', '<', 'mathord_1'],\n", + " ['mathord_0', '=', 'mathord_1'],\n", + " ['mathord_0', '<', 'mathord_1']]" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 35, + "source": [ + "from EduNLP.utils import dict2str4sif\n", + "\n", + "test_item_1_str = dict2str4sif(test_item_1, tag_mode=\"head\", add_list_no_tag=False)\n", + "test_item_1_str " + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{stem}$若$x=2$, $y=\\\\sqrt{x}$,则下列说法正确的是$\\\\SIFChoice$$\\\\SIFTag{options}$$x < y$$\\\\SIFSep$$y = 
x$$\\\\SIFSep$$y < x$'" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 36, + "source": [ + "tl1 = sif4sci(\n", + " test_item_1_str, \n", + " symbol=\"gm\", \n", + " tokenization_params={\n", + " \"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\", \"ord2token\": True}\n", + " })\n", + " " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 37, + "source": [ + "tl1.get_segments()[0]" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\SIFTag{stem}']" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 38, + "source": [ + "tl1.get_segments()[1:3]" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['[TEXT_BEGIN]', '[TEXT_END]'],\n", + " ['[FORMULA_BEGIN]', 'mathord', '=', 'textord', '[FORMULA_END]']]" + ] + }, + "metadata": {}, + "execution_count": 38 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 39, + "source": [ + "tl1.get_segments(add_seg_type=False)[0:3]" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['\\\\SIFTag{stem}'],\n", + " ['mathord', '=', 'textord'],\n", + " ['mathord', '=', 'mathord', '{ }', '\\\\sqrt']]" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 41, + "source": [ + "test_item_2 = {\"options\": [r\"$x < y$\", r\"$y = x$\", r\"$y < x$\"]}" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 42, + "source": [ + "test_item_2_str = dict2str4sif(test_item_2, tag_mode=\"head\", add_list_no_tag=False)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 43, + "source": [ + "test_item_2_str" + ], + "outputs": [ + { + 
"output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{options}$$x < y$$\\\\SIFSep$$y = x$$\\\\SIFSep$$y < x$'" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 44, + "source": [ + "tl2 = sif4sci(test_item_2_str, symbol=\"gms\",\n", + " tokenization_params={\"formula_params\": {\"method\": \"ast\", \"return_type\": \"list\"}})\n", + "tl2 " + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\SIFTag{options}', 'x', '<', 'y', '[SEP]', 'y', '=', 'x', '[SEP]', 'y', '<', 'x']" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 45, + "source": [ + "tl2.get_segments(add_seg_type=False)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['\\\\SIFTag{options}'],\n", + " ['x', '<', 'y'],\n", + " ['[SEP]'],\n", + " ['y', '=', 'x'],\n", + " ['[SEP]'],\n", + " ['y', '<', 'x']]" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 46, + "source": [ + "tl2.get_segments(add_seg_type=False, drop=\"s\")" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['\\\\SIFTag{options}'], ['x', '<', 'y'], ['y', '=', 'x'], ['y', '<', 'x']]" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 47, + "source": [ + "tl3 = sif4sci(test_item_1[\"stem\"], symbol=\"gs\")\n", + "tl3.text_segments" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['说法', '正确']]" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 48, + "source": [ + "tl3.formula_segments" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { 
+ "text/plain": [ + "[['x', '=', '2'], ['y', '=', '\\\\sqrt', '{', 'x', '}']]" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 49, + "source": [ + "tl3.figure_segments" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 50, + "source": [ + "tl3.ques_mark_segments" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['\\\\SIFChoice']]" + ] + }, + "metadata": {}, + "execution_count": 50 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [], + "outputs": [], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/test_model/test_gensim_luna_stem_tf_d2v_256.bin b/examples/test_model/test_gensim_luna_stem_tf_d2v_256.bin new file mode 100644 index 00000000..7a56bca4 Binary files /dev/null and b/examples/test_model/test_gensim_luna_stem_tf_d2v_256.bin differ diff --git a/examples/tokenizer/test_stopwords.txt b/examples/tokenizer/test_stopwords.txt new file mode 100644 index 00000000..8183ecf4 --- /dev/null +++ b/examples/tokenizer/test_stopwords.txt @@ -0,0 +1,9 @@ +一旦 +一时 +一来 +一样 +一次 +一片 +一番 +一直 +一致 \ No newline at end of file diff --git a/examples/tokenizer/tokenizer.ipynb 
b/examples/tokenizer/tokenizer.ipynb new file mode 100644 index 00000000..4819b00d --- /dev/null +++ b/examples/tokenizer/tokenizer.ipynb @@ -0,0 +1,501 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Tokenizer\n", + "\n", + "## 概述\n", + "\n", + "为了方便后续向量化表征试题,本模块提供题目文本的令牌化解析(Tokenization),即将题目转换成令牌序列。 \n", + "\n", + "根据构成题目的元素类型,解析功能分为 **“文本解析”** 和 **“公式解析”** 两部分。\n", + "\n", + "### 文本解析\n", + "\n", + "根据题目文本切分粒度的大小,文本解析又分为 **“句解析”** 和 **“词解析”**。\n", + "\n", + "(1) 句解析(sentence-tokenization):将较长的文档切分成若干句子的过程称为“分句”。每个句子为一个“令牌”(token)。(待实现) \n", + " \n", + "\n", + "(2) 词解析(text-tokenization):一个句子(不含公式)是由若干“词”按顺序构成的,将一个句子切分为若干词的过程称为“词解析”。根据词的粒度大小,又可细分为“词组解析”和\"单字解析\"。\n", + "- 词组解析 (word-tokenization):每一个词组为一个“令牌”(token)。\n", + "- 单字解析 (char-tokenization):单个字符即为一个“令牌”(token)。\n", + "\n", + "### 公式解析\n", + "\n", + "公式解析(formula-tokenization):理科类文本中常常含有公式。将一个符合 latex 语法的公式切分为标记字符列表的过程称为“公式解析”。每个标记字符为一个“令牌”(token)。 \n", + " " + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 文本解析" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### 句解析\n", + "\n", + "待实现..." 
+ ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### 词解析\n", + "\n", + "词解析分为两个主要步骤: \n", + "\n", + "(1) 分词: \n", + "- 词组解析:使用分词工具切分并提取题目文本中的词。 \n", + " 本项目目前支持的分词工具有:`jieba` \n", + "- 单字解析:按字符划分。\n", + " \n", + " \n", + "(2) 筛选:过滤指定的停用词。 \n", + "- 本项目默认使用的停用词表:[stopwords](https://github.com/bigdata-ustc/EduNLP/blob/master/EduNLP/meta_data/sif_stopwords.txt) \n", + "- 你也可以使用自己的停用词表,具体使用方法见下面的示例。\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "# 导入模块\n", + "from EduNLP.SIF.tokenization.text import tokenize " + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "# 输入\n", + "text = \"三角函数是基本初等函数之一\"" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### 词组解析\n", + "\n", + "分词粒度参数选择 word: `granularity = \"word\"` " + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# 输出:默认使用 EduNLP 项目提供的停用词表\n", + "tokenize(text, granularity=\"word\")" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['三角函数', '初等', '函数']" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### 单字解析\n", + "\n", + "分词粒度参数选择 char: `granularity = \"char\"` " + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# 输出:默认使用 EduNLP 项目提供的停用词表\n", + "tokenize(text, granularity=\"char\")" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['三', '角', '函', '数', '基', '初', '函', '数']" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "#### 停用词表" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# 获取自己的停用词表\n", + "spath = \"test_stopwords.txt\"\n", + "from
EduNLP.SIF.tokenization.text.stopwords import get_stopwords\n", + "stopwords = get_stopwords(spath)\n", + "stopwords" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'一旦', '一时', '一来', '一样', '一次', '一片', '一番', '一直', '一致'}" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# 输出:传入停用词表(stopwords)\n", + "tokenize(text,granularity=\"word\",stopwords=stopwords)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['三角函数', '是', '基本', '初等', '函数', '之一']" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 公式解析\n", + "切分出 latex 公式的每个标记符号。针对本模块更加详细的解释参见 [formula](../formula/formula.ipynb)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "# 导入模块\n", + "from EduNLP.SIF.tokenization.formula import tokenize" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "- 输入" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "formula = \"\\\\frac{\\\\pi}{x + y} + 1 = x\"" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "- 输出" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "(1)如果您想按 latex 语法标记拆分公式的各个部分,并得到顺序序列结果,输出方法可以选择:`linear`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "tokenize(formula, method=\"linear\")" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\frac', '{', '\\\\pi', '}', '{', 'x', '+', 'y', '}', '+', '1', '=', 'x']" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "(2) 如果您想得到公式解析出的语法分析树序列,输出方法可以选择:`ast`\n", + "> 抽象语法分析树,简称语法树(Syntax 
tree),是源代码语法结构的一种抽象表示。它以树状的形式表现编程语言的语法结构,树上的每个节点都表示源代码中的一种结构。 \n", + "> 因此,ast 可以看做是公式的语法结构表征。" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "tokenize(formula, method=\"ast\", return_type=\"list\", ord2token=False)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['\\\\pi', '{ }', 'x', '+', 'y', '{ }', '\\\\frac', '+', '1', '=', 'x']" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "(3)如果您只是关心公式的结构和类型,并不关心变量具体是什么,比如二元二次方程 `x^2 + y = 1` ,它从公式结构和类型上来说,和 `w^2 + z = 1` 没有区别。 \n", + "此时,您可以设置如下参数:`ord2token = True`,将公式变量名转换成 token" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "# 输出形式选择抽象语法分析树(ast)且将公式变量名转换成 token\n", + "tokenize(formula, method=\"ast\", return_type=\"list\", ord2token=True)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['mathord',\n", + " '{ }',\n", + " 'mathord',\n", + " '+',\n", + " 'mathord',\n", + " '{ }',\n", + " '\\\\frac',\n", + " '+',\n", + " 'textord',\n", + " '=',\n", + " 'mathord']" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "(4) 如果您除了 (3) 中提供的功能之外,还需要区分不同的变量。此时可以另外设置参数:`var_numbering=True`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "# 输出形式选择抽象语法分析树(ast)且将公式变量名转换成带编号的 token\n", + "tokenize(formula, method=\"ast\", return_type=\"list\", ord2token=True, var_numbering=True)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['mathord_con',\n", + " '{ }',\n", + " 'mathord_0',\n", + " '+',\n", + " 'mathord_1',\n", + " '{ }',\n", + " '\\\\frac',\n", + " '+',\n", + " 'textord',\n", + " '=',\n", + " 'mathord_0']" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ], + "metadata": {} + }, + { +
"cell_type": "markdown", + "source": [ + "## 综合解析\n", + "\n", + "综合解析,即综合以上两种解析方式(文本解析 + 公式解析),提供对题目文本的全解析。另外,如遇到特殊符号将转换成常量,例如:\n", + "```python\n", + "FIGURE_SYMBOL = \"[FIGURE]\" # $\\FigureID{1}$\n", + "QUES_MARK_SYMBOL = \"[MARK]\" # $\\SIFChoice$\n", + "```\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 39, + "source": [ + "# 导入模块\n", + "from EduNLP.Tokenizer import get_tokenizer\n", + "\n", + "# 输入\n", + "item = {\n", + " \"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\"\n", + "}\n", + "\n", + "# 输出\n", + "tokenizer = get_tokenizer(\"text\")\n", + "tokens = tokenizer(item)\n", + "next(tokens) " + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['如图',\n", + " '古希腊',\n", + " '数学家',\n", + " '希波',\n", + " '克拉底',\n", + " '研究',\n", + " '几何图形',\n", + " '此图',\n", + " '三个',\n", + " '半圆',\n", + " '三个',\n", + " '半圆',\n", + " '直径',\n", + " '直角三角形',\n", + " 'ABC',\n", + " '斜边',\n", + " 'BC',\n", + " '直角',\n", + " 'AB',\n", + " 'AC',\n", + " '\\x08',\n", + " 'igtriangleupABC',\n", + " '三边',\n", + " '围成',\n", + " '区域',\n", + " '记',\n", + " 'I',\n", + " '黑色',\n", + " '记',\n", + " 'II',\n", + " '其余部分',\n", + " '记',\n", + " 'III',\n", + " '图形',\n", + " '中',\n", + " '随机',\n", + " '取',\n", + " '一点',\n", + " '此点',\n", + " '取自',\n", + " 'I',\n", + " ',',\n", + " 'II',\n", + " ',',\n", + " 'III',\n", + " '概率',\n", + " '记',\n", + " 'p',\n", + " '_',\n", + " '1',\n", + " ',',\n", + " 'p',\n", + " '_',\n", + " '2',\n", + " ',',\n", + " 'p',\n", + " '_',\n", + " '3',\n", + " '[MARK]',\n", + " '[FIGURE]']" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { +
"name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/utils/data.ipynb b/examples/utils/data.ipynb new file mode 100644 index 00000000..d1045c66 --- /dev/null +++ b/examples/utils/data.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# data" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "from EduNLP.utils import dict2str4sif" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/home/lvrui/.local/lib/python3.8/site-packages/gensim/similarities/__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package is unavailable. Install Levenhstein (e.g. 
`pip install python-Levenshtein`) to suppress this warning.\n", + " warnings.warn(msg)\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "item = {\r\n", + " \"stem\": r\"若复数$z=1+2 i+i^{3}$,则$|z|=$\",\r\n", + " \"options\": ['0', '1', r'$\\sqrt{2}$', '2'],\r\n", + " }\r\n", + "item" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'stem': '若复数$z=1+2 i+i^{3}$,则$|z|=$',\n", + " 'options': ['0', '1', '$\\\\sqrt{2}$', '2']}" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# 给题目各个部分加标签\r\n", + "dict2str4sif(item) # doctest: +ELLIPSIS" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\\\SIFTag{stem_end}$$\\\\SIFTag{options_begin}$$\\\\SIFTag{list_0}$0$\\\\SIFTag{list_1}$1$\\\\SIFTag{list_2}$$\\\\sqrt{2}$$\\\\SIFTag{list_3}$2$\\\\SIFTag{options_end}$'" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "dict2str4sif(item, add_list_no_tag=True) # doctest: +ELLIPSIS" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\\\SIFTag{stem_end}$$\\\\SIFTag{options_begin}$$\\\\SIFTag{list_0}$0$\\\\SIFTag{list_1}$1$\\\\SIFTag{list_2}$$\\\\sqrt{2}$$\\\\SIFTag{list_3}$2$\\\\SIFTag{options_end}$'" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "dict2str4sif(item, tag_mode=\"head\") # doctest: +ELLIPSIS" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{stem}$若复数$z=1+2 
i+i^{3}$,则$|z|=$$\\\\SIFTag{options}$$\\\\SIFTag{list_0}$0$\\\\SIFTag{list_1}$1$\\\\SIFTag{list_2}$$\\\\sqrt{2}$$\\\\SIFTag{list_3}$2'" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "dict2str4sif(item, tag_mode=\"tail\") # doctest: +ELLIPSIS" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'若复数$z=1+2 i+i^{3}$,则$|z|=$$\\\\SIFTag{stem}$$\\\\SIFTag{list_0}$0$\\\\SIFTag{list_1}$1$\\\\SIFTag{list_2}$$\\\\sqrt{2}$$\\\\SIFTag{list_3}$2$\\\\SIFTag{options}$'" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "source": [ + "dict2str4sif(item, add_list_no_tag=False) # doctest: +ELLIPSIS" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'$\\\\SIFTag{stem_begin}$若复数$z=1+2 i+i^{3}$,则$|z|=$$\\\\SIFTag{stem_end}$$\\\\SIFTag{options_begin}$0$\\\\SIFSep$1$\\\\SIFSep$$\\\\sqrt{2}$$\\\\SIFSep$2$\\\\SIFTag{options_end}$'" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "dict2str4sif(item, key_as_tag=False)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'若复数$z=1+2 i+i^{3}$,则$|z|=$0$\\\\SIFSep$1$\\\\SIFSep$$\\\\sqrt{2}$$\\\\SIFSep$2'" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": 
"e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/vectorization/get_pretrained_i2v.ipynb b/examples/vectorization/get_pretrained_i2v.ipynb new file mode 100644 index 00000000..9fe707b7 --- /dev/null +++ b/examples/vectorization/get_pretrained_i2v.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# get_pretrained_i2v\n", + "\n", + "## 概述\n", + "\n", + "使用 EduNLP 项目组给定的预训练模型将给定的题目文本转成向量。\n", + "\n", + "- 优点:简单方便。\n", + "- 缺点:只能使用项目中给定的模型,局限性较大。\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 导入功能块" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "from EduNLP import get_pretrained_i2v" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输入\n", + "\n", + "类型:str \n", + "内容:题目文本 (text)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "item = {\n", + "\"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\"\n", + "}\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 模型选择与使用" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "根据题目所属学科选择预训练模型: \n", + "\n", + " 预训练模型名称 | 模型训练数据的所属学科 \n", + " -------------- | ---------------------- \n", + " d2v_all_256 | 全学科 \n", + " d2v_sci_256 | 理科 \n", + " d2v_eng_256 | 英语 \n", + " d2v_lit_256 | 文科 \n", + "\n" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "i2v = get_pretrained_i2v(\"d2v_sci_256\")" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "EduNLP, INFO Use pretrained t2v model d2v_sci_256\n", + 
"downloader, INFO http://base.ustc.edu.cn/data/model_zoo/EduNLP/d2v/general_science_256.zip is saved as /home/lvrui/.EduNLP/model/general_science_256.zip\n", + "downloader, INFO file existed, skipped\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "- 注意:\n", + " 默认的 EduNLP 项目存储地址为根目录(`~/.EduNLP`),模型存储地址为项目存储地址下的 `model` 文件夹。您可以通过修改下面的环境变量来修改模型存储地址:\n", + " - EduNLP 项目存储地址:`EDUNLPPATH = xx/xx/xx`\n", + " - 模型存储地址:`EDUNLPMODELPATH = xx/xx/xx`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "print(i2v(item))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "([array([-2.38860980e-01, 7.09681511e-02, -2.71706015e-01, 1.64714813e-01,\n", + " 2.81243492e-02, -1.82386801e-01, 9.22331214e-02, 1.31783364e-02,\n", + " 9.15176645e-02, 3.14464062e-01, 9.37800854e-02, -2.28523940e-01,\n", + " -2.60597020e-01, 6.49375990e-02, 9.75619778e-02, -1.97933778e-01,\n", + " 8.29798505e-02, -2.26491719e-01, -1.77030653e-01, -3.56038064e-02,\n", + " 6.22844934e-01, -2.66110301e-01, 8.00080523e-02, -1.60827965e-01,\n", + " -1.78654417e-01, -1.33000776e-01, 2.76004016e-01, 1.79546073e-01,\n", + " 8.71006995e-02, 2.33958483e-01, 1.76031828e-01, 1.55402005e-01,\n", + " -1.38987333e-01, -1.92975491e-01, -1.09528497e-01, 1.12305783e-01,\n", + " 2.32549626e-02, 7.75609687e-02, -2.43636876e-01, 6.35311157e-02,\n", + " -4.82399836e-02, -2.24204548e-02, 7.49862418e-02, -1.91449642e-01,\n", + " 9.72701237e-02, 4.00750965e-01, 2.81992704e-01, 3.07581365e-01,\n", + " -4.68867749e-01, -3.03025767e-02, -1.95257351e-01, 1.79073047e-02,\n", + " -2.15334237e-01, 9.98005569e-02, -2.62755096e-01, -2.39337608e-01,\n", + " 3.44270498e-01, 1.50241479e-01, -2.96006531e-01, -3.81666899e-01,\n", + " -1.19041964e-01, 6.18071109e-02, 6.49120063e-02, 9.94637012e-02,\n", + " 1.23297565e-01, 1.29930690e-01, 1.27305657e-01, -1.53804764e-01,\n", + " 7.04720244e-03, -1.33500487e-01, -1.51161134e-01, 
1.13862932e-01,\n", + " -2.44814962e-01, -8.95622373e-02, 4.76458520e-02, -5.92206642e-02,\n", + " 2.88407020e-02, -5.88610955e-02, -4.25557904e-02, 3.20446432e-01,\n", + " -2.61463765e-02, 7.19539896e-02, -1.32161498e-01, 1.62227061e-02,\n", + " 1.20197656e-03, -2.03355268e-01, -6.83294982e-03, -2.82588631e-01,\n", + " -1.61395460e-01, -5.05547188e-02, -2.27462381e-01, -1.70932785e-01,\n", + " 1.41351461e-01, -1.30069017e-01, -1.83039993e-01, -6.79691881e-02,\n", + " -2.15642393e-01, -7.84436688e-02, 1.77202985e-01, 4.50607650e-02,\n", + " 7.02605024e-02, 8.01992565e-02, -1.55584306e-01, -2.00563252e-01,\n", + " 1.17082551e-01, 9.73844752e-02, -1.10356934e-01, -1.37866074e-02,\n", + " -8.57235789e-02, -5.56467362e-02, -9.36827138e-02, 6.82030804e-03,\n", + " 6.92379624e-02, -2.28701755e-01, 6.70390204e-02, 1.34586483e-01,\n", + " 2.25231394e-01, 1.33322045e-01, -8.82911906e-02, 1.42205298e-01,\n", + " 2.41012901e-01, 7.94170424e-03, -7.02124536e-02, 2.51370400e-01,\n", + " 1.04983136e-01, -6.39194548e-02, 5.24720028e-02, 7.16757867e-03,\n", + " -1.08169973e-01, -1.08731678e-02, 1.69618204e-02, 7.87692815e-02,\n", + " -2.26539060e-01, 3.29003595e-02, 1.91522852e-01, 2.75921494e-01,\n", + " -1.64055750e-01, 5.83723187e-02, 9.84422341e-02, 3.21688712e-01,\n", + " -2.62310840e-02, -2.08140060e-01, 1.14425711e-01, 1.23823956e-01,\n", + " -8.62085819e-03, -4.14005108e-02, -3.41566652e-02, 1.34680912e-01,\n", + " 4.27634180e-01, 1.42883554e-01, -1.54787973e-01, 7.96157196e-02,\n", + " 1.40678003e-01, 1.39171826e-02, 1.66003749e-01, -4.85638082e-02,\n", + " 5.88261709e-02, 9.51106697e-02, 1.81014258e-02, 1.44485429e-01,\n", + " 4.01205927e-01, 6.77596256e-02, -5.52676022e-01, -1.87850371e-01,\n", + " 1.12366609e-01, -6.84190989e-02, 9.48949978e-02, 2.23454669e-01,\n", + " -1.69843137e-01, 2.09085494e-01, 4.29946512e-01, -3.36349100e-01,\n", + " 6.12608856e-03, -1.46142125e-01, -5.11092655e-02, 8.06671828e-02,\n", + " 1.81744993e-01, -6.78945482e-02, -5.77093139e-02, 
1.52337164e-01,\n", + " 2.21259117e-01, 3.35705757e-01, -2.51778495e-02, 1.03662543e-01,\n", + " -4.21361588e-02, 1.43061429e-01, -3.92947495e-01, -4.89463992e-02,\n", + " -9.15660262e-02, -1.00108273e-01, 3.86523217e-01, -4.25569601e-02,\n", + " 4.10154127e-02, -3.41399819e-01, 2.13903114e-02, 8.09015241e-03,\n", + " 9.56344381e-02, 1.12729572e-01, 7.25207478e-02, -6.64384067e-02,\n", + " -2.73666024e-01, -2.79651750e-02, 1.18422434e-01, -5.22459708e-02,\n", + " -2.47057881e-02, 2.84700710e-02, 2.07451075e-01, -9.74238589e-02,\n", + " 8.08936954e-02, 4.07307222e-02, -1.35277033e-01, 2.18436554e-01,\n", + " 1.28792310e-02, -1.20433331e-01, 2.41929386e-02, 1.28128864e-02,\n", + " -7.39881098e-02, -1.12995692e-01, 7.69245178e-02, -2.87000872e-02,\n", + " 1.64782573e-02, -2.78794408e-01, -2.64403820e-01, -2.43874848e-01,\n", + " 1.77457914e-01, 4.11631197e-01, -6.09753132e-02, 2.84967333e-01,\n", + " 9.81074646e-02, -2.68213183e-01, 1.52153388e-01, 2.42148209e-02,\n", + " 1.24371536e-01, 6.02926640e-03, 8.22689310e-02, 2.82294262e-04,\n", + " -1.40584474e-02, 4.09389734e-02, -2.58334547e-01, -9.83026102e-02,\n", + " -1.91695184e-01, -2.61005852e-02, -2.21736208e-01, -4.36628833e-02,\n", + " 9.49840024e-02, -5.16017936e-02, 2.17577979e-01, 2.58604765e-01,\n", + " 6.33814484e-02, -7.10158283e-03, 9.87893157e-03, -2.26405971e-02,\n", + " 1.67435139e-01, 2.90897069e-03, 2.35914681e-02, 5.43428905e-06],\n", + " dtype=float32)], None)\n" + ] + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + 
"nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/vectorization/get_pretrained_t2v.ipynb b/examples/vectorization/get_pretrained_t2v.ipynb new file mode 100644 index 00000000..c0982e81 --- /dev/null +++ b/examples/vectorization/get_pretrained_t2v.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# get_pretrained_t2v\n", + "\n", + "## 概述\n", + "\n", + "使用 EduNLP 项目组给定的预训练模型将一组题目的切分序列表征为向量。\n", + "\n", + "- 优点:简单方便。\n", + "- 缺点:只能使用项目中给定的模型,局限性较大。" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 导入功能块" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "from tqdm import tqdm\n", + "from EduNLP.SIF.segment import seg\n", + "from EduNLP.SIF.tokenization import tokenize\n", + "from EduNLP.Pretrain import GensimWordTokenizer\n", + "from EduNLP.Vector import get_pretrained_t2v" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输入\n", + "\n", + "类型:list \n", + "内容:一个题组中每个题目切分序列的组合。\n", + "> 这里需要调用 `GensimWordTokenizer` 将题目文本(`str` 类型)转换成 tokens。" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "def load_items():\n", + " test_items = [\n", + " {'ques_content':'有公式$\\\\FormFigureID{wrong1?}$和公式$\\\\FormFigureBase64{wrong2?}$,如图$\\\\FigureID{088f15ea-8b7c-11eb-897e-b46bfc50aa29}$,若$x,y$满足约束条件$\\\\SIFSep$,则$z=x+7 y$的最大值为$\\\\SIFBlank$'},\n", + " {'ques_content':'如图$\\\\FigureID{088f15ea-8b7c-11eb-897e-b46bfc50aa29}$,若$x,y$满足约束条件$\\\\SIFSep$,则$z=x+7 y$的最大值为$\\\\SIFBlank$'},\n", + " {'ques_content':'
Below is a discussion on a website.
t2v\n", + "t2v = get_pretrained_t2v(\"d2v_sci_256\")" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "downloader, INFO http://base.ustc.edu.cn/data/model_zoo/EduNLP/d2v/general_science_256.zip is saved as /home/lvrui/.EduNLP/model/general_science_256.zip\n", + "downloader, INFO file existed, skipped\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "- 注意:\n", + " 默认的 EduNLP 项目存储地址为根目录(`~/.EduNLP`),模型存储地址为项目存储地址下的 `model` 文件夹。您可以通过修改下面的环境变量来修改模型存储地址:\n", + " - EduNLP 项目存储地址:`EDUNLPPATH = xx/xx/xx`\n", + " - 模型存储地址:`EDUNLPMODELPATH = xx/xx/xx`" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "t2v(token_items)" + ], + "outputs": [], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/examples/vectorization/i2v.ipynb b/examples/vectorization/i2v.ipynb new file mode 100644 index 00000000..3122fbce --- /dev/null +++ b/examples/vectorization/i2v.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# I2V\n", + "\n", + "## 概述\n", + "\n", + "使用自己提供的任一预训练模型(给出模型存放路径即可)将给定的题目文本转成向量。\n", + "\n", + "- 优点:可以使用自己的模型,另可调整训练参数,灵活性强。" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 导入类" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "from EduNLP.I2V import D2V" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": 
"markdown", + "source": [ + "## 输入\n", + "\n", + "类型:str \n", + "内容:题目文本 (text)" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 37, + "source": [ + "item = {\n", + "\"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$\"\n", + "}" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输出" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 34, + "source": [ + "model_path = \"../test_model/test_gensim_luna_stem_tf_d2v_256.bin\"\n", + "i2v = D2V(\"text\",\"d2v\",filepath=model_path, pretrained_t2v = False)\n", + "i2v " + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 35, + "source": [ + "i2v(item)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "([array([ 4.76559885e-02, -1.60574958e-01, 1.94614579e-03, 2.40295693e-01,\n", + " 2.24517003e-01, -3.24351490e-02, 4.35789041e-02, -1.65670961e-02,\n", + " -7.77302235e-02, 4.23757173e-02, 4.62658405e-02, 7.54115507e-02,\n", + " -4.54682261e-02, -1.82153687e-01, 5.55203669e-02, 4.23391759e-02,\n", + " 8.86691213e-02, 6.97413310e-02, -2.47167766e-01, 2.54209518e-01,\n", + " -3.76413465e-02, 3.58376503e-02, -1.39907554e-01, -8.55517760e-02,\n", + " -1.62535697e-01, -4.44540828e-02, -3.99694731e-03, 1.83905549e-02,\n", + " -8.03738683e-02, -9.05910060e-02, 1.45633578e-01, 9.63102728e-02,\n", + " -7.19666481e-02, -8.49684048e-03, -1.51718438e-01, -1.46381939e-02,\n", + " 8.34727809e-02, -7.11122975e-02, 1.66607365e-01, -1.14558250e-01,\n", + " -1.72963589e-01, 4.86062802e-02, -1.63086802e-02, -3.68945636e-02,\n", + " 2.46143237e-01, 5.40899672e-03, 5.04904091e-02, 
1.16586924e-01,\n", + " 7.59096816e-02, 1.20751150e-02, 1.04407202e-02, 3.19544263e-02,\n", + " -6.02783300e-02, 1.18572332e-01, -2.19343737e-01, 2.67594811e-02,\n", + " 1.01860933e-01, -2.87170410e-02, 5.16606905e-02, 1.62313670e-01,\n", + " -5.12879491e-02, -1.62193626e-02, -6.77167401e-02, 1.67254247e-02,\n", + " 1.10977821e-01, 8.02466944e-02, -2.00764649e-02, 1.28788516e-01,\n", + " -7.20706284e-02, -6.22547232e-02, 1.06899485e-01, 4.60059335e-03,\n", + " -1.99650228e-01, -1.38489634e-01, 7.20307231e-02, -4.98757213e-02,\n", + " -1.94095057e-02, -5.85906627e-03, 1.47433639e-01, 4.68258560e-02,\n", + " 9.31144804e-02, -4.59938832e-02, 3.38427201e-02, 4.83937971e-02,\n", + " -1.27312467e-01, 2.01561809e-01, 1.10482745e-01, -1.70595810e-01,\n", + " -9.55015421e-02, -7.73611516e-02, 4.43056040e-02, -1.65684260e-02,\n", + " 1.65379923e-02, -1.26138464e-01, 8.31304193e-02, 2.06687212e-01,\n", + " -1.69529378e-01, 3.43789416e-03, 1.19198427e-01, -1.38129979e-01,\n", + " -1.87937781e-01, -8.27087983e-02, -1.76488962e-02, 8.51018950e-02,\n", + " 8.15693215e-02, 2.30262652e-02, 1.05074964e-01, 3.13350782e-02,\n", + " 1.53877333e-01, 1.01772640e-02, 9.17675197e-02, -1.32400826e-01,\n", + " 5.29836975e-02, 2.52282787e-02, -6.19753152e-02, -5.56256585e-02,\n", + " 3.87686864e-02, 4.30755690e-02, 7.57815093e-02, 2.63280701e-02,\n", + " 4.59217802e-02, -1.17288530e-01, 1.76368475e-01, 9.27482091e-04,\n", + " 2.64808517e-02, 9.73805785e-03, 1.90501258e-01, 1.02596413e-02,\n", + " -5.55249080e-02, -1.17555618e-01, -9.98716354e-02, 1.28057361e-01,\n", + " -4.52451073e-02, 7.51599446e-02, -3.01250312e-02, 6.24186322e-02,\n", + " 5.77449016e-02, 2.07213312e-02, -2.53734970e-03, -1.69801563e-01,\n", + " -2.28750743e-02, -2.55512260e-02, 1.70693725e-01, 2.35232189e-01,\n", + " -2.71384805e-01, -1.84327438e-01, 4.16823551e-02, 8.70332569e-02,\n", + " 1.82847306e-01, 2.76729286e-01, -4.31840494e-02, -1.38212308e-01,\n", + " -3.26297544e-02, -4.25132550e-02, -1.62892416e-01, 
1.91870285e-03,\n", + " 1.52552709e-01, -1.01523520e-02, -9.16219354e-02, -5.46490997e-02,\n", + " 6.06994517e-02, -6.42470419e-02, 7.96310753e-02, -5.70830703e-02,\n", + " -8.82780831e-03, -3.94574478e-02, 9.63162258e-02, 1.54309124e-01,\n", + " 1.81100428e-01, 8.63620341e-02, 1.56518817e-02, -4.08006124e-02,\n", + " 5.20652272e-02, 8.38029310e-02, -1.55516326e-01, 3.57730500e-03,\n", + " -1.50946556e-02, 2.84812655e-02, 1.37905419e-01, 8.77659023e-02,\n", + " 8.23542774e-02, -1.04377635e-01, 4.80731949e-03, 1.18891411e-02,\n", + " 9.32120830e-02, 7.88019150e-02, -1.44494563e-01, -7.53350407e-02,\n", + " -1.13602541e-01, 5.43805361e-02, 1.64935380e-01, -2.00515296e-02,\n", + " 1.92917317e-01, -4.35359031e-02, 8.92477036e-02, -4.37481068e-02,\n", + " 4.01461311e-02, -2.59898454e-01, -1.11872263e-01, -1.25746787e-01,\n", + " -2.34577611e-01, -6.69524372e-02, 5.55978045e-02, -1.91931397e-01,\n", + " 5.87355606e-02, 1.01886272e-01, -2.64038593e-01, -2.05450356e-02,\n", + " -1.97510555e-01, 9.13371146e-02, 1.49546817e-01, -3.91026959e-02,\n", + " 5.94646595e-02, 1.29657034e-02, -3.72891256e-04, 5.56622408e-02,\n", + " 1.61776438e-01, 2.29037628e-02, -1.94774106e-01, -5.02247922e-02,\n", + " -5.45939505e-02, 5.31783216e-02, 1.26433298e-01, -1.23263724e-01,\n", + " 8.53074417e-02, -1.41412809e-01, -7.71067888e-02, 1.21865064e-01,\n", + " 4.73318882e-02, 7.20091909e-02, -9.83269960e-02, 1.99413914e-02,\n", + " -1.88907124e-02, -2.14710683e-02, -4.93260436e-02, 1.64937660e-01,\n", + " -1.07827298e-01, -7.75848776e-02, -6.23578345e-03, -1.05760902e-01,\n", + " -4.14819457e-02, 5.95730543e-02, 4.11023498e-02, -2.18305327e-02,\n", + " -2.30057724e-02, -3.34391668e-02, 1.30382255e-01, 5.10290638e-02,\n", + " -1.21569566e-01, -1.23630039e-01, -1.83883369e-01, 1.10945016e-01,\n", + " -1.05633408e-01, -8.24846700e-02, -3.76710802e-01, -4.50239740e-02],\n", + " dtype=float32)],\n", + " None)" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ], + "metadata": {} + } + ], + 
"metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/vectorization/t2v.ipynb b/examples/vectorization/t2v.ipynb new file mode 100644 index 00000000..908ff182 --- /dev/null +++ b/examples/vectorization/t2v.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# T2V\n", + "\n", + "## 概述\n", + "\n", + "使用自己提供的任一预训练模型(给出模型存放路径即可)将一组题目的切分序列表征为向量。\n", + "\n", + "- 优点:模型及其参数可自主调整,灵活性强。\n" + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 导入功能块" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "source": [ + "from tqdm import tqdm\n", + "from EduNLP.SIF.segment import seg\n", + "from EduNLP.SIF.tokenization import tokenize\n", + "from EduNLP.Pretrain import GensimWordTokenizer\n", + "from EduNLP.Vector import T2V" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输入\n", + "\n", + "类型:list \n", + "内容:一个题组中每个题目切分序列的组合。\n", + "> 这里需要调用 `GensimWordTokenizer` 将题目文本(`str` 类型)转换成 tokens。" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "source": [ + "print(type(token_items))\n", + "print(type(token_items[0]))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "token_items[0]" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['公式',\n", + " 
'[FORMULA]',\n", + " '公式',\n", + " '[FORMULA]',\n", + " '如图',\n", + " '[FIGURE]',\n", + " 'x',\n", + " ',',\n", + " 'y',\n", + " '约束条件',\n", + " '[SEP]',\n", + " 'z',\n", + " '=',\n", + " 'x',\n", + " '+',\n", + " '7',\n", + " 'y',\n", + " '最大值',\n", + " '[MARK]']" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## 输出" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "path = \"../test_model/test_gensim_luna_stem_tf_d2v_256.bin\"\n", + "t2v = T2V('d2v',filepath=path)\n", + "t2v(token_items)" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[array([ 0.0256574 , 0.06061139, -0.00121044, -0.0167674 , -0.0111706 ,\n", + " 0.05325712, -0.02097339, -0.01613594, 0.02904145, 0.0185046 ,\n", + " 0.03473525, 0.00628165, 0.03696947, 0.00666153, -0.02352318,\n", + " -0.00458236, 0.02308686, -0.02153478, 0.01579256, -0.01575841,\n", + " -0.02654778, 0.01376328, 0.02539059, -0.01098955, 0.02203193,\n", + " -0.01503642, 0.01310026, -0.03569775, -0.00450978, 0.02522727,\n", + " -0.01547103, -0.00907244, -0.00072009, -0.0021727 , 0.02894731,\n", + " 0.01382611, 0.01647377, 0.00452782, -0.02488854, 0.02741116,\n", + " 0.0489724 , -0.04156181, -0.00855933, 0.01783935, 0.00704233,\n", + " 0.01296936, -0.06078439, -0.04922014, -0.0206639 , 0.00820663,\n", + " 0.02565274, 0.0164784 , 0.00996537, -0.02215545, 0.06741589,\n", + " 0.01634789, -0.0094168 , 0.00183323, 0.00853508, -0.0547929 ,\n", + " 0.00405556, 0.01386227, -0.04204945, 0.02175955, -0.01960315,\n", + " -0.05279269, -0.01511251, -0.02905018, -0.00405249, 0.03328003,\n", + " -0.00487469, -0.00338632, 0.01793213, 0.00942458, -0.02468935,\n", + " 0.03548338, -0.00907473, 0.00927462, -0.02545504, 0.02286367,\n", + " -0.01822809, 0.03625014, -0.00976438, -0.00188348, 0.06408882,\n", + " -0.04314236, -0.00193059, 0.02433112, -0.0091018 , 0.0276503 ,\n", + " 
-0.0036342 , -0.02485391, 0.02309245, 0.01880057, -0.00893952,\n", + " -0.03391525, 0.02678591, -0.00618519, -0.03601262, 0.0327184 ,\n", + " 0.09240578, 0.03631649, -0.00700663, -0.01786321, -0.02987848,\n", + " 0.00315695, -0.02082208, -0.00494443, -0.02717963, -0.00938541,\n", + " -0.0329605 , 0.0069218 , 0.01227082, 0.00856757, -0.0008222 ,\n", + " -0.0067637 , -0.01577486, 0.0628339 , -0.02329138, -0.00475964,\n", + " 0.02197625, 0.03022351, 0.00256966, -0.00247619, -0.01218352,\n", + " 0.01257284, 0.0051926 , -0.05297434, -0.0057066 , 0.01031242,\n", + " 0.02414824, -0.0115857 , 0.01625632, -0.03126714, -0.02389767,\n", + " -0.01417263, 0.02280749, -0.01431546, -0.00771551, 0.0264634 ,\n", + " 0.00115387, -0.01903204, -0.00100629, 0.00608774, 0.03787961,\n", + " 0.05098663, 0.03064756, -0.00654223, -0.01838502, -0.01889201,\n", + " 0.04686983, -0.02295219, -0.00901293, 0.00916024, -0.00013042,\n", + " 0.01236307, -0.00918534, 0.01792936, 0.00862702, -0.00018518,\n", + " -0.00566689, 0.00499178, 0.0246148 , -0.0170825 , 0.01850726,\n", + " 0.00031357, 0.02411471, 0.01080729, -0.01361136, -0.06226439,\n", + " 0.01830878, 0.01209503, -0.00980596, -0.01865078, 0.03692432,\n", + " -0.04503555, 0.0037965 , -0.04214804, -0.05657932, -0.01566005,\n", + " 0.00271924, -0.00026349, -0.00783886, 0.01218421, -0.03205092,\n", + " -0.02793218, -0.00298462, 0.00380523, 0.04471321, -0.02079478,\n", + " 0.0100926 , 0.00450996, -0.03412817, 0.03027697, 0.00872989,\n", + " 0.01512562, 0.01527565, 0.03683509, 0.05608684, 0.01055199,\n", + " 0.01637757, -0.01995301, -0.01610573, 0.04207385, 0.00058077,\n", + " 0.03846577, 0.04952911, -0.02142448, 0.0049874 , -0.00308159,\n", + " -0.02233348, 0.02013967, -0.01194606, -0.02481469, 0.01824989,\n", + " -0.00939436, -0.00374474, 0.02278485, 0.04107878, 0.01870474,\n", + " -0.00310527, -0.00257802, -0.03689042, -0.0200304 , -0.04838364,\n", + " 0.0035307 , 0.02496746, -0.0385387 , 0.01649689, 0.01429029,\n", + " 0.04338812, -0.05614391, 
-0.01632982, 0.03378268, 0.01393604,\n", + " -0.03859077, 0.01855484, 0.00241599, -0.00985778, 0.00530987,\n", + " 0.03700508, -0.06107654, -0.00972089, 0.02251891, 0.01154722,\n", + " 0.00913082, -0.0267815 , -0.01723521, 0.0136464 , 0.01965802,\n", + " 0.04769301, -0.02218902, -0.01268643, 0.00650465, 0.00985247,\n", + " 0.0029873 ], dtype=float32),\n", + " array([ 0.00877787, 0.03242666, -0.00026327, -0.01881958, -0.00730135,\n", + " 0.03559063, -0.01825701, -0.01065201, 0.01681685, 0.01074173,\n", + " 0.02253641, 0.0082016 , 0.02200216, 0.00088347, -0.0205142 ,\n", + " -0.01339685, 0.01239092, -0.01781665, 0.01000167, -0.01227449,\n", + " -0.03044926, 0.00296532, 0.01440197, -0.01035894, 0.01061506,\n", + " -0.00530907, 0.00484147, -0.02209524, 0.00735557, 0.01712263,\n", + " -0.00231011, -0.01255511, -0.00114341, -0.01413104, 0.02112199,\n", + " 0.01123461, 0.01380601, -0.00019924, -0.02128731, 0.01526375,\n", + " 0.02988552, -0.02491145, -0.00939747, 0.00798917, 0.0135474 ,\n", + " 0.01258122, -0.03753063, -0.04039029, -0.01517935, 0.00668549,\n", + " 0.02796665, 0.01242495, 0.0059546 , -0.01216253, 0.0372387 ,\n", + " 0.01762399, -0.00170241, 0.0003667 , 0.00895109, -0.03517802,\n", + " -0.00762667, 0.01357641, -0.02436312, 0.01829541, -0.01330634,\n", + " -0.02818829, -0.01139517, -0.01664645, 0.00769452, 0.01209339,\n", + " -0.00416979, -0.01296107, -0.0064631 , 0.0050506 , -0.01833598,\n", + " 0.02872021, -0.00062401, 0.0109796 , -0.01280711, 0.01152301,\n", + " -0.01085931, 0.02023655, 0.00272896, -0.00558658, 0.03704501,\n", + " -0.01837787, -0.00414707, 0.00713773, -0.01023714, 0.0090292 ,\n", + " 0.00089387, -0.01082103, 0.02051528, 0.01287969, -0.0074691 ,\n", + " -0.01942614, 0.01223695, -0.0136801 , -0.01567431, 0.01466064,\n", + " 0.04967042, 0.02889016, -0.005946 , -0.00131571, -0.0110809 ,\n", + " 0.00165396, -0.01279759, -0.01407798, -0.01902512, -0.01361593,\n", + " -0.00631681, -0.00142478, 0.01678663, 0.00815052, -0.00193329,\n", + " 
-0.00845464, -0.00746565, 0.03766166, -0.01099476, 0.00489809,\n", + " 0.01403449, 0.01477709, -0.00150515, 0.00462877, -0.01271886,\n", + " 0.00072193, 0.00815068, -0.04432011, -0.00604029, -0.00264471,\n", + " 0.01325564, -0.01315497, 0.00713541, -0.0137267 , -0.01845939,\n", + " -0.02801731, 0.01673851, -0.00593479, -0.01457028, 0.01636872,\n", + " -0.00751132, -0.01056858, 0.01126528, 0.01645665, 0.02689397,\n", + " 0.01920939, 0.01767929, -0.00843761, -0.01002457, -0.00844629,\n", + " 0.02888541, -0.00503441, -0.00025836, 0.01326172, -0.00968244,\n", + " 0.00430614, -0.00964946, 0.00635843, 0.00445558, -0.00235765,\n", + " 0.00160239, -0.00325711, 0.03206096, -0.00511734, 0.01108837,\n", + " 0.0014369 , 0.02616214, 0.01631057, -0.00778238, -0.04322761,\n", + " -0.00086197, 0.01174034, -0.00230315, -0.01354581, 0.01665967,\n", + " -0.02281472, -0.0123808 , -0.02901287, -0.04143119, -0.00477564,\n", + " 0.00608404, -0.00701787, -0.00686041, 0.01422733, -0.02854553,\n", + " -0.01464688, -0.00404892, 0.00348112, 0.02299088, -0.02302668,\n", + " 0.01208024, 0.01010513, -0.01571813, 0.01446694, -0.00129136,\n", + " -0.00054684, -0.00328883, 0.01649218, 0.03326375, -0.00185443,\n", + " 0.02091988, -0.00814938, -0.0088084 , 0.02302703, -0.01156406,\n", + " 0.04080933, 0.02902327, -0.01330268, -0.00385899, -0.00826302,\n", + " -0.02295679, 0.00658087, -0.0056047 , -0.01404469, 0.00368797,\n", + " -0.01484573, 0.00689151, 0.02035506, 0.02181732, 0.02151672,\n", + " 0.0004279 , -0.00763045, -0.01551796, -0.02054572, -0.03275407,\n", + " 0.00623783, 0.007831 , -0.02604559, 0.01956206, 0.0161521 ,\n", + " 0.02634443, -0.03285164, -0.01301691, 0.01066694, 0.01585914,\n", + " -0.0187955 , 0.01046878, -0.00189302, -0.01132144, -0.00140048,\n", + " 0.02645635, -0.04300842, -0.00639437, 0.01285532, -0.00437311,\n", + " 0.01163111, -0.015357 , -0.00531165, 0.01102756, 0.00182517,\n", + " 0.02303016, -0.00949884, -0.02009463, 0.00573564, 0.00076009,\n", + " 0.00078505], 
dtype=float32)]" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.8.5", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.5 64-bit" + }, + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file