Merged · Changes from all commits (29 commits)
2 changes: 2 additions & 0 deletions AUTHORS.md
@@ -16,5 +16,7 @@

[Meikai Bao](https://github.com/BAOOOOOM)

[Yuting Ning](https://github.com/nnnyt)


The starred contributors are the corresponding authors.
53 changes: 25 additions & 28 deletions EduNLP/Formula/Formula.py
@@ -15,6 +15,17 @@

class Formula(object):
"""

Parameters
----------
formula: str or List[Dict]
LaTeX formula string or the parsed abstract syntax tree
variable_standardization
const_mathord
init
args
kwargs

Examples
--------
>>> f = Formula("x")
@@ -34,21 +45,8 @@ class Formula(object):
--------
the parsed abstract syntax tree
"""

def __init__(self, formula: (str, List[Dict]), variable_standardization=False, const_mathord=None,
init=True, *args, **kwargs):
"""

Parameters
----------
formula: str or List[Dict]
LaTeX formula string or the parsed abstract syntax tree
variable_standardization
const_mathord
init
args
kwargs
"""
self._formula = formula
self._ast = None
if init is True:
@@ -131,6 +129,15 @@ def resetable(self):

class FormulaGroup(object):
"""

Parameters
----------
formula_list: list, List[str], or List[Formula]
a list of LaTeX formula strings, parsed abstract syntax trees, or Formula objects
variable_standardization
const_mathord
detach

Examples
---------
>>> fg = FormulaGroup(["x + y", "y + x", "z + x"])
@@ -141,8 +148,9 @@ class FormulaGroup(object):
<FormulaGroup: <Formula: x + y>;<Formula: y + x>;<Formula: z + x>>
>>> fg = FormulaGroup(["x", Formula("y"), "x"])
>>> fg.elements
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, {'id': 1, 'type': 'mathord', 'text': 'y', 'role': None},\
{'id': 2, 'type': 'mathord', 'text': 'x', 'role': None}]
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None}, \
{'id': 1, 'type': 'mathord', 'text': 'y', 'role': None}, \
{'id': 2, 'type': 'mathord', 'text': 'x', 'role': None}]
>>> fg = FormulaGroup(["x", Formula("y"), "x"], variable_standardization=True)
>>> fg.elements
[{'id': 0, 'type': 'mathord', 'text': 'x', 'role': None, 'var': 0}, \
@@ -153,24 +161,12 @@
--------
the parsed abstract syntax forest
"""

def __init__(self,
formula_list: (list, List[str], List[Formula]),
variable_standardization=False,
const_mathord=None,
detach=True
):
"""

Parameters
----------
formula: str or List[Dict] or List[Formula]
LaTeX formula string, the parsed abstract syntax tree, or a group of parsed abstract syntax trees
variable_standardization
const_mathord
detach

"""
forest = []
self._formulas = []
for formula in formula_list:
@@ -261,7 +257,8 @@ def link_formulas(*formula: Formula, link_vars=True, **kwargs):

Parameters
----------
formula: the parsed abstract syntax tree
formula
the parsed abstract syntax tree
link_vars
kwargs
"""
78 changes: 44 additions & 34 deletions EduNLP/I2V/i2v.py
@@ -16,7 +16,7 @@ class I2V(object):
If you want to get a vector from an item, use a concrete model such as D2V or W2V.

Parameters
----------
-----------
tokenizer: str
the tokenizer name
t2v: str
@@ -26,8 +26,11 @@ class I2V(object):
tokenizer_kwargs: dict
the parameters passed to tokenizer
pretrained_t2v: bool

True: use pretrained t2v model

False: use your own t2v model

kwargs:
the parameters passed to t2v

@@ -39,33 +42,13 @@
>>> model_path = "examples/test_model/test_gensim_luna_stem_tf_d2v_256.bin" # doctest: +ELLIPSIS
>>> i2v = D2V("text","d2v",filepath=model_path, pretrained_t2v = False) # doctest: +ELLIPSIS
>>> i2v(item) # doctest: +ELLIPSIS
([array([ ...dtype=float32)], None)
([array([...dtype=float32)], None)

Returns
-------
i2v model: I2V
"""

def __init__(self, tokenizer, t2v, *args, tokenizer_kwargs: dict = None, pretrained_t2v=False, **kwargs):
"""

Parameters
----------
tokenizer: str
the tokenizer name
t2v: str
the name of token2vector model
args:
the parameters passed to t2v
tokenizer_kwargs: dict
the parameters passed to tokenizer
pretrained_t2v: bool
True: use pretrained t2v model
False: use your own t2v model
kwargs:
the parameters passed to t2v

"""
self.tokenizer: Tokenizer = get_tokenizer(tokenizer, **tokenizer_kwargs if tokenizer_kwargs is not None else {})
if pretrained_t2v:
logger.info("Use pretrained t2v model %s" % t2v)
@@ -125,8 +108,11 @@ def vector_size(self):

class D2V(I2V):
"""

Bases: I2V

Parameters
----------
-----------
tokenizer: str
the tokenizer name
t2v: str
@@ -142,7 +128,7 @@ class D2V(I2V):
the parameters passed to t2v

Examples
--------
---------
>>> item = {"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, \
... 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,\
... 此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$"}
@@ -160,7 +146,7 @@ def infer_vector(self, items, tokenize=True, indexing=False, padding=False, key=
'''

Parameters
----------
-----------
items: str
the text of the question
tokenize: bool
@@ -175,7 +161,7 @@ def infer_vector(self, items, tokenize=True, indexing=False, padding=False, key=
the parameters passed to t2v

Returns
-------
--------
vector:list
'''
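A hypothetical sketch of the `key` parameter (editorial addition): the field name "stem" is invented for illustration only, and `i2v` is the D2V instance constructed in the class doctest above.

# Hypothetical: when an item stores its text under a field, `key` tells the
# tokenizer which part to process. The field name is assumed, not from the diff.
vec = i2v.infer_vector({"stem": "如图,在三角形 $ABC$ 中 $\\SIFChoice$"}, key=lambda x: x["stem"])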
tokens = self.tokenize(items, return_token=True, key=key) if tokenize is True else items
@@ -189,8 +175,11 @@ def from_pretrained(cls, name, model_dir=MODEL_DIR, *args, **kwargs):

class W2V(I2V):
"""

Bases: I2V

Parameters
----------
-----------
tokenizer: str
the tokenizer name
t2v: str
@@ -206,19 +195,40 @@ class W2V(I2V):
the parameters passed to t2v

Examples
--------
---------
>>> i2v = get_pretrained_i2v("test_w2v", "examples/test_model/data/w2v")
>>> item_vector, token_vector = i2v(["有学者认为:‘学习’,必须适应实际"])
>>> item_vector
array([[...]], dtype=float32)
>>> item_vector # doctest: +ELLIPSIS
[array([...], dtype=float32)]

Returns
-------
--------
i2v model: W2V

"""
def infer_vector(self, items, tokenize=True, indexing=False, padding=False, key=lambda x: x, *args,
**kwargs) -> tuple:
'''

Parameters
-----------
items: str
the text of the question
tokenize: bool
True: tokenize the item
indexing: bool
padding: bool
key: lambda function
the parameter passed to the tokenizer; selects the text to be processed
args:
the parameters passed to t2v
kwargs:
the parameters passed to t2v

Returns
--------
vector:list
'''
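A hedged sketch of the two-part return (editorial addition): the return statement in the method body below yields item-level vectors from self.t2v(tokens) and per-token vectors from self.t2v.infer_tokens(tokens); the input mirrors the class doctest above, and `i2v` is the pretrained W2V model loaded there.

# Sketch: unpack item-level and token-level vectors, as the return below shows.
item_vecs, token_vecs = i2v.infer_vector(["有学者认为:‘学习’,必须适应实际"], tokenize=True)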
tokens = self.tokenize(items, return_token=True) if tokenize is True else items
tokens = [token for token in tokens]
return self.t2v(tokens, *args, **kwargs), self.t2v.infer_tokens(tokens, *args, **kwargs)
@@ -244,18 +254,18 @@ def get_pretrained_i2v(name, model_dir=MODEL_DIR):
"""

Parameters
----------
-----------
name: str
the name of the item2vector model
model_dir: str
the path of the model, default: MODEL_DIR = '~/.EduNLP/model'

Returns
-------
--------
i2v model: I2V

Examples
--------
---------
>>> item = {"如图来自古希腊数学家希波克拉底所研究的几何图形.此图由三个半圆构成,三个半圆的直径分别为直角三角形$ABC$的斜边$BC$, \
... 直角边$AB$, $AC$.$\\bigtriangleup ABC$的三边所围成的区域记为$I$,黑色部分记为$II$, 其余部分记为$III$.在整个图形中随机取一点,\
... 此点取自$I,II,III$的概率分别记为$p_1,p_2,p_3$,则$\\SIFChoice$$\\FigureID{1}$"}
30 changes: 14 additions & 16 deletions EduNLP/ModelZoo/rnn/rnn.py
@@ -9,6 +9,20 @@

class LM(nn.Module):
"""

Parameters
----------
rnn_type: str
Legal types include RNN, LSTM, GRU, and ELMO
vocab_size: int
embedding_dim: int
hidden_size: int
num_layers
bidirectional
embedding
model_params
kwargs

Examples
--------
>>> import torch
@@ -30,22 +44,6 @@ class LM(nn.Module):

def __init__(self, rnn_type: str, vocab_size: int, embedding_dim: int, hidden_size: int, num_layers=1,
bidirectional=False, embedding=None, model_params=None, **kwargs):
"""

Parameters
----------
rnn_type: str
Legal types include RNN, LSTM, GRU, and ELMO
vocab_size: int
embedding_dim: int
hidden_size: int
num_layers
bidirectional
embedding
model_params
kwargs

"""
super(LM, self).__init__()
rnn_type = rnn_type.upper()
self.embedding = torch.nn.Embedding(vocab_size, embedding_dim) if embedding is None else embedding
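A hedged construction sketch for LM (editorial addition): the arguments mirror the signature above; a forward pass is omitted because its signature is not shown in this hunk, and the small sizes are illustrative placeholders, not recommended settings.

# Construction only -- sizes are placeholders.
from EduNLP.ModelZoo.rnn.rnn import LM
lm = LM("LSTM", vocab_size=100, embedding_dim=16, hidden_size=32, num_layers=1, bidirectional=False)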
33 changes: 11 additions & 22 deletions EduNLP/ModelZoo/utils/masker.py
@@ -7,6 +7,13 @@

class Masker(object):
"""

Parameters
----------
mask: int, str
per
seed

Examples
-------
>>> masker = Masker(per=0.5, seed=10)
@@ -29,35 +36,17 @@ class Masker(object):
[['a', '[MASK]', 'c'], ['d', '[PAD]', '[PAD]'], ['hello', '[MASK]', '[PAD]']]
>>> mask_label
[[0, 1, 0], [0, 0, 0], [0, 1, 0]]
"""

Returns
----------
tuple: the list of masked sequences and the list of mask labels
"""
def __init__(self, mask: (int, str, ...) = 0, per=0.2, seed=None):
"""

Parameters
----------
mask: int, str
per
seed
"""
self.seed = np.random.default_rng(seed)
self.per = per
self.mask = mask

def __call__(self, seqs, length=None, *args, **kwargs) -> tuple:
"""

Parameters
----------
seqs:list
length
args
kwargs

Returns
----------
tuple: the list of masked sequences and the list of mask labels
"""
seqs = deepcopy(seqs)
masked_list = []
if length is None: