Merge pull request #155 from UnB-KnEDLe/update_polished_docs

[DOCUMENTATION] Updates
UnB-KnEDLe · Aug 16, 2021 · e5289a9 · e5289a9
2 parents bdada5a + 4ad7acf
commit e5289a9
Show file tree

Hide file tree

Showing 14 changed files with 181 additions and 105 deletions.
diff --git a/.gitignore b/.gitignore
@@ -140,4 +140,7 @@ dmypy.json
 .vscode/
 
 # temporary local files
-tmp/
+tmp/
+
+# dodfs 
+dodfs/
diff --git a/docs/source/polished/core.rst b/docs/source/polished/core.rst
@@ -2,12 +2,32 @@
 Polished Core
 =============
 
-.. automodule:: dodfminer.extract.polished.core
-    :members: _acts_ids
+.. contents:: Table of Contents
 
+.. automodule:: dodfminer.extract.polished.core
 
 The Act Extractor Class
 =======================
 
 .. autoclass:: dodfminer.extract.polished.core.ActsExtractor
     :members:
+
+Returning Objects
+-----------------
+
+The methods in this section return objects or vectors of objects.
+
+.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_act_obj
+
+.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_all_obj
+
+Returning Dataframes
+--------------------
+
+The methods in this section return dataframes or vectors of dataframes.
+
+.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_act_df
+
+.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_all_df
+
+.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_xml
diff --git a/docs/source/polished/utils.rst b/docs/source/polished/utils.rst
@@ -4,3 +4,17 @@ Polished Helper
 
 .. automodule:: dodfminer.extract.polished.helper
     :members:
+
+.. autofunction:: dodfminer.extract.polished.helper.xml_multiple
+
+.. autofunction:: dodfminer.extract.polished.helper.extract_multiple_acts
+
+.. autofunction:: dodfminer.extract.polished.helper.extract_multiple
+
+.. autofunction:: dodfminer.extract.polished.helper.extract_single
+
+.. autofunction:: dodfminer.extract.polished.helper.build_act_txt
+
+.. autofunction:: dodfminer.extract.polished.helper.print_dataframe
+
+.. autofunction:: dodfminer.extract.polished.helper.get_files_path
diff --git a/dodfminer/__version__.py b/dodfminer/__version__.py
@@ -1,4 +1,4 @@
-version_info = (1, 2, 0)
+version_info = (1, 3, 6)
 # format:
 # ('dodf_major', 'dodf_minor', 'dodf_patch')
 

diff --git a/dodfminer/downloader/__init__.py b/dodfminer/downloader/__init__.py
diff --git a/dodfminer/extract/polished/core.py b/dodfminer/extract/polished/core.py
@@ -8,9 +8,6 @@
     from dodfminer.extract.polished.core import ActsExtractor
     ActsExtractor.get_act_obj(ato_id, file, backend)
 
-Acts Available and IDs
-----------------------
-
 """
 
 from dodfminer.extract.polished.acts.aposentadoria import Retirements, RetAposentadoria
@@ -43,10 +40,10 @@ class ActsExtractor:
     """Polished Extraction main class.
 
     All interactions with the acts need to be done through this interface.
-    This class handles all the requests to regex or ner extraction.
+    This class handles all the requests to Regex or NER extraction.
 
     Note:
-        This class is static
+        This class is static.
 
     """
 
@@ -59,7 +56,7 @@ def get_act_obj(ato_id, file, backend):
         Args:
             ato_id (string): The name of the act to extract.
             file (string): Path of the file.
-            backend (string): Backend of act extraction, either regex or ner.
+            backend (string): Backend of act extraction, either Regex or NER.
 
         Returns:
             An object of the desired act, already with extracted information.
@@ -69,16 +66,16 @@ def get_act_obj(ato_id, file, backend):
 
     @staticmethod
     def get_all_obj(file, backend):
-        """Extract all acts types from a single DODF.
+        """Extract all act types from a single DODF.
 
         Object format.
 
         Args:
             file (string): Path of the file.
-            backend (string): Backend of act extraction, either regex or ner.
+            backend (string): Backend of act extraction, either Regex or NER.
 
         Returns:
-            An vector of objects of all the acts, already with extracted
+            A vector of objects of all the acts with extracted
             information.
 
         """
@@ -97,17 +94,17 @@ def get_act_df(ato_id, file, backend):
         Args:
             ato_id (string): The name of the act to extract.
             file (string): Path of the file.
-            backend (string): Backend of act extraction, either regex or ner.
+            backend (string): Backend of act extraction, either Regex or NER.
 
         Returns:
-            An dataframe with extracted information, for the desired act.
+            A dataframe with extracted information, for the desired act.
 
         """
         return _acts_ids[ato_id](file, backend).data_frame
 
     @staticmethod
     def get_all_df(file, backend):
-        """Extract all acts types from a single DODF.
+        """Extract all act types from a single DODF.
 
         Dataframe format.
 
@@ -116,7 +113,7 @@ def get_all_df(file, backend):
             backend (string): Backend of act extraction, either Regex or NER.
 
         Returns:
-            An vector of dataframed with extracted information for all acts.
+            A vector of dataframes with extracted information for all acts.
 
         """
         res = {}
@@ -127,7 +124,7 @@ def get_all_df(file, backend):
 
     @staticmethod
     def get_xml(file, backend, i):
-        """Extract all acts types from a single DODF.
+        """Extract all act types from a single DODF.
 
         Dataframe format.
 
@@ -136,7 +133,7 @@ def get_xml(file, backend, i):
             backend (string): Backend of act extraction, either Regex or NER.
 
         Returns:
-            An vector of dataframed with extracted information for all acts.
+            A vector of dataframes with extracted information for all acts.
 
         """
         res = XMLFy(file, _acts_ids, i)

diff --git a/dodfminer/extract/polished/create_xml.py b/dodfminer/extract/polished/create_xml.py
@@ -8,21 +8,24 @@
 
 class XMLFy:
 
-    def __init__(self, file, acts_ids, i):
-        file_nums = list((map(int, re.findall(r'\d+', file))))
-        file_nums = file_nums[2:]
-        print(file_nums)
-        file_id = f"{i}_"
-        for s in file_nums:
-            file_id += str(s) + "."
-        file_id = file_id[:-1]
+    def __init__(self, file, acts_ids, id):
         self._file = file
         self._acts_ids = acts_ids
-        self._xml_id = file_id
+        self._xml_id = self.build_xml_id(id)
         self._annotation_id = 1
         self._relations_id = 1
         self.xml = self._create_xml()
 
+    def build_xml_id(self, id):
+        file_name = self._file.split('/')[-1]
+
+        str2int2str = lambda x : str(int(x))
+        file_numbers_list = map(str2int2str, re.findall(r'\d+', file_name))
+
+        file_id = ".".join(list(file_numbers_list)[1:])
+
+        return f"{id}_{file_id}"
+
     def print_tree(self):
         print(etree.tostring(self.xml, pretty_print=True).decode())
 

diff --git a/dodfminer/extract/polished/helper.py b/dodfminer/extract/polished/helper.py
@@ -6,7 +6,7 @@
 
 Usage Example::
 
-    from ddodfminer.extract.polished import helper
+    from dodfminer.extract.polished import helper
     helper.print_dataframe(df)
 
 Functions