Skip to content

Commit

Permalink
Merge pull request #155 from UnB-KnEDLe/update_polished_docs
Browse files Browse the repository at this point in the history
[DOCUMENTATION] Updates
  • Loading branch information
lacwerda committed Aug 16, 2021
2 parents bdada5a + 4ad7acf commit e5289a9
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 105 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,7 @@ dmypy.json
.vscode/

# temporary local files
tmp/
tmp/

# dodfs
dodfs/
24 changes: 22 additions & 2 deletions docs/source/polished/core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,32 @@
Polished Core
=============

.. automodule:: dodfminer.extract.polished.core
:members: _acts_ids
.. contents:: Table of Contents

.. automodule:: dodfminer.extract.polished.core

The Act Extractor Class
=======================

.. autoclass:: dodfminer.extract.polished.core.ActsExtractor
:members:

Returning Objects
-----------------

The methods in this section return objects or vectors of objects.

.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_act_obj

.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_all_obj

Returning Dataframes
--------------------

The methods in this section return dataframes or vectors of dataframes.

.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_act_df

.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_all_df

.. automethod:: dodfminer.extract.polished.core.ActsExtractor.get_xml
14 changes: 14 additions & 0 deletions docs/source/polished/utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,17 @@ Polished Helper

.. automodule:: dodfminer.extract.polished.helper
:members:

.. autofunction:: dodfminer.extract.polished.helper.xml_multiple

.. autofunction:: dodfminer.extract.polished.helper.extract_multiple_acts

.. autofunction:: dodfminer.extract.polished.helper.extract_multiple

.. autofunction:: dodfminer.extract.polished.helper.extract_single

.. autofunction:: dodfminer.extract.polished.helper.build_act_txt

.. autofunction:: dodfminer.extract.polished.helper.print_dataframe

.. autofunction:: dodfminer.extract.polished.helper.get_files_path
2 changes: 1 addition & 1 deletion dodfminer/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version_info = (1, 2, 0)
version_info = (1, 3, 6)
# format:
# ('dodf_major', 'dodf_minor', 'dodf_patch')

Expand Down
Empty file.
27 changes: 12 additions & 15 deletions dodfminer/extract/polished/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
from dodfminer.extract.polished.core import ActsExtractor
ActsExtractor.get_act_obj(ato_id, file, backend)
Acts Available and IDs
----------------------
"""

from dodfminer.extract.polished.acts.aposentadoria import Retirements, RetAposentadoria
Expand Down Expand Up @@ -43,10 +40,10 @@ class ActsExtractor:
"""Polished Extraction main class.
All interactions with the acts need to be done through this interface.
This class handles all the requests to regex or ner extraction.
This class handles all the requests to Regex or NER extraction.
Note:
This class is static
This class is static.
"""

Expand All @@ -59,7 +56,7 @@ def get_act_obj(ato_id, file, backend):
Args:
ato_id (string): The name of the act to extract.
file (string): Path of the file.
backend (string): Backend of act extraction, either regex or ner.
backend (string): Backend of act extraction, either Regex or NER.
Returns:
An object of the desired act, already with extracted information.
Expand All @@ -69,16 +66,16 @@ def get_act_obj(ato_id, file, backend):

@staticmethod
def get_all_obj(file, backend):
"""Extract all acts types from a single DODF.
"""Extract all act types from a single DODF.
Object format.
Args:
file (string): Path of the file.
backend (string): Backend of act extraction, either regex or ner.
backend (string): Backend of act extraction, either Regex or NER.
Returns:
An vector of objects of all the acts, already with extracted
A vector of objects of all the acts with extracted
information.
"""
Expand All @@ -97,17 +94,17 @@ def get_act_df(ato_id, file, backend):
Args:
ato_id (string): The name of the act to extract.
file (string): Path of the file.
backend (string): Backend of act extraction, either regex or ner.
backend (string): Backend of act extraction, either Regex or NER.
Returns:
An dataframe with extracted information, for the desired act.
A dataframe with extracted information, for the desired act.
"""
return _acts_ids[ato_id](file, backend).data_frame

@staticmethod
def get_all_df(file, backend):
"""Extract all acts types from a single DODF.
"""Extract all act types from a single DODF.
Dataframe format.
Expand All @@ -116,7 +113,7 @@ def get_all_df(file, backend):
backend (string): Backend of act extraction, either Regex or NER.
Returns:
An vector of dataframed with extracted information for all acts.
A vector of dataframes with extracted information for all acts.
"""
res = {}
Expand All @@ -127,7 +124,7 @@ def get_all_df(file, backend):

@staticmethod
def get_xml(file, backend, i):
"""Extract all acts types from a single DODF.
"""Extract all act types from a single DODF.
Dataframe format.
Expand All @@ -136,7 +133,7 @@ def get_xml(file, backend, i):
backend (string): Backend of act extraction, either Regex or NER.
Returns:
An vector of dataframed with extracted information for all acts.
A vector of dataframes with extracted information for all acts.
"""
res = XMLFy(file, _acts_ids, i)
Expand Down
21 changes: 12 additions & 9 deletions dodfminer/extract/polished/create_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,24 @@

class XMLFy:

def __init__(self, file, acts_ids, i):
file_nums = list((map(int, re.findall(r'\d+', file))))
file_nums = file_nums[2:]
print(file_nums)
file_id = f"{i}_"
for s in file_nums:
file_id += str(s) + "."
file_id = file_id[:-1]
def __init__(self, file, acts_ids, id):
self._file = file
self._acts_ids = acts_ids
self._xml_id = file_id
self._xml_id = self.build_xml_id(id)
self._annotation_id = 1
self._relations_id = 1
self.xml = self._create_xml()

def build_xml_id(self, id):
    """Build the identifier used for the XML generated from this file.

    The result has the form ``"<id>_<numbers>"``. ``<numbers>`` is made from
    the runs of digits found in the base name of ``self._file`` (the text
    after its last ``/``): each run loses its leading zeros, the first run
    is discarded, and the remaining ones are joined with ``.``.

    Args:
        id: Value placed before the underscore. The name shadows the
            builtin ``id`` but is kept because it is part of the
            method's signature.

    Returns:
        string: The identifier. When the base name holds fewer than two
        runs of digits the numeric part is empty and ``"<id>_"`` is
        returned.
    """
    # Only the file name is inspected, so digits in parent directories
    # never leak into the identifier.
    file_name = self._file.split('/')[-1]

    # The int() round-trip strips leading zeros ("001" -> "1").
    numbers = [str(int(digits)) for digits in re.findall(r'\d+', file_name)]

    # The first number of the file name is intentionally left out.
    file_id = ".".join(numbers[1:])

    return f"{id}_{file_id}"

def print_tree(self):
    """Print the XML tree stored in ``self.xml`` to standard output.

    The tree is serialized with ``pretty_print=True`` and the resulting
    bytes are decoded to text before printing.
    """
    serialized = etree.tostring(self.xml, pretty_print=True)
    print(serialized.decode())

Expand Down
2 changes: 1 addition & 1 deletion dodfminer/extract/polished/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Usage Example::
from ddodfminer.extract.polished import helper
from dodfminer.extract.polished import helper
helper.print_dataframe(df)
Functions
Expand Down

0 comments on commit e5289a9

Please sign in to comment.