Skip to content

Commit

Permalink
Merge pull request #378 from chezou/update-encoding
Browse files Browse the repository at this point in the history
Update encoding every time SubprocessTabula is initialized
  • Loading branch information
chezou committed Mar 10, 2024
2 parents 3cf4bcf + 72d9234 commit 235e25c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 0 deletions.
14 changes: 14 additions & 0 deletions tabula/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,20 @@ def __init__(
self.java_options = java_options
self.encoding = encoding

def update_encoding(
    self, encoding: str, java_options: List[str], silent: Optional[bool]
) -> None:
    """Replace the stored encoding and Java options on this instance.

    Args:
        encoding: Value assigned to ``self.encoding``.
        java_options: Options stored on ``self.java_options``. The list is
            copied, so the caller's list is never modified.
        silent: When truthy, two ``-D`` logging flags (slf4j
            ``defaultLogLevel=off`` and commons-logging ``NoOpLog``) are
            appended to the stored options.
    """
    self.encoding = encoding
    # Copy before extending: storing the caller's list by reference would
    # let the extend below leak the logging flags into the caller's object,
    # and they would pile up if the same list is passed again.
    self.java_options = list(java_options)
    if silent:
        quiet_flags = (
            "-Dorg.slf4j.simpleLogger.defaultLogLevel=off",
            "-Dorg.apache.commons.logging.Log"
            "=org.apache.commons.logging.impl.NoOpLog",
        )
        # Skip flags the caller already supplied to avoid duplicates.
        missing = [
            flag for flag in quiet_flags if flag not in self.java_options
        ]
        self.java_options.extend(missing)

def call_tabula_java(
self, options: TabulaOption, path: Optional[str] = None
) -> str:
Expand Down
4 changes: 4 additions & 0 deletions tabula/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def _run(
_tabula_vm = SubprocessTabula(
java_options=java_options, silent=options.silent, encoding=encoding
)
elif isinstance(_tabula_vm, SubprocessTabula):
_tabula_vm.update_encoding(
encoding=encoding, java_options=java_options, silent=options.silent
)
elif set(java_options) - IGNORED_JAVA_OPTIONS:
logger.warning("java_options is ignored until rebooting the Python process.")

Expand Down
3 changes: 3 additions & 0 deletions tests/test_read_pdf_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def test_read_pdf_with_force_subprocess(self):
self.assertTrue(len(df), 1)
self.assertTrue(isinstance(df[0], pd.DataFrame))
self.assertTrue(df[0].equals(pd.read_csv(self.expected_csv1)))
self.assertTrue(tabula.io._tabula_vm.encoding, "utf-8")
tabula.read_pdf(self.pdf_path, stream=True, encoding="cp932")
self.assertTrue(tabula.io._tabula_vm.encoding, "cp932")

def test_read_pdf_into_json(self):
expected_json = "tests/resources/data_1.json"
Expand Down

0 comments on commit 235e25c

Please sign in to comment.