Skip to content

Commit

Permalink
Add handling of java_options. Fix #30
Browse files Browse the repository at this point in the history
  • Loading branch information
chezou committed May 18, 2017
1 parent 14074f1 commit 1307e0d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
12 changes: 10 additions & 2 deletions tabula/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@ def read_pdf(input_path, **kwargs):
elif output_format == 'json':
kwargs['format'] = 'JSON'

java_options = kwargs.get('java_options', [])
if isinstance(java_options, str):
java_options = [java_options]

options = build_options(kwargs)
path, is_url = localize_file(input_path)
args = ["java", "-jar", jar_path] + options + [path]
args = ["java"] + java_options + ["-jar", jar_path] + options + [path]

try:
output = subprocess.check_output(args)
Expand Down Expand Up @@ -106,9 +110,13 @@ def convert_into(input_path, output_path, **kwargs):
elif output_format == 'dataframe':
raise AttributeError("'output_format' has no attribute 'dataframe'")

java_options = kwargs.get('java_options', [])
if isinstance(java_options, str):
java_options = [java_options]

options = build_options(kwargs)
path, is_url = localize_file(input_path)
args = ["java", "-jar", jar_path] + options + [path]
args = ["java"] + java_options + ["-jar", jar_path] + options + [path]

try:
subprocess.check_output(args)
Expand Down
6 changes: 6 additions & 0 deletions tests/test_read_pdf_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ def test_read_pdf_with_option(self):
self.assertTrue(tabula.read_pdf(pdf_path, pages=(2, 3), nospreadsheet=True,
guess=False).equals(pd.read_csv(expected_csv2)))

def test_read_pdf_with_java_option(self):
    """Check that read_pdf still returns the expected table when java_options is given.

    Page 1 of the sample PDF is read with ``java_options=['-Xmx256m']`` and the
    resulting frame must equal the reference CSV fixture.
    """
    source_pdf = 'tests/resources/data.pdf'
    reference_csv = 'tests/resources/data_1.csv'
    # Extract first, then load the reference, matching the original call order.
    extracted = tabula.read_pdf(source_pdf, pages=1, java_options=['-Xmx256m'])
    reference = pd.read_csv(reference_csv)
    self.assertTrue(extracted.equals(reference))

def test_convert_from(self):
pdf_path = 'tests/resources/data.pdf'
expected_csv = 'tests/resources/data_1.csv'
Expand Down

0 comments on commit 1307e0d

Please sign in to comment.