Skip to content

Commit

Permalink
Merge branch 'main' into release-please--branches--main
Browse files Browse the repository at this point in the history
  • Loading branch information
holtskinner committed Aug 9, 2023
2 parents fe76f7b + 7d61e85 commit df68a6f
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 40 deletions.
31 changes: 0 additions & 31 deletions google/cloud/documentai_toolbox/wrappers/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,37 +79,6 @@ def to_dataframe(self) -> pd.DataFrame:

return pd.DataFrame(self.body_rows, columns=columns)

def to_csv(self) -> str:
    r"""Returns the table as a CSV string.

    The table is converted with ``to_dataframe()`` and the resulting
    DataFrame is serialized with ``to_csv(index=False)``, so the
    DataFrame's row index is not written to the output.

    .. code-block:: python

        from google.cloud.documentai_toolbox import Document

        def sample_table_to_csv():
            # Wrap document from gcs_path
            merged_document = Document('gs://abc/def/gh/1')
            # Use first page
            page = merged_document.pages[0]
            # Export the first table in page 1 to csv
            csv_text = page.tables[0].to_csv()
            print(csv_text)

    Returns:
        str:
            The table in csv format.
    """
    # This method takes no arguments besides ``self``; the DataFrame is
    # built internally rather than supplied by the caller.
    return self.to_dataframe().to_csv(index=False)


@dataclasses.dataclass
class FormField:
Expand Down
20 changes: 15 additions & 5 deletions samples/snippets/table_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,21 @@ def table_sample(document_path: str, output_file_prefix: str) -> None:
print("Tables in Document")
for page in wrapped_document.pages:
for table_index, table in enumerate(page.tables):
print(table.to_dataframe())
# Write table to CSV file
output_file = f"{output_file_prefix}-{page.page_number}-{table_index}.csv"
with open(output_file, "w", encoding="utf-8") as f:
f.write(table.to_csv())
# Convert table to Pandas Dataframe
# Refer to https://pandas.pydata.org/docs/reference/frame.html for all supported methods
df = table.to_dataframe()
print(df)

output_filename = f"{output_file_prefix}-{page.page_number}-{table_index}"

# Write Dataframe to CSV file
df.to_csv(f"{output_filename}.csv", index=False)

# Write Dataframe to HTML file
df.to_html(f"{output_filename}.html", index=False)

# Write Dataframe to Markdown file
df.to_markdown(f"{output_filename}.md", index=False)


# [END documentai_toolbox_table]
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0",
"pandas >= 1.0.0, <3.0.0",
"pandas >= 1.0.0, <2.0.0; python_version<'3.8'",
"tabulate >= 0.9.0, <1.0.0",
"proto-plus >= 1.22.0, <2.0.0dev",
"proto-plus >= 1.22.2, <2.0.0dev; python_version>='3.11'",
"grpc-google-iam-v1 >= 0.12.4, < 0.13dev",
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_table_to_csv(docproto):
documentai_object=docproto_page.tables[0], document_text=docproto.text
)

contents = table.to_csv()
contents = table.to_dataframe().to_csv(index=False)

assert (
contents
Expand All @@ -68,7 +68,7 @@ def test_table_to_csv_with_empty_body_rows(docproto):
)
table.body_rows = None

contents = table.to_csv()
contents = table.to_dataframe().to_csv(index=False)

assert (
contents
Expand All @@ -84,7 +84,7 @@ def test_table_to_csv_with_empty_header_rows(docproto):
)
table.header_rows = None

contents = table.to_csv()
contents = table.to_dataframe().to_csv(index=False)

assert (
contents
Expand All @@ -107,7 +107,7 @@ def test_table_to_csv_with_empty_header_rows_and_single_body(docproto):
table.header_rows = []
table.body_rows = [[table.body_rows[0][0]]]

contents = table.to_csv()
contents = table.to_dataframe().to_csv(index=False)
assert (
contents
== """""
Expand Down

0 comments on commit df68a6f

Please sign in to comment.