Skip to content

Commit d02b0d5

Browse files
Merge branch 'main' into 2325-support-athena-query-prepared-statements
2 parents bd32da9 + 44891b8 commit d02b0d5

File tree

4 files changed

+123
-119
lines changed

4 files changed

+123
-119
lines changed

awswrangler/s3/_read_orc.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
import pandas as pd
1818
import pyarrow as pa
1919
import pyarrow.dataset
20-
import pyarrow.orc
2120
from typing_extensions import Literal
2221

2322
from awswrangler import _data_types, _utils, exceptions
@@ -40,16 +39,19 @@
4039

4140
if TYPE_CHECKING:
4241
from mypy_boto3_s3 import S3Client
42+
from pyarrow.orc import ORCFile
4343

4444
FULL_READ_S3_BLOCK_SIZE = 20_971_520 # 20 MB (20 * 2**20)
4545
METADATA_READ_S3_BLOCK_SIZE = 131_072 # 128 KB (128 * 2**10)
4646

4747
_logger: logging.Logger = logging.getLogger(__name__)
4848

4949

50-
def _pyarrow_orc_file_wrapper(source: Any) -> pyarrow.orc.ORCFile:
50+
def _pyarrow_orc_file_wrapper(source: Any) -> "ORCFile":
51+
from pyarrow.orc import ORCFile
52+
5153
try:
52-
return pyarrow.orc.ORCFile(source=source)
54+
return ORCFile(source=source)
5355
except pyarrow.ArrowInvalid as ex:
5456
if str(ex) == "ORC file size is 0 bytes":
5557
_logger.warning("Ignoring empty file...")
@@ -74,7 +76,7 @@ def _read_orc_metadata_file(
7476
s3_block_size=METADATA_READ_S3_BLOCK_SIZE,
7577
s3_additional_kwargs=s3_additional_kwargs,
7678
) as f:
77-
orc_file: Optional[pyarrow.orc.ORCFile] = _pyarrow_orc_file_wrapper(source=f)
79+
orc_file: Optional["ORCFile"] = _pyarrow_orc_file_wrapper(source=f)
7880
if orc_file:
7981
return orc_file.schema
8082
return None
@@ -118,7 +120,7 @@ def _read_orc_file(
118120
s3_additional_kwargs=s3_additional_kwargs,
119121
s3_client=s3_client,
120122
) as f:
121-
orc_file: Optional[pyarrow.orc.ORCFile] = _pyarrow_orc_file_wrapper(
123+
orc_file: Optional["ORCFile"] = _pyarrow_orc_file_wrapper(
122124
source=f,
123125
)
124126
if orc_file is None:

awswrangler/s3/_write_orc.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
import boto3
99
import pandas as pd
1010
import pyarrow as pa
11-
import pyarrow.orc
1211

1312
from awswrangler import _utils, catalog, exceptions, typing
1413
from awswrangler._arrow import _df_to_table
@@ -33,6 +32,7 @@
3332

3433
if TYPE_CHECKING:
3534
from mypy_boto3_s3 import S3Client
35+
from pyarrow.orc import ORCWriter
3636

3737
_logger: logging.Logger = logging.getLogger(__name__)
3838

@@ -54,8 +54,10 @@ def _new_writer(
5454
s3_client: "S3Client",
5555
s3_additional_kwargs: Optional[Dict[str, str]],
5656
use_threads: Union[bool, int],
57-
) -> Iterator[pyarrow.orc.ORCWriter]:
58-
writer: Optional[pyarrow.orc.ORCWriter] = None
57+
) -> Iterator["ORCWriter"]:
58+
from pyarrow.orc import ORCWriter
59+
60+
writer: Optional["ORCWriter"] = None
5961
if not pyarrow_additional_kwargs:
6062
pyarrow_additional_kwargs = {}
6163

@@ -67,7 +69,7 @@ def _new_writer(
6769
s3_client=s3_client,
6870
) as f:
6971
try:
70-
writer = pyarrow.orc.ORCWriter(
72+
writer = ORCWriter(
7173
where=f,
7274
compression="uncompressed" if compression is None else compression,
7375
**pyarrow_additional_kwargs,

building/lambda/build-lambda-layer.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ cmake \
3333
-DARROW_WITH_ZLIB=ON \
3434
-DARROW_FLIGHT=OFF \
3535
-DARROW_GANDIVA=OFF \
36-
-DARROW_ORC=ON \
36+
-DARROW_ORC=OFF \
3737
-DARROW_CSV=ON \
3838
-DARROW_JSON=ON \
3939
-DARROW_COMPUTE=ON \

0 commit comments

Comments
 (0)